Example usage for org.apache.lucene.index IndexWriter addDocument

Introduction

This page shows example usages of org.apache.lucene.index.IndexWriter.addDocument, collected from open-source projects.

Prototype

public long addDocument(Iterable<? extends IndexableField> doc) throws IOException 

Document

Adds a document to this index.
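Before the project examples, here is a minimal, self-contained sketch of the typical call sequence. It is written against the Lucene 4.x API used by most of the examples below (in recent Lucene versions addDocument also returns a sequence number, as in the prototype above); the index path and field names are placeholders. An added document is only buffered by the writer and becomes searchable once the writer commits or closes.

import java.io.File;
import java.io.IOException;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;

public class AddDocumentExample {
    public static void main(String[] args) throws IOException {
        Directory dir = FSDirectory.open(new File("./example-index")); // placeholder path
        Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_47);
        IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_47, analyzer);
        IndexWriter writer = new IndexWriter(dir, iwc);
        try {
            Document doc = new Document();
            doc.add(new StringField("id", "1", Field.Store.YES));           // indexed, not tokenized
            doc.add(new TextField("body", "hello lucene", Field.Store.NO)); // tokenized, not stored
            writer.addDocument(doc); // buffers the document in the writer
        } finally {
            writer.close(); // commits buffered documents and releases the write lock
        }
    }
}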

Usage

From source file:calliope.search.AeseSearch.java

License:Open Source License

/**
 * Open and index an MVD file
 * @param docID the docID of the cortex
 * @param w the Lucene index-writer
 */
private static void addCorTextoIndex(String docID, IndexWriter w) throws AeseException {
    try {
        HashMap<String, String> map = getCorTex(docID);
        for (String key : map.keySet()) {
            Document d = new Document();
            StringField sf1 = new StringField(LuceneFields.VID, key, Field.Store.YES);
            d.add(sf1);
            StringField sf2 = new StringField(LuceneFields.DOCID, docID, Field.Store.YES);
            d.add(sf2);
            TextField tf = new TextField(LuceneFields.TEXT, map.get(key), Field.Store.NO);
            d.add(tf);
            w.addDocument(d);
        }
    } catch (IOException ioe) {
        throw new AeseException(ioe);
    }
}

From source file:cc.pp.analyzer.ik.demo.IKAnalyzerDemo.java

License:Apache License

public static void main(String[] args) {

    // Field name for the Lucene document
    String fieldName = "text";
    // Sample Chinese text to index (from the stock IK Analyzer demo)
    String text = "IK Analyzer是一个结合词典分词和文法分词的中文分词开源工具包。它使用了全新的正向迭代最细粒度切分算法。";

    // Instantiate the IKAnalyzer tokenizer (true enables smart segmentation)
    Analyzer analyzer = new IKAnalyzer(Version.LUCENE_48, true);

    Directory directory = null;
    IndexWriter iwriter = null;
    DirectoryReader ireader = null;
    IndexSearcher isearcher = null;
    try {
        // Use an in-memory index for the demo
        directory = new RAMDirectory();

        // Configure the IndexWriter
        IndexWriterConfig iwConfig = new IndexWriterConfig(Version.LUCENE_48, analyzer);
        iwConfig.setOpenMode(OpenMode.CREATE_OR_APPEND);
        iwriter = new IndexWriter(directory, iwConfig);
        // Build and index a single document
        Document doc = new Document();
        doc.add(new LongField("ID", 1000, Field.Store.YES));
        doc.add(new TextField(fieldName, text, Field.Store.YES));
        iwriter.addDocument(doc);
        iwriter.close();

        //********** Search phase **********
        // Open a reader and searcher over the index
        ireader = DirectoryReader.open(directory);
        isearcher = new IndexSearcher(ireader);

        String keyword = "?";
        //         String keyword = "";
        //QueryParser?Query
        QueryParser qp = new QueryParser(Version.LUCENE_48, fieldName, analyzer);
        qp.setDefaultOperator(QueryParser.AND_OPERATOR);
        Query query = qp.parse(keyword);
        System.out.println("Query = " + query);

        // Retrieve the top 5 hits
        TopDocs topDocs = isearcher.search(query, 5);
        System.out.println("Total hits: " + topDocs.totalHits);
        // Print the matched documents (iterate over scoreDocs, which holds
        // at most 5 entries even when totalHits is larger)
        ScoreDoc[] scoreDocs = topDocs.scoreDocs;
        for (int i = 0; i < scoreDocs.length; i++) {
            Document targetDoc = isearcher.doc(scoreDocs[i].doc);
            System.out.println("Content: " + targetDoc.toString());
        }

    } catch (CorruptIndexException e) {
        e.printStackTrace();
    } catch (LockObtainFailedException e) {
        e.printStackTrace();
    } catch (IOException e) {
        e.printStackTrace();
    } catch (ParseException e) {
        e.printStackTrace();
    } finally {
        if (ireader != null) {
            try {
                ireader.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
        if (directory != null) {
            try {
                directory.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }
}

From source file:cc.twittertools.index.IndexStatuses.java

License:Apache License

@SuppressWarnings("static-access")
public static void main(String[] args) throws Exception {
    Options options = new Options();

    options.addOption(new Option(HELP_OPTION, "show help"));
    options.addOption(new Option(OPTIMIZE_OPTION, "merge indexes into a single segment"));
    options.addOption(new Option(STORE_TERM_VECTORS_OPTION, "store term vectors"));

    options.addOption(OptionBuilder.withArgName("dir").hasArg().withDescription("source collection directory")
            .create(COLLECTION_OPTION));
    options.addOption(
            OptionBuilder.withArgName("dir").hasArg().withDescription("index location").create(INDEX_OPTION));
    options.addOption(OptionBuilder.withArgName("file").hasArg().withDescription("file with deleted tweetids")
            .create(DELETES_OPTION));
    options.addOption(OptionBuilder.withArgName("id").hasArg().withDescription("max id").create(MAX_ID_OPTION));

    CommandLine cmdline = null;
    CommandLineParser parser = new GnuParser();
    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());
        System.exit(-1);
    }

    if (cmdline.hasOption(HELP_OPTION) || !cmdline.hasOption(COLLECTION_OPTION)
            || !cmdline.hasOption(INDEX_OPTION)) {
        HelpFormatter formatter = new HelpFormatter();
        formatter.printHelp(IndexStatuses.class.getName(), options);
        System.exit(-1);
    }

    String collectionPath = cmdline.getOptionValue(COLLECTION_OPTION);
    String indexPath = cmdline.getOptionValue(INDEX_OPTION);

    final FieldType textOptions = new FieldType();
    textOptions.setIndexed(true);
    textOptions.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
    textOptions.setStored(true);
    textOptions.setTokenized(true);
    if (cmdline.hasOption(STORE_TERM_VECTORS_OPTION)) {
        textOptions.setStoreTermVectors(true);
    }

    LOG.info("collection: " + collectionPath);
    LOG.info("index: " + indexPath);

    LongOpenHashSet deletes = null;
    if (cmdline.hasOption(DELETES_OPTION)) {
        deletes = new LongOpenHashSet();
        File deletesFile = new File(cmdline.getOptionValue(DELETES_OPTION));
        if (!deletesFile.exists()) {
            System.err.println("Error: " + deletesFile + " does not exist!");
            System.exit(-1);
        }
        LOG.info("Reading deletes from " + deletesFile);

        FileInputStream fin = new FileInputStream(deletesFile);
        byte[] ignoreBytes = new byte[2];
        fin.read(ignoreBytes); // "B", "Z" bytes from commandline tools
        BufferedReader br = new BufferedReader(new InputStreamReader(new CBZip2InputStream(fin)));

        String s;
        while ((s = br.readLine()) != null) {
            if (s.contains("\t")) {
                deletes.add(Long.parseLong(s.split("\t")[0]));
            } else {
                deletes.add(Long.parseLong(s));
            }
        }
        br.close();
        fin.close();
        LOG.info("Read " + deletes.size() + " tweetids from deletes file.");
    }

    long maxId = Long.MAX_VALUE;
    if (cmdline.hasOption(MAX_ID_OPTION)) {
        maxId = Long.parseLong(cmdline.getOptionValue(MAX_ID_OPTION));
        LOG.info("index: " + maxId);
    }

    long startTime = System.currentTimeMillis();
    File file = new File(collectionPath);
    if (!file.exists()) {
        System.err.println("Error: " + file + " does not exist!");
        System.exit(-1);
    }

    StatusStream stream = new JsonStatusCorpusReader(file);

    Directory dir = FSDirectory.open(new File(indexPath));
    IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_43, IndexStatuses.ANALYZER);
    config.setOpenMode(OpenMode.CREATE);

    IndexWriter writer = new IndexWriter(dir, config);
    int cnt = 0;
    Status status;
    try {
        while ((status = stream.next()) != null) {
            if (status.getText() == null) {
                continue;
            }

            // Skip deleted tweetids.
            if (deletes != null && deletes.contains(status.getId())) {
                continue;
            }

            if (status.getId() > maxId) {
                continue;
            }

            cnt++;
            Document doc = new Document();
            doc.add(new LongField(StatusField.ID.name, status.getId(), Field.Store.YES));
            doc.add(new LongField(StatusField.EPOCH.name, status.getEpoch(), Field.Store.YES));
            doc.add(new TextField(StatusField.SCREEN_NAME.name, status.getScreenname(), Store.YES));

            doc.add(new Field(StatusField.TEXT.name, status.getText(), textOptions));

            doc.add(new IntField(StatusField.FRIENDS_COUNT.name, status.getFriendsCount(), Store.YES));
            doc.add(new IntField(StatusField.FOLLOWERS_COUNT.name, status.getFollowersCount(), Store.YES));
            doc.add(new IntField(StatusField.STATUSES_COUNT.name, status.getStatusesCount(), Store.YES));

            long inReplyToStatusId = status.getInReplyToStatusId();
            if (inReplyToStatusId > 0) {
                doc.add(new LongField(StatusField.IN_REPLY_TO_STATUS_ID.name, inReplyToStatusId,
                        Field.Store.YES));
                doc.add(new LongField(StatusField.IN_REPLY_TO_USER_ID.name, status.getInReplyToUserId(),
                        Field.Store.YES));
            }

            String lang = status.getLang();
            if (!lang.equals("unknown")) {
                doc.add(new TextField(StatusField.LANG.name, status.getLang(), Store.YES));
            }

            long retweetStatusId = status.getRetweetedStatusId();
            if (retweetStatusId > 0) {
                doc.add(new LongField(StatusField.RETWEETED_STATUS_ID.name, retweetStatusId, Field.Store.YES));
                doc.add(new LongField(StatusField.RETWEETED_USER_ID.name, status.getRetweetedUserId(),
                        Field.Store.YES));
                doc.add(new IntField(StatusField.RETWEET_COUNT.name, status.getRetweetCount(), Store.YES));
                if (status.getRetweetCount() < 0 || status.getRetweetedStatusId() < 0) {
                    LOG.warn("Error parsing retweet fields of " + status.getId());
                }
            }

            writer.addDocument(doc);
            if (cnt % 100000 == 0) {
                LOG.info(cnt + " statuses indexed");
            }
        }

        LOG.info(String.format("Total of %s statuses added", cnt));

        if (cmdline.hasOption(OPTIMIZE_OPTION)) {
            LOG.info("Merging segments...");
            writer.forceMerge(1);
            LOG.info("Done!");
        }

        LOG.info("Total elapsed time: " + (System.currentTimeMillis() - startTime) + "ms");
    } catch (Exception e) {
        e.printStackTrace();
    } finally {
        writer.close();
        dir.close();
        stream.close();
    }
}

From source file:choco.lucene.IKAnalyzerDemo.java

License:Apache License

public static void main(String[] args) {
    // Field name for the Lucene document
    String fieldName = "text";
    // Sample Chinese text to index (from the stock IK Analyzer demo)
    String text = "IK Analyzer是一个结合词典分词和文法分词的中文分词开源工具包。它使用了全新的正向迭代最细粒度切分算法。";
    // Instantiate the IKAnalyzer tokenizer
    Analyzer analyzer = new IKAnalyzer();
    Directory directory = null;
    IndexWriter iwriter = null;
    IndexReader ireader = null;
    IndexSearcher isearcher = null;
    try {
        // Use an in-memory index for the demo
        directory = new RAMDirectory();
        // Configure the IndexWriter
        IndexWriterConfig iwConfig = new IndexWriterConfig(Version.LUCENE_34, analyzer);
        iwConfig.setOpenMode(OpenMode.CREATE_OR_APPEND);
        iwriter = new IndexWriter(directory, iwConfig);
        // Build and index a single document
        Document doc = new Document();
        doc.add(new Field("ID", "10000", Field.Store.YES, Field.Index.NOT_ANALYZED));
        doc.add(new Field(fieldName, text, Field.Store.YES, Field.Index.ANALYZED));
        iwriter.addDocument(doc);
        iwriter.close();
        //********** Search phase **********
        // Open a reader and searcher over the index
        ireader = IndexReader.open(directory);
        isearcher = new IndexSearcher(ireader);
        String keyword = "?";
        //QueryParser?Query 
        QueryParser qp = new QueryParser(Version.LUCENE_34, fieldName, analyzer);
        qp.setDefaultOperator(QueryParser.AND_OPERATOR);
        Query query = qp.parse(keyword);
        // Retrieve the top 5 hits
        TopDocs topDocs = isearcher.search(query, 5);
        System.out.println("Total hits: " + topDocs.totalHits);
        // Print the matched documents (iterate over scoreDocs, which holds
        // at most 5 entries even when totalHits is larger)
        ScoreDoc[] scoreDocs = topDocs.scoreDocs;
        for (int i = 0; i < scoreDocs.length; i++) {
            Document targetDoc = isearcher.doc(scoreDocs[i].doc);
            System.out.println("Content: " + targetDoc.toString());
        }
    } catch (CorruptIndexException e) {
        e.printStackTrace();
    } catch (LockObtainFailedException e) {
        e.printStackTrace();
    } catch (IOException e) {
        e.printStackTrace();
    } catch (ParseException e) {
        e.printStackTrace();
    } finally {
        if (ireader != null) {
            try {
                ireader.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
        if (directory != null) {
            try {
                directory.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }
}

From source file:ci6226.buildindex.java

/**
 * @param args the command line arguments
 */
public static void main(String[] args) throws FileNotFoundException, IOException, ParseException {
    String file = "/home/steven/Dropbox/workspace/ntu_coursework/ci6226/Assiment/yelpdata/yelp_training_set/yelp_training_set_review.json";
    JSONParser parser = new JSONParser();

    BufferedReader in = new BufferedReader(new FileReader(file));
    //  List<Document> jdocs = new LinkedList<Document>();
    Date start = new Date();
    String indexPath = "./myindex";
    System.out.println("Indexing to directory '" + indexPath + "'...");
    // Analyzer analyzer= new NGramAnalyzer(2,8);
    Analyzer analyzer = new myAnalyzer();

    IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_47, analyzer);
    Directory dir = FSDirectory.open(new File(indexPath));
    // :Post-Release-Update-Version.LUCENE_XY:
    // TODO: try different analyzer,stop words,words steming check size
    //   Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_47);

    // Add new documents to an existing index:
    iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);
    // Optional: for better indexing performance, if you
    // are indexing many documents, increase the RAM
    // buffer.  But if you do this, increase the max heap
    // size to the JVM (eg add -Xmx512m or -Xmx1g):
    //
    // iwc.setRAMBufferSizeMB(256.0);
    IndexWriter writer = new IndexWriter(dir, iwc);
    //  writer.addDocuments(jdocs);
    int line = 0;
    String s;
    while ((s = in.readLine()) != null) {
        Object obj = JSONValue.parse(s);
        JSONObject person = (JSONObject) obj;
        String text = (String) person.get("text");
        String user_id = (String) person.get("user_id");
        String business_id = (String) person.get("business_id");
        String review_id = (String) person.get("review_id");
        JSONObject votes = (JSONObject) person.get("votes");
        long funny = (Long) votes.get("funny");
        long cool = (Long) votes.get("cool");
        long useful = (Long) votes.get("useful");
        Document doc = new Document();
        Field review_idf = new StringField("review_id", review_id, Field.Store.YES);
        doc.add(review_idf);
        Field business_idf = new StringField("business_id", business_id, Field.Store.YES);
        doc.add(business_idf);

        //http://qindongliang1922.iteye.com/blog/2030639
        FieldType ft = new FieldType();
        ft.setIndexed(true); // make the field searchable
        ft.setStored(true); // keep the original value
        ft.setStoreTermVectors(true);
        ft.setTokenized(true);
        ft.setStoreTermVectorPositions(true); // record token positions for highlighting
        ft.setStoreTermVectorOffsets(true); // record character offsets for highlighting

        Field textf = new Field("text", text, ft);

        doc.add(textf);
        //    Field user_idf = new StringField("user_id", user_id, Field.Store.YES);
        //     doc.add(user_idf);
        //      doc.add(new LongField("cool", cool, Field.Store.YES));
        //      doc.add(new LongField("funny", funny, Field.Store.YES));
        //       doc.add(new LongField("useful", useful, Field.Store.YES));

        writer.addDocument(doc);

        System.out.println(line++);
    }

    in.close();
    writer.close();
    Date end = new Date();
    System.out.println(end.getTime() - start.getTime() + " total milliseconds");
}

From source file:ci6226.buildindex.java

/**
 * Indexes a single review, given as one line of JSON, using the given
 * writer. Because it is called once per input line, this follows the
 * "line doc" pattern (one document per line of input), which gives far
 * better throughput than indexing one document per input file; the Lucene
 * benchmark module's WriteLineDocTask can create such files.
 *
 * @param writer Writer to the index where the review will be stored
 * @param s one JSON record (a single review) to index
 * @throws IOException If there is a low-level I/O error
 */
static void indexDocs(IndexWriter writer, String s) throws IOException {
    Object obj = JSONValue.parse(s);
    JSONObject person = (JSONObject) obj;

    String text = (String) person.get("text");
    //  System.out.println(text);

    String user_id = (String) person.get("user_id");
    //  System.out.println(user_id);

    String business_id = (String) person.get("business_id");
    //  System.out.println(business_id);
    String review_id = (String) person.get("review_id");
    //   System.out.println(review_id);

    JSONObject votes = (JSONObject) person.get("votes");

    long funny = (Long) votes.get("funny");
    // System.out.println(funny);

    long cool = (Long) votes.get("cool");
    //   System.out.println(cool);

    long useful = (Long) votes.get("useful");
    // System.out.println(useful);

    // make a new, empty document
    Document doc = new Document();

    // Add the review and business ids as StringFields: indexed
    // (i.e. searchable) but not tokenized into separate words, and
    // without term frequency or positional information:
    Field review_idf = new StringField("review_id", review_id, Field.Store.YES);
    doc.add(review_idf);
    Field business_idf = new StringField("business_id", business_id, Field.Store.YES);
    doc.add(business_idf);
    Field textf = new TextField("text", text, Field.Store.YES);
    doc.add(textf);
    Field user_idf = new StringField("user_id", user_id, Field.Store.YES);
    doc.add(user_idf);

    // Add the vote counts as LongFields, which are indexed (i.e.
    // efficiently filterable with NumericRangeFilter) as well as stored:
    doc.add(new LongField("cool", cool, Field.Store.YES));
    doc.add(new LongField("funny", funny, Field.Store.YES));
    doc.add(new LongField("useful", useful, Field.Store.YES));

    // The review text was added above as a stored, tokenized TextField,
    // so it is both searchable and retrievable.
    //  if (writer.getConfig().getOpenMode() == OpenMode.CREATE) {
    // New index, so we just add the document (no old document can be there):
    //   System.out.println("adding " + review_id);
    writer.addDocument(doc);
    //       } else {
    // Existing index (an old copy of this document may have been indexed) so 
    // we use updateDocument instead to replace the old one matching the exact 
    // path, if present:
    //         System.out.println("updating " + file);
    ///          writer.updateDocument(new Term("review_id", review_id), doc);
    //     }

}
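The commented-out tail of indexDocs preserves the add-vs-update pattern from Lucene's demo code. As a sketch of what that branch would look like if re-enabled (assuming IndexWriterConfig.OpenMode and org.apache.lucene.index.Term are imported, and using the same writer, doc and review_id names as above):

// Add-vs-update pattern, keyed on the unique "review_id" field.
if (writer.getConfig().getOpenMode() == OpenMode.CREATE) {
    // Fresh index: no earlier copy of this review can exist, so just add it.
    writer.addDocument(doc);
} else {
    // Existing index: atomically delete any document with the same
    // review_id and add the new one in its place.
    writer.updateDocument(new Term("review_id", review_id), doc);
}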

From source file:ci6226.eval_index_writer.java

public eval_index_writer(Analyzer _analyzer, String _iReviewLocation, String _dir) throws IOException {
    String file = _iReviewLocation;
    JSONParser parser = new JSONParser();
    BufferedReader in = new BufferedReader(new FileReader(file));
    Date start = new Date();
    String indexPath = "./" + _dir;
    System.out.println("Indexing to directory '" + indexPath + "'...");
    Analyzer analyzer = _analyzer;
    IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_47, analyzer);
    Directory dir = FSDirectory.open(new File(indexPath));
    iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);
    IndexWriter writer = new IndexWriter(dir, iwc);
    //  int line=0;
    String s;
    while ((s = in.readLine()) != null) {
        Object obj = JSONValue.parse(s);
        JSONObject person = (JSONObject) obj;
        String text = (String) person.get("text");
        String user_id = (String) person.get("user_id");
        String business_id = (String) person.get("business_id");
        String review_id = (String) person.get("review_id");
        JSONObject votes = (JSONObject) person.get("votes");
        long funny = (Long) votes.get("funny");
        long cool = (Long) votes.get("cool");
        long useful = (Long) votes.get("useful");
        Document doc = new Document();
        Field review_idf = new StringField("review_id", review_id, Field.Store.YES);
        doc.add(review_idf);
        //    Field business_idf = new StringField("business_id", business_id, Field.Store.YES);
        //     doc.add(business_idf);

        //http://qindongliang1922.iteye.com/blog/2030639
        FieldType ft = new FieldType();
        ft.setIndexed(true); // make the field searchable
        ft.setStored(true); // keep the original value
        ft.setStoreTermVectors(true);
        ft.setTokenized(true);
        ft.setStoreTermVectorPositions(true); // record token positions
        ft.setStoreTermVectorOffsets(true); // record character offsets

        Field textf = new Field("text", text, ft);

        doc.add(textf);
        //    Field user_idf = new StringField("user_id", user_id, Field.Store.YES);
        //     doc.add(user_idf);
        //      doc.add(new LongField("cool", cool, Field.Store.YES));
        //      doc.add(new LongField("funny", funny, Field.Store.YES));
        //       doc.add(new LongField("useful", useful, Field.Store.YES));

        writer.addDocument(doc);

        //  System.out.println(line++);
    }

    in.close();
    writer.close();
    Date end = new Date();
    System.out.println(end.getTime() - start.getTime() + " total milliseconds");
}

From source file:cn.codepub.redis.directory.Main.java

License:Apache License

public static void testRedisDirectoryWithShardedJedisPool() throws IOException {
    long start = System.currentTimeMillis();
    IndexWriterConfig indexWriterConfig = new IndexWriterConfig(new WhitespaceAnalyzer())
            .setOpenMode(IndexWriterConfig.OpenMode.CREATE);
    //indexWriterConfig.setInfoStream(System.out);
    //indexWriterConfig.setRAMBufferSizeMB(2048);
    //LogByteSizeMergePolicy logByteSizeMergePolicy = new LogByteSizeMergePolicy();
    //logByteSizeMergePolicy.setMinMergeMB(1);
    //logByteSizeMergePolicy.setMaxMergeMB(64);
    //logByteSizeMergePolicy.setMaxCFSSegmentSizeMB(64);
    //indexWriterConfig.setRAMBufferSizeMB(1024).setMergePolicy(logByteSizeMergePolicy).setUseCompoundFile(false);
    //GenericObjectPoolConfig genericObjectPoolConfig = new GenericObjectPoolConfig();
    //genericObjectPoolConfig.setMaxWaitMillis(3000);
    //10s
    List<JedisShardInfo> shards = new ArrayList<>();
    JedisShardInfo si = new JedisShardInfo("localhost", 6379, Constants.TIME_OUT);
    //JedisShardInfo si2 = new JedisShardInfo("localhost", 6380);
    shards.add(si);
    //shards.add(si2);
    JedisPoolConfig jedisPoolConfig = new JedisPoolConfig();
    ShardedJedisPool shardedJedisPool = new ShardedJedisPool(jedisPoolConfig, shards);
    RedisDirectory redisDirectory = new RedisDirectory(new ShardedJedisPoolStream(shardedJedisPool));
    IndexWriter indexWriter = new IndexWriter(redisDirectory, indexWriterConfig);
    for (int i = 0; i < 10000000; i++) {
        indexWriter.addDocument(addDocument(i));
    }
    indexWriter.commit();
    indexWriter.close();
    redisDirectory.close();
    long end = System.currentTimeMillis();
    log.error("RedisDirectoryWithShardedJedisPool consumes {}s!", (end - start) / 1000);
    shardedJedisPool = new ShardedJedisPool(jedisPoolConfig, shards);
    start = System.currentTimeMillis();
    IndexSearcher indexSearcher = new IndexSearcher(
            DirectoryReader.open(new RedisDirectory(new ShardedJedisPoolStream(shardedJedisPool))));
    int total = 0;
    for (int i = 0; i < 10000000; i++) {
        TermQuery key1 = new TermQuery(new Term("key1", "key" + i));
        TopDocs search = indexSearcher.search(key1, 10);
        total += search.totalHits;
    }
    System.out.println(total);
    end = System.currentTimeMillis();
    log.error("RedisDirectoryWithShardedJedisPool search consumes {}ms!", (end - start));
}

From source file:cn.fql.blogspider.IndexMain.java

License:Open Source License

static void indexDocs(IndexWriter writer, File file) throws IOException {
    if (file.canRead()) {
        if (file.isDirectory()) {
            String[] files = file.list();

            if (files != null) {
                for (int i = 0; i < files.length; ++i) {
                    indexDocs(writer, new File(file, files[i]));
                }
            }
        } else {
            FileInputStream fis;
            try {
                fis = new FileInputStream(file);
            } catch (FileNotFoundException fnfe) {
                return;
            }

            try {
                Document doc = new Document();

                Field pathField = new StringField("path", file.getPath(), Field.Store.YES);
                doc.add(pathField);

                doc.add(new LongField("modified", file.lastModified(), Field.Store.YES));

                doc.add(new TextField("contents", new BufferedReader(new InputStreamReader(fis, "UTF-8"))));

                if (writer.getConfig().getOpenMode() == IndexWriterConfig.OpenMode.CREATE) {
                    System.out.println("adding " + file);
                    writer.addDocument(doc);
                } else {
                    System.out.println("updating " + file);
                    writer.updateDocument(new Term("path", file.getPath()), doc);
                }
            } finally {
                fis.close();
            }
        }
    }
}

From source file:cn.hbu.cs.esearch.index.BaseSearchIndex.java

License:Apache License

public void updateIndex(LongSet delDocs, List<EsearchIndexable.IndexingReq> insertDocs,
        Analyzer defaultAnalyzer, Similarity similarity) throws IOException {
    if (delDocs != null && delDocs.size() > 0) {
        deleteDocs(delDocs);
    }

    if (insertDocs == null || insertDocs.size() == 0) {
        return;
    }

    IndexWriter idxMod = null;
    try {
        idxMod = openIndexWriter(defaultAnalyzer, similarity);
        if (idxMod != null) {
            for (EsearchIndexable.IndexingReq idxPair : insertDocs) {
                Analyzer analyzer = idxPair.getAnalyzer();
                Document doc = idxPair.getDocument();
                if (analyzer == null) {
                    idxMod.addDocument(doc);
                } else {
                    idxMod.addDocument(doc, analyzer);
                }
            }
        }
    } finally {
        if (idxMod != null) {
            idxMod.commit();
            if (_closeWriterAfterUpdate) {
                closeIndexWriter();
            }
        }
    }
}