List of usage examples for org.apache.lucene.index IndexOptions DOCS_AND_FREQS_AND_POSITIONS
IndexOptions DOCS_AND_FREQS_AND_POSITIONS
To view the source code for org.apache.lucene.index IndexOptions DOCS_AND_FREQS_AND_POSITIONS.
Click Source Link
From source file:com.lucure.core.codec.LucurePostingsWriter.java
License:Apache License
@Override public int setField(FieldInfo fieldInfo) { IndexOptions indexOptions = fieldInfo.getIndexOptions(); fieldHasFreqs = indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS) >= 0; fieldHasPositions = indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0; fieldHasOffsets = indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0; fieldHasPayloads = fieldInfo.hasPayloads(); skipWriter.setField(fieldHasPositions, fieldHasOffsets, fieldHasPayloads); lastState = emptyState;// w w w . j av a2s. c o m if (fieldHasPositions) { if (fieldHasPayloads || fieldHasOffsets) { return 3; // doc + pos + pay FP } else { return 2; // doc + pos FP } } else { return 1; // doc FP } }
From source file:com.rocana.lucene.codec.v1.RocanaFieldReader.java
License:Apache License
@Override public boolean hasPositions() { return fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0; }
From source file:com.sindicetech.siren.index.codecs.siren10.Siren10PostingsWriter.java
License:Open Source License
@Override public void addPosition(final int position, final BytesRef payload, final int startOffset, final int endOffset) throws IOException { assert indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS; // we always receive node ids in the payload assert payload != null; // decode payload sirenPayload.decode(payload);//from w w w . j a va 2 s .c om final IntsRef node = sirenPayload.getNode(); // check if we received the same node // TODO: we pay the cost of decoding the node before testing the equality // we could instead directly compute the node hash based on the byte array final int nodeHash = node.hashCode(); if (lastNodeHash != nodeHash) { // if different node // add term freq for previous node if not first payload. if (lastNodeHash != Long.MAX_VALUE) { this.addTermFreqInNode(); } // add new node this.addNode(node); } lastNodeHash = nodeHash; // add position this.addPosition(sirenPayload.getPosition()); }
From source file:com.sindicetech.siren.util.SirenTestCase.java
License:Open Source License
private static FieldType newFieldType() { final FieldType ft = new FieldType(); ft.setStored(false);//from w ww. j av a 2s. c o m ft.setOmitNorms(false); ft.setIndexed(true); ft.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS); ft.setTokenized(true); return ft; }
From source file:com.vmware.xenon.services.common.Lucene60FieldInfosFormatWithCache.java
License:Open Source License
private static IndexOptions getIndexOptions(IndexInput input, byte b) throws IOException { switch (b) {/* www. j av a2 s . c o m*/ case 0: return IndexOptions.NONE; case 1: return IndexOptions.DOCS; case 2: return IndexOptions.DOCS_AND_FREQS; case 3: return IndexOptions.DOCS_AND_FREQS_AND_POSITIONS; case 4: return IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS; default: // BUG throw new CorruptIndexException("invalid IndexOptions byte: " + b, input); } }
From source file:edu.umass.cs.ciir.IndexFromGalago.java
License:Open Source License
public static void main(String[] args) throws Exception { Parameters argp = Parameters.parseArgs(args); String galagoIndexPath = null; String luceneIndexPath = null; try {//from www.ja v a 2 s . co m galagoIndexPath = argp.getString("galagoIndex"); luceneIndexPath = argp.getString("luceneIndex"); } catch (Exception e) { System.out.println(getUsage()); return; } logger.setUseParentHandlers(false); FileHandler lfh = new FileHandler("indexing-errors.log"); SimpleFormatter formatter = new SimpleFormatter(); lfh.setFormatter(formatter); logger.addHandler(lfh); final DiskIndex index = new DiskIndex(argp.get("index", galagoIndexPath)); final CorpusReader corpus = (CorpusReader) index.getIndexPart("corpus"); long total = corpus.getManifest().getLong("keyCount"); final CorpusReader.KeyIterator iterator = corpus.getIterator(); final Document.DocumentComponents dcp = Document.DocumentComponents.JustText; // Analyzer includes options for text processing Analyzer analyzer = new Analyzer() { @Override protected TokenStreamComponents createComponents(String fieldName) { // Step 1: tokenization (Lucene's StandardTokenizer is suitable for most text retrieval occasions) TokenStreamComponents ts = new TokenStreamComponents(new StandardTokenizer()); // Step 2: transforming all tokens into lowercased ones ts = new Analyzer.TokenStreamComponents(ts.getTokenizer(), new LowerCaseFilter(ts.getTokenStream())); // Step 3: whether to remove stop words // Uncomment the following line to remove stop words // ts = new TokenStreamComponents( ts.getTokenizer(), new StopwordsFilter( ts.getTokenStream(), StandardAnalyzer.ENGLISH_STOP_WORDS_SET ) ); // Step 4: whether to apply stemming // Uncomment the following line to apply Krovetz or Porter stemmer // ts = new TokenStreamComponents( ts.getTokenizer(), new KStemFilter( ts.getTokenStream() ) ); // ts = new TokenStreamComponents( ts.getTokenizer(), new PorterStemFilter( ts.getTokenStream() ) ); return ts; } }; try (final FSDirectory dir = FSDirectory.open(Paths.get(argp.get("output", luceneIndexPath)))) { final IndexWriterConfig cfg = new IndexWriterConfig(analyzer); System.out.println("Similarity: " + cfg.getSimilarity()); cfg.setOpenMode(IndexWriterConfig.OpenMode.CREATE); try (IndexWriter writer = new IndexWriter(dir, cfg)) { iterator.forAllKeyStrings(docId -> { try { Document document = iterator.getDocument(dcp); String text = document.text; String id = document.name; System.out.println("Processing document: " + id); org.apache.lucene.document.Document doc = new org.apache.lucene.document.Document(); doc.add(new StringField("id", id, Field.Store.YES)); // this stores the actual text with tags so formatting is preserved doc.add(new StoredField("body", text)); org.jsoup.nodes.Document jsoup = Jsoup.parse(text); // tokens of the document FieldType fieldTypeText = new FieldType(); fieldTypeText.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS); fieldTypeText.setStoreTermVectors(true); fieldTypeText.setStoreTermVectorPositions(true); fieldTypeText.setTokenized(true); fieldTypeText.setStored(false); fieldTypeText.freeze(); doc.add(new Field("tokens", jsoup.text(), fieldTypeText)); try { writer.addDocument(doc); System.out.println("Doc count: " + writer.numDocs()); } catch (IOException e) { logger.log(Level.WARNING, "Pull-Document-Exception", e); System.err.println(e.toString()); } } catch (Exception e) { logger.log(Level.WARNING, "Pull-Document-Exception", e); System.err.println(e.toString()); } }); } } System.out.println("Indexing Done. "); }
From source file:io.anserini.index.generator.LuceneDocumentGenerator.java
License:Apache License
public Document createDocument(SourceDocument src) { String id = src.id();/*from w w w . java 2s . c o m*/ String contents; try { // If there's a transform, use it. contents = transform != null ? transform.apply(src.content()) : src.content(); } catch (Exception e) { LOG.error("Error extracting document text, skipping document: " + id, e); counters.errors.incrementAndGet(); return null; } if (contents.trim().length() == 0) { LOG.info("Empty document: " + id); counters.emptyDocuments.incrementAndGet(); return null; } // make a new, empty document Document document = new Document(); // document id document.add(new StringField(FIELD_ID, id, Field.Store.YES)); if (args.storeRawDocs) { document.add(new StoredField(FIELD_RAW, src.content())); } FieldType fieldType = new FieldType(); fieldType.setStored(args.storeTransformedDocs); // Are we storing document vectors? if (args.storeDocvectors) { fieldType.setStoreTermVectors(true); fieldType.setStoreTermVectorPositions(true); } // Are we building a "positional" or "count" index? if (args.storePositions) { fieldType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS); } else { fieldType.setIndexOptions(IndexOptions.DOCS_AND_FREQS); } document.add(new Field(FIELD_BODY, contents, fieldType)); return document; }
From source file:io.anserini.index.generator.TweetGenerator.java
License:Apache License
@Override public Document createDocument(TweetCollection.Document tweetDoc) { String id = tweetDoc.id();//from ww w .j a v a2 s . c o m if (tweetDoc.content().trim().isEmpty()) { counters.empty.incrementAndGet(); return null; } final TwitterTextParseResults result = TwitterTextParser.parseTweet(tweetDoc.content().trim()); if (!result.isValid) { counters.errors.incrementAndGet(); return null; } String text = tweetDoc.content().trim().substring(result.validTextRange.start, result.validTextRange.end); if (!args.tweetKeepUrls) { final Extractor extractor = new Extractor(); final List<String> urls = extractor.extractURLs(text); for (String url : urls) { text = text.replaceAll(url, ""); } } text = text.trim(); if (text.isEmpty()) { counters.empty.incrementAndGet(); return null; } // Skip deletes tweetids. if (deletes != null && deletes.contains(id)) { counters.skipped.incrementAndGet(); return null; } if (tweetDoc.getIdLong() > args.tweetMaxId) { counters.skipped.incrementAndGet(); return null; } if (!args.tweetKeepRetweets && tweetDoc.getRetweetedStatusId().isPresent()) { counters.skipped.incrementAndGet(); return null; } Document doc = new Document(); doc.add(new StringField(FIELD_ID, id, Field.Store.YES)); // We need this to break scoring ties. doc.add(new LongPoint(StatusField.ID_LONG.name, tweetDoc.getIdLong())); doc.add(new NumericDocValuesField(StatusField.ID_LONG.name, tweetDoc.getIdLong())); tweetDoc.getEpoch().ifPresent(epoch -> doc.add(new LongPoint(StatusField.EPOCH.name, epoch))); doc.add(new StringField(StatusField.SCREEN_NAME.name, tweetDoc.getScreenName(), Field.Store.NO)); doc.add(new IntPoint(StatusField.FRIENDS_COUNT.name, tweetDoc.getFollowersCount())); doc.add(new IntPoint(StatusField.FOLLOWERS_COUNT.name, tweetDoc.getFriendsCount())); doc.add(new IntPoint(StatusField.STATUSES_COUNT.name, tweetDoc.getStatusesCount())); tweetDoc.getInReplyToStatusId().ifPresent(rid -> { doc.add(new LongPoint(StatusField.IN_REPLY_TO_STATUS_ID.name, rid)); tweetDoc.getInReplyToUserId() .ifPresent(ruid -> doc.add(new LongPoint(StatusField.IN_REPLY_TO_USER_ID.name, ruid))); }); tweetDoc.getRetweetedStatusId().ifPresent(rid -> { doc.add(new LongPoint(StatusField.RETWEETED_STATUS_ID.name, rid)); tweetDoc.getRetweetedUserId() .ifPresent(ruid -> doc.add(new LongPoint(StatusField.RETWEETED_USER_ID.name, ruid))); tweetDoc.getRetweetCount().ifPresent(rc -> doc.add(new LongPoint(StatusField.RETWEET_COUNT.name, rc))); }); tweetDoc.getLang().ifPresent(lang -> doc.add(new StringField(StatusField.LANG.name, lang, Field.Store.NO))); if (args.storeRawDocs) { // store the raw json string as one single field doc.add(new StoredField(FIELD_RAW, tweetDoc.getJsonString())); } FieldType fieldType = new FieldType(); fieldType.setStored(args.storeTransformedDocs); // Are we storing document vectors? if (args.storeDocvectors) { fieldType.setStoreTermVectors(true); fieldType.setStoreTermVectorPositions(true); } // Are we building a "positional" or "count" index? if (args.storePositions) { fieldType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS); } else { fieldType.setIndexOptions(IndexOptions.DOCS_AND_FREQS); } doc.add(new Field(FIELD_BODY, text, fieldType)); return doc; }
From source file:io.anserini.index.IndexTweets.java
License:Apache License
@SuppressWarnings("static-access") public static void main(String[] args) throws Exception { Options options = new Options(); options.addOption(new Option(HELP_OPTION, "show help")); options.addOption(new Option(OPTIMIZE_OPTION, "merge indexes into a single segment")); options.addOption(new Option(STORE_TERM_VECTORS_OPTION, "store term vectors")); options.addOption(OptionBuilder.withArgName("dir").hasArg().withDescription("source collection directory") .create(COLLECTION_OPTION)); options.addOption(/* w w w. j a va2s. c o m*/ OptionBuilder.withArgName("dir").hasArg().withDescription("index location").create(INDEX_OPTION)); options.addOption(OptionBuilder.withArgName("file").hasArg().withDescription("file with deleted tweetids") .create(DELETES_OPTION)); options.addOption(OptionBuilder.withArgName("id").hasArg().withDescription("max id").create(MAX_ID_OPTION)); CommandLine cmdline = null; CommandLineParser parser = new GnuParser(); try { cmdline = parser.parse(options, args); } catch (ParseException exp) { System.err.println("Error parsing command line: " + exp.getMessage()); System.exit(-1); } if (cmdline.hasOption(HELP_OPTION) || !cmdline.hasOption(COLLECTION_OPTION) || !cmdline.hasOption(INDEX_OPTION)) { HelpFormatter formatter = new HelpFormatter(); formatter.printHelp(IndexTweets.class.getName(), options); System.exit(-1); } String collectionPath = cmdline.getOptionValue(COLLECTION_OPTION); String indexPath = cmdline.getOptionValue(INDEX_OPTION); final FieldType textOptions = new FieldType(); textOptions.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS); textOptions.setStored(true); textOptions.setTokenized(true); if (cmdline.hasOption(STORE_TERM_VECTORS_OPTION)) { textOptions.setStoreTermVectors(true); } LOG.info("collection: " + collectionPath); LOG.info("index: " + indexPath); LongOpenHashSet deletes = null; if (cmdline.hasOption(DELETES_OPTION)) { deletes = new LongOpenHashSet(); File deletesFile = new File(cmdline.getOptionValue(DELETES_OPTION)); if (!deletesFile.exists()) { System.err.println("Error: " + deletesFile + " does not exist!"); System.exit(-1); } LOG.info("Reading deletes from " + deletesFile); FileInputStream fin = new FileInputStream(deletesFile); byte[] ignoreBytes = new byte[2]; fin.read(ignoreBytes); // "B", "Z" bytes from commandline tools BufferedReader br = new BufferedReader(new InputStreamReader(new CBZip2InputStream(fin))); String s; while ((s = br.readLine()) != null) { if (s.contains("\t")) { deletes.add(Long.parseLong(s.split("\t")[0])); } else { deletes.add(Long.parseLong(s)); } } br.close(); fin.close(); LOG.info("Read " + deletes.size() + " tweetids from deletes file."); } long maxId = Long.MAX_VALUE; if (cmdline.hasOption(MAX_ID_OPTION)) { maxId = Long.parseLong(cmdline.getOptionValue(MAX_ID_OPTION)); LOG.info("index: " + maxId); } long startTime = System.currentTimeMillis(); File file = new File(collectionPath); if (!file.exists()) { System.err.println("Error: " + file + " does not exist!"); System.exit(-1); } StatusStream stream = new JsonStatusCorpusReader(file); Directory dir = FSDirectory.open(Paths.get(indexPath)); final IndexWriterConfig config = new IndexWriterConfig(ANALYZER); config.setOpenMode(IndexWriterConfig.OpenMode.CREATE); IndexWriter writer = new IndexWriter(dir, config); int cnt = 0; Status status; try { while ((status = stream.next()) != null) { if (status.getText() == null) { continue; } // Skip deletes tweetids. if (deletes != null && deletes.contains(status.getId())) { continue; } if (status.getId() > maxId) { continue; } cnt++; Document doc = new Document(); doc.add(new LongField(StatusField.ID.name, status.getId(), Field.Store.YES)); doc.add(new LongField(StatusField.EPOCH.name, status.getEpoch(), Field.Store.YES)); doc.add(new TextField(StatusField.SCREEN_NAME.name, status.getScreenname(), Store.YES)); doc.add(new Field(StatusField.TEXT.name, status.getText(), textOptions)); doc.add(new IntField(StatusField.FRIENDS_COUNT.name, status.getFollowersCount(), Store.YES)); doc.add(new IntField(StatusField.FOLLOWERS_COUNT.name, status.getFriendsCount(), Store.YES)); doc.add(new IntField(StatusField.STATUSES_COUNT.name, status.getStatusesCount(), Store.YES)); long inReplyToStatusId = status.getInReplyToStatusId(); if (inReplyToStatusId > 0) { doc.add(new LongField(StatusField.IN_REPLY_TO_STATUS_ID.name, inReplyToStatusId, Field.Store.YES)); doc.add(new LongField(StatusField.IN_REPLY_TO_USER_ID.name, status.getInReplyToUserId(), Field.Store.YES)); } String lang = status.getLang(); if (!lang.equals("unknown")) { doc.add(new TextField(StatusField.LANG.name, status.getLang(), Store.YES)); } long retweetStatusId = status.getRetweetedStatusId(); if (retweetStatusId > 0) { doc.add(new LongField(StatusField.RETWEETED_STATUS_ID.name, retweetStatusId, Field.Store.YES)); doc.add(new LongField(StatusField.RETWEETED_USER_ID.name, status.getRetweetedUserId(), Field.Store.YES)); doc.add(new IntField(StatusField.RETWEET_COUNT.name, status.getRetweetCount(), Store.YES)); if (status.getRetweetCount() < 0 || status.getRetweetedStatusId() < 0) { LOG.warn("Error parsing retweet fields of " + status.getId()); } } writer.addDocument(doc); if (cnt % 100000 == 0) { LOG.info(cnt + " statuses indexed"); } } LOG.info(String.format("Total of %s statuses added", cnt)); if (cmdline.hasOption(OPTIMIZE_OPTION)) { LOG.info("Merging segments..."); writer.forceMerge(1); LOG.info("Done!"); } LOG.info("Total elapsed time: " + (System.currentTimeMillis() - startTime) + "ms"); } catch (Exception e) { e.printStackTrace(); } finally { writer.close(); dir.close(); stream.close(); } }
From source file:io.anserini.index.IndexTweetsUpdatePlace.java
License:Apache License
@SuppressWarnings("static-access") public static void main(String[] args) throws Exception { Options options = new Options(); options.addOption(new Option(HELP_OPTION, "show help")); options.addOption(new Option(OPTIMIZE_OPTION, "merge indexes into a single segment")); options.addOption(new Option(STORE_TERM_VECTORS_OPTION, "store term vectors")); options.addOption(OptionBuilder.withArgName("collection").hasArg() .withDescription("source collection directory").create(COLLECTION_OPTION)); options.addOption(// w w w . ja v a2 s .c o m OptionBuilder.withArgName("dir").hasArg().withDescription("index location").create(INDEX_OPTION)); options.addOption(OptionBuilder.withArgName("file").hasArg().withDescription("file with deleted tweetids") .create(DELETES_OPTION)); options.addOption(OptionBuilder.withArgName("id").hasArg().withDescription("max id").create(MAX_ID_OPTION)); CommandLine cmdline = null; CommandLineParser parser = new GnuParser(); try { cmdline = parser.parse(options, args); } catch (ParseException exp) { System.err.println("Error parsing command line: " + exp.getMessage()); System.exit(-1); } if (cmdline.hasOption(HELP_OPTION) || !cmdline.hasOption(COLLECTION_OPTION) || !cmdline.hasOption(INDEX_OPTION)) { HelpFormatter formatter = new HelpFormatter(); formatter.printHelp(IndexTweetsUpdatePlace.class.getName(), options); System.exit(-1); } String collectionPath = cmdline.getOptionValue(COLLECTION_OPTION); String indexPath = cmdline.getOptionValue(INDEX_OPTION); System.out.println(collectionPath + " " + indexPath); LOG.info("collection: " + collectionPath); LOG.info("index: " + indexPath); long startTime = System.currentTimeMillis(); File file = new File(collectionPath); if (!file.exists()) { System.err.println("Error: " + file + " does not exist!"); System.exit(-1); } final FieldType textOptions = new FieldType(); textOptions.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS); textOptions.setStored(true); textOptions.setTokenized(true); if (cmdline.hasOption(STORE_TERM_VECTORS_OPTION)) { textOptions.setStoreTermVectors(true); } final StatusStream stream = new JsonStatusCorpusReader(file); final Directory dir = new SimpleFSDirectory(Paths.get(cmdline.getOptionValue(INDEX_OPTION))); final IndexWriterConfig config = new IndexWriterConfig(ANALYZER); config.setOpenMode(IndexWriterConfig.OpenMode.APPEND); final IndexWriter writer = new IndexWriter(dir, config); System.out.print("Original # of docs " + writer.numDocs()); int updateCount = 0; Runtime.getRuntime().addShutdownHook(new Thread() { public void run() { try { stream.close(); } catch (IOException e1) { // TODO Auto-generated catch block e1.printStackTrace(); } try { writer.close(); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } try { dir.close(); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } System.out.println("Shutting down"); } }); int cnt = 0; Status status; try { while ((status = stream.next()) != null) { if (status.getPlace() != null) { // Query q = NumericRangeQuery.newLongRange(TweetStreamReader.StatusField.ID.name, status.getId(), // status.getId(), true, true); // System.out.print("Deleting docCount="+writer.numDocs()); // writer.deleteDocuments(q); // writer.commit(); // System.out.print(" Deleted docCount="+writer.numDocs()); Document doc = new Document(); doc.add(new LongField(StatusField.ID.name, status.getId(), Field.Store.YES)); doc.add(new LongField(StatusField.EPOCH.name, status.getEpoch(), Field.Store.YES)); doc.add(new TextField(StatusField.SCREEN_NAME.name, status.getScreenname(), Store.YES)); doc.add(new Field(StatusField.TEXT.name, status.getText(), textOptions)); doc.add(new IntField(StatusField.FRIENDS_COUNT.name, status.getFollowersCount(), Store.YES)); doc.add(new IntField(StatusField.FOLLOWERS_COUNT.name, status.getFriendsCount(), Store.YES)); doc.add(new IntField(StatusField.STATUSES_COUNT.name, status.getStatusesCount(), Store.YES)); doc.add(new DoubleField(StatusField.LONGITUDE.name, status.getLongitude(), Store.YES)); doc.add(new DoubleField(StatusField.LATITUDE.name, status.getlatitude(), Store.YES)); doc.add(new StringField(StatusField.PLACE.name, status.getPlace(), Store.YES)); long inReplyToStatusId = status.getInReplyToStatusId(); if (inReplyToStatusId > 0) { doc.add(new LongField(StatusField.IN_REPLY_TO_STATUS_ID.name, inReplyToStatusId, Field.Store.YES)); doc.add(new LongField(StatusField.IN_REPLY_TO_USER_ID.name, status.getInReplyToUserId(), Field.Store.YES)); } String lang = status.getLang(); if (!lang.equals("unknown")) { doc.add(new TextField(StatusField.LANG.name, status.getLang(), Store.YES)); } long retweetStatusId = status.getRetweetedStatusId(); if (retweetStatusId > 0) { doc.add(new LongField(StatusField.RETWEETED_STATUS_ID.name, retweetStatusId, Field.Store.YES)); doc.add(new LongField(StatusField.RETWEETED_USER_ID.name, status.getRetweetedUserId(), Field.Store.YES)); doc.add(new IntField(StatusField.RETWEET_COUNT.name, status.getRetweetCount(), Store.YES)); if (status.getRetweetCount() < 0 || status.getRetweetedStatusId() < 0) { LOG.warn("Error parsing retweet fields of " + status.getId()); } } long id = status.getId(); BytesRefBuilder brb = new BytesRefBuilder(); NumericUtils.longToPrefixCodedBytes(id, 0, brb); Term term = new Term(StatusField.ID.name, brb.get()); writer.updateDocument(term, doc); // writer.addDocument(doc); updateCount += 1; if (updateCount % 10000 == 0) { LOG.info(updateCount + " statuses updated"); writer.commit(); System.out.println("Updated docCount=" + writer.numDocs()); } } } LOG.info("Total elapsed time: " + (System.currentTimeMillis() - startTime) + "ms"); } catch (Exception e) { e.printStackTrace(); } finally { writer.close(); dir.close(); stream.close(); } }