Example usage for org.apache.lucene.index IndexWriter deleteDocuments

Introduction

This page collects usage examples for the org.apache.lucene.index.IndexWriter method deleteDocuments.

Prototype

public long deleteDocuments(Query... queries) throws IOException 

Document

Deletes the document(s) matching any of the provided queries. A companion overload, deleteDocuments(Term... terms), deletes the document(s) containing any of the given terms; several of the examples below use that form.
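
A minimal, self-contained sketch of the Query overload (a Lucene 6.x-era API is assumed; the RAMDirectory and the "status" field are illustrative, not taken from the examples below):

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;

public class DeleteByQuerySketch {
    public static void main(String[] args) throws Exception {
        Directory dir = new RAMDirectory();
        IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(new StandardAnalyzer()));
        // delete every document whose "status" field equals "expired";
        // matches are only marked deleted until a segment merge reclaims them
        writer.deleteDocuments(new TermQuery(new Term("status", "expired")));
        writer.commit(); // make the deletions visible to newly opened readers
        writer.close();
        dir.close();
    }
}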

Usage

From source file:org.sonatype.timeline.DefaultTimelineIndexer.java

License:Open Source License

public int purge(long fromTime, long toTime, Set<String> types, Set<String> subTypes) throws TimelineException {
    IndexSearcher searcher = null;

    try {
        synchronized (this) {
            searcher = getIndexSearcher();

            if (searcher.maxDoc() == 0) {
                return 0;
            }

            Query q = buildQuery(fromTime, toTime, types, subTypes);

            // count how many documents will be deleted; the search does not load them
            TopFieldDocs topDocs = searcher.search(q, null, searcher.maxDoc(),
                    new Sort(new SortField(TIMESTAMP, SortField.LONG, true)));

            if (topDocs.scoreDocs.length == 0) {
                return 0;
            }

            IndexWriter writer = getIndexWriter();

            writer.deleteDocuments(q);

            writer.commit();

            writer.optimize();

            return topDocs.scoreDocs.length;
        }
    } catch (IOException e) {
        throw new TimelineException("Failed to purge records from the timeline index!", e);
    } finally {
        if (searcher != null) {
            try {
                searcher.close();
            } catch (IOException e) {
                getLogger().error("Unable to close searcher", e);
            }
        }
    }
}
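
Note the count-then-delete pattern above: deleteDocuments gives no indication of how many documents matched (in later Lucene versions its long return value is an internal sequence number, not a match count), so the method runs a sorted search first just to size the result. On Lucene 5.1 or newer, that counting step could shrink to a sketch like the following (reusing this method's q and searcher):

int toBeDeleted = searcher.count(q); // counts matches without scoring or loading documents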

From source file:org.talend.dataquality.semantic.broadcast.BroadcastIndexObjectTest.java

License:Open Source License

@Test
public void testCreateAndGet() throws URISyntaxException, IOException {
    // init a local index
    final File testFolder = new File("target/broadcast");
    if (testFolder.exists()) {
        FileUtils.deleteDirectory(testFolder);
    }
    try {
        FSDirectory testDir = FSDirectory.open(testFolder);
        IndexWriter writer = new IndexWriter(testDir,
                new IndexWriterConfig(Version.LATEST, new StandardAnalyzer(CharArraySet.EMPTY_SET)));
        if (writer.maxDoc() > 0) {
            writer.deleteAll();
            writer.commit();
        }
        for (String key : TEST_INDEX_CONTENT.keySet()) {
            Document doc = DictionaryUtils.generateDocument("TEST", key, key,
                    new HashSet<>(Arrays.asList(TEST_INDEX_CONTENT.get(key))));
            writer.addDocument(doc);
        }

        // here we add an extra document and remove it later.
        Document doc = DictionaryUtils.generateDocument("TEST", "DE_LAND", "DE_LAND",
                new HashSet<>(Arrays.asList(new String[] { "Bayern" })));
        writer.addDocument(doc);
        writer.commit();

        // when a document is deleted from a Lucene index, it is only marked as deleted,
        // not physically removed; we need to ensure the deletion is reflected in what
        // gets propagated to the Spark cluster
        writer.deleteDocuments(new Term(DictionarySearcher.F_CATID, "DE_LAND"));
        writer.commit();

        writer.close();
    } catch (IOException e1) {
        e1.printStackTrace();
    }

    // create the broadcast object from local index
    final Directory cpDir = ClassPathDirectory.open(testFolder.toURI());
    final BroadcastIndexObject bio = new BroadcastIndexObject(cpDir, true);
    // get the RamDirectory from BroadcastIndexObject
    final Directory ramDir = bio.get();

    // assertions
    try {
        DirectoryReader cpDirReader = DirectoryReader.open(cpDir);
        assertEquals("Unexpected document count in created index. ", TEST_INDEX_CONTENT.size(),
                cpDirReader.numDocs());
        DirectoryReader ramDirReader = DirectoryReader.open(ramDir);
        assertEquals("Unexpected document count in created index. ", TEST_INDEX_CONTENT.size(),
                ramDirReader.numDocs());
        for (int i = 0; i < TEST_INDEX_CONTENT.size(); i++) {
            Document doc = cpDirReader.document(i);
            String cpWord = doc.getField(DictionarySearcher.F_CATID).stringValue();
            Document ramDoc = ramDirReader.document(i);
            String ramWord = ramDoc.getField(DictionarySearcher.F_CATID).stringValue();
            assertEquals("Unexpected word", cpWord, ramWord);
        }
    } catch (IOException e) {
        e.printStackTrace();
    }

}
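
The comment in the middle of this test is the key point: after deleteDocuments plus commit, a document is invisible to searches but still occupies its slot until a merge reclaims it. A short check, assuming an already-populated Directory named dir (a hypothetical variable):

try (DirectoryReader reader = DirectoryReader.open(dir)) {
    System.out.println("live docs:     " + reader.numDocs());      // excludes deleted documents
    System.out.println("max doc:       " + reader.maxDoc());       // still counts deleted slots
    System.out.println("has deletions: " + reader.hasDeletions()); // true until merges purge them
}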

From source file:org.wso2.carbon.analytics.dataservice.core.indexing.AnalyticsDataIndexer.java

License:Open Source License

private void deleteInIndex(int tenantId, String tableName, int shardIndex, List<String> ids)
        throws AnalyticsException {
    if (log.isDebugEnabled()) {
        log.debug("Deleting data in local index [" + shardIndex + "]: " + ids.size());
    }
    String tableId = this.generateTableId(tenantId, tableName);
    IndexWriter indexWriter = this.lookupIndexWriter(shardIndex, tableId);
    List<Term> terms = new ArrayList<Term>(ids.size());
    for (String id : ids) {
        terms.add(new Term(INDEX_ID_INTERNAL_FIELD, id));
    }
    try {
        indexWriter.deleteDocuments(terms.toArray(new Term[terms.size()]));
        indexWriter.commit();
        if (this.isIndexingStatsEnabled()) {
            this.statsCollector.processedRecords(terms.size());
        }
    } catch (IOException e) {
        throw new AnalyticsException("Error in deleting indices: " + e.getMessage(), e);
    }
}

From source file:org.wso2.carbon.analytics.dataservice.indexing.AnalyticsDataIndexer.java

License:Open Source License

private void delete(int tenantId, String tableName, List<String> ids, String shardId)
        throws AnalyticsException {
    if (this.lookupIndices(tenantId, tableName).size() == 0) {
        return;
    }
    String tableId = this.generateShardedTableId(tenantId, tableName, shardId);
    IndexWriter indexWriter = this.createIndexWriter(tableId);
    List<Term> terms = new ArrayList<Term>(ids.size());
    for (String id : ids) {
        terms.add(new Term(INDEX_ID_INTERNAL_FIELD, id));
    }
    try {
        indexWriter.deleteDocuments(terms.toArray(new Term[terms.size()]));
        indexWriter.commit();
    } catch (IOException e) {
        throw new AnalyticsException("Error in deleting indices: " + e.getMessage(), e);
    } finally {
        try {
            indexWriter.close();
        } catch (IOException e) {
            log.error("Error closing index writer: " + e.getMessage(), e);
        }
    }
}

From source file:org.wso2.carbon.analytics.dataservice.indexing.AnalyticsDataIndexer.java

License:Open Source License

private void delete(int tenantId, String tableName, long timeFrom, long timeTo, String shardId)
        throws AnalyticsException {
    Map<String, IndexType> indices = this.lookupIndices(tenantId, tableName);
    if (indices.size() == 0) {
        return;
    }
    String tableId = this.generateShardedTableId(tenantId, tableName, shardId);
    IndexWriter indexWriter = this.createIndexWriter(tableId);
    try {
        Query query = new AnalyticsQueryParser(DEFAULT_ANALYZER, indices)
                .parse(INDEX_INTERNAL_TIMESTAMP_FIELD + ":[" + timeFrom + " TO " + timeTo + "}");
        indexWriter.deleteDocuments(query);
        indexWriter.commit();
    } catch (Exception e) {
        throw new AnalyticsException("Error in deleting indices: " + e.getMessage(), e);
    } finally {
        try {
            indexWriter.close();
        } catch (IOException e) {
            log.error("Error closing index writer: " + e.getMessage(), e);
        }
    }
}
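
On Lucene 6.x and later, the same range delete would not need a query parser; if the timestamp field were indexed as a LongPoint (an assumption — this code indexes it differently), a sketch reusing this method's variables would be:

// LongPoint range bounds are inclusive, so timeTo - 1 mimics the half-open [from TO to} range
Query query = LongPoint.newRangeQuery(INDEX_INTERNAL_TIMESTAMP_FIELD, timeFrom, timeTo - 1);
indexWriter.deleteDocuments(query);
indexWriter.commit();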

From source file:org.xcmis.search.lucene.index.LuceneIndexingService.java

License:Open Source License

protected void softCleanIndex() throws IndexException {
    if (getDocumentCount() > 0) {
        final Directory dir = getDirectory();
        if (dir != null) {
            synchronized (dir) {
                try {
                    final IndexWriter writer = new IndexWriter(super.getDirectory(), new StandardAnalyzer(),
                            MaxFieldLength.UNLIMITED);
                    writer.deleteDocuments(new MatchAllDocsQuery());
                    writer.commit();
                    writer.optimize();
                    writer.close();
                } catch (final CorruptIndexException e) {
                    throw new IndexException(e.getLocalizedMessage(), e);
                } catch (final LockObtainFailedException e) {
                    throw new IndexException(e.getLocalizedMessage(), e);
                } catch (final IOException e) {
                    throw new IndexException(e.getLocalizedMessage(), e);
                }
            }
        }
    }
}
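
Deleting with MatchAllDocsQuery works, but since Lucene 2.9 IndexWriter also offers deleteAll(), which clears the index without evaluating a query; a sketch against the same 3.x-era API:

IndexWriter writer = new IndexWriter(getDirectory(), new StandardAnalyzer(), MaxFieldLength.UNLIMITED);
writer.deleteAll(); // drops every document; still reversible via rollback() until commit()
writer.commit();
writer.close();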

From source file:org.xcmis.search.lucene.index.PersistedIndex.java

License:Open Source License

/**
 * {@inheritDoc}
 */
public IndexTransactionModificationReport save(final IndexTransaction<Document> changes) throws IndexException {

    final Set<String> removedDocuments = new HashSet<String>();
    final Set<String> updatedDocuments = new HashSet<String>();

    try {
        // index already started
        synchronized (this.indexDirectiry) {

            final Set<String> removed = changes.getRemovedDocuments();
            IndexWriter writer = null;
            IndexReader reader = null;

            Map<String, Document> updated = null;
            for (final String removedUuid : removed) {

                if (reader == null) {
                    reader = this.getIndexReader();
                }

                if (this.getDocument(removedUuid, reader) != null) {
                    removedDocuments.add(removedUuid);
                }
            }

            if (removedDocuments.size() > 0 || changes.getAddedDocuments().size() > 0) {

                writer = new IndexWriter(this.indexDirectiry, new StandardAnalyzer(), MaxFieldLength.UNLIMITED);

                // removed
                for (final String uuid : removedDocuments) {
                    writer.deleteDocuments(new Term(FieldNames.UUID, uuid));
                }
                // updated (note: updatedDocuments is never populated in this method and
                // 'updated' is null, so this loop never executes)
                for (final String uuid : updatedDocuments) {
                    // TODO possible use only delete
                    writer.updateDocument(new Term(FieldNames.UUID, uuid), updated.get(uuid));
                }
                // added
                for (final Document document : changes.getAddedDocuments().values()) {
                    writer.addDocument(document);
                }

                writer.commit();
                writer.close();

                this.lastModifedTime = System.currentTimeMillis();
            }
        }

    } catch (final CorruptIndexException e) {
        throw new IndexException(e.getLocalizedMessage(), e);
    } catch (final IOException e) {
        throw new IndexException(e.getLocalizedMessage(), e);
    }

    return new IndexTransactionModificationReportImpl(changes.getAddedDocuments().keySet(), removedDocuments,
            updatedDocuments);
}

From source file:org.xcmis.search.lucene.index.ReducibleInMemoryIndexDataKeeper.java

License:Open Source License

/**
 * {@inheritDoc}
 */
public IndexTransactionModificationReport save(final IndexTransaction<Document> changes)
        throws IndexException, IndexTransactionException {
    // Accepts only deletes; updates are handled as removals.
    final Set<String> addedDocuments = new HashSet<String>();
    final Set<String> removedDocuments = new HashSet<String>();
    final Set<String> updatedDocuments = new HashSet<String>();

    try {
        // index already started
        synchronized (indexDirectiry) {
            final Set<String> removed = changes.getRemovedDocuments();
            // int numDoc = 0;
            IndexWriter writer = null;
            for (final String removedUuid : removed) {

                if (this.documentsBuffer.remove(removedUuid) != null) {

                    removedDocuments.add(removedUuid);
                    if (this.isPendingCommited || this.pendingDocumentsBuffer.remove(removedUuid) == null) {
                        if (writer == null) {
                            writer = new IndexWriter(this.indexDirectiry, new StandardAnalyzer(),
                                    MaxFieldLength.UNLIMITED);
                            // use a serial merge scheduler to avoid deadlock
                            writer.setMergeScheduler(new SerialMergeScheduler());
                        }
                        writer.deleteDocuments(new Term(FieldNames.UUID, removedUuid));
                    }
                }

            }

            if (writer != null) {
                writer.commit();
                writer.close();
                this.lastModifedTime = System.currentTimeMillis();
            }
        }

    } catch (final CorruptIndexException e) {
        throw new IndexException(e.getLocalizedMessage(), e);
    } catch (final IOException e) {
        throw new IndexException(e.getLocalizedMessage(), e);
    }

    return new IndexTransactionModificationReportImpl(addedDocuments, removedDocuments, updatedDocuments);
}

From source file:org.xcmis.search.lucene.InMemoryLuceneQueryableIndexStorage.java

License:Open Source License

/**
 * @throws IndexException
 * @see org.xcmis.search.lucene.AbstractLuceneQueryableIndexStorage#save(org.xcmis.search.lucene.index.LuceneIndexTransaction)
 */
@Override
protected synchronized Object save(LuceneIndexTransaction indexTransaction)
        throws IndexException, IndexTransactionException {

    try {
        IndexWriter writer = new IndexWriter(ramDirectory, new StandardAnalyzer(), MaxFieldLength.UNLIMITED);

        // removed
        for (final String uuid : indexTransaction.getRemovedDocuments()) {
            writer.deleteDocuments(new Term(FieldNames.UUID, uuid));
        }

        // added (updateDocument atomically replaces any existing document with the same UUID)
        for (final Entry<String, Document> entry : indexTransaction.getAddedDocuments().entrySet()) {
            writer.updateDocument(new Term(FieldNames.UUID, entry.getKey()), entry.getValue());
        }

        writer.commit();
        writer.close();
    } catch (CorruptIndexException e) {
        throw new IndexModificationException(e.getLocalizedMessage(), e);
    } catch (LockObtainFailedException e) {
        throw new IndexModificationException(e.getLocalizedMessage(), e);
    } catch (IOException e) {
        throw new IndexModificationException(e.getLocalizedMessage(), e);
    }
    return new Object();
}

From source file:perf.Indexer.java

License:Apache License

private static void _main(String[] clArgs) throws Exception {

    Args args = new Args(clArgs);

    // EG: -facets Date -facets characterCount ...
    FacetsConfig facetsConfig = new FacetsConfig();
    facetsConfig.setHierarchical("Date", true);
    final Set<String> facetFields = new HashSet<String>();
    if (args.hasArg("-facets")) {
        for (String arg : args.getStrings("-facets")) {
            facetFields.add(arg);
        }
    }

    final String dirImpl = args.getString("-dirImpl");
    final String dirPath = args.getString("-indexPath") + "/index";

    final Directory dir;
    OpenDirectory od = OpenDirectory.get(dirImpl);

    dir = od.open(Paths.get(dirPath));

    final String analyzer = args.getString("-analyzer");
    final Analyzer a;
    if (analyzer.equals("EnglishAnalyzer")) {
        a = new EnglishAnalyzer();
    } else if (analyzer.equals("StandardAnalyzer")) {
        a = new StandardAnalyzer();
    } else if (analyzer.equals("StandardAnalyzerNoStopWords")) {
        a = new StandardAnalyzer(CharArraySet.EMPTY_SET);
    } else if (analyzer.equals("ShingleStandardAnalyzer")) {
        a = new ShingleAnalyzerWrapper(new StandardAnalyzer(), 2, 2);
    } else if (analyzer.equals("ShingleStandardAnalyzerNoStopWords")) {
        a = new ShingleAnalyzerWrapper(new StandardAnalyzer(CharArraySet.EMPTY_SET), 2, 2);
    } else {
        throw new RuntimeException("unknown analyzer " + analyzer);
    }

    final String lineFile = args.getString("-lineDocsFile");

    // -1 means all docs in the line file:
    final int docCountLimit = args.getInt("-docCountLimit");
    final int numThreads = args.getInt("-threadCount");

    final boolean doForceMerge = args.getFlag("-forceMerge");
    final boolean verbose = args.getFlag("-verbose");

    String indexSortField = null;
    SortField.Type indexSortType = null;

    if (args.hasArg("-indexSort")) {
        indexSortField = args.getString("-indexSort");

        int i = indexSortField.indexOf(':');
        if (i == -1) {
            throw new IllegalArgumentException(
                    "-indexSort should have form field:type; got: " + indexSortField);
        }
        String typeString = indexSortField.substring(i + 1, indexSortField.length());
        if (typeString.equals("long")) {
            indexSortType = SortField.Type.LONG;
        } else if (typeString.equals("string")) {
            indexSortType = SortField.Type.STRING;
        } else {
            throw new IllegalArgumentException("-indexSort can only handle 'long' sort; got: " + typeString);
        }
        indexSortField = indexSortField.substring(0, i);
    } else {
        indexSortType = null;
    }

    final double ramBufferSizeMB = args.getDouble("-ramBufferMB");
    final int maxBufferedDocs = args.getInt("-maxBufferedDocs");

    final String defaultPostingsFormat = args.getString("-postingsFormat");
    final boolean doDeletions = args.getFlag("-deletions");
    final boolean printDPS = args.getFlag("-printDPS");
    final boolean waitForMerges = args.getFlag("-waitForMerges");
    final boolean waitForCommit = args.getFlag("-waitForCommit");
    final String mergePolicy = args.getString("-mergePolicy");
    final Mode mode;
    final boolean doUpdate = args.getFlag("-update");
    if (doUpdate) {
        mode = Mode.UPDATE;
    } else {
        mode = Mode.valueOf(args.getString("-mode", "add").toUpperCase(Locale.ROOT));
    }
    int randomDocIDMax;
    if (mode == Mode.UPDATE) {
        randomDocIDMax = args.getInt("-randomDocIDMax");
    } else {
        randomDocIDMax = -1;
    }
    final String idFieldPostingsFormat = args.getString("-idFieldPostingsFormat");
    final boolean addGroupingFields = args.getFlag("-grouping");
    final boolean useCFS = args.getFlag("-cfs");
    final boolean storeBody = args.getFlag("-store");
    final boolean tvsBody = args.getFlag("-tvs");
    final boolean bodyPostingsOffsets = args.getFlag("-bodyPostingsOffsets");
    final int maxConcurrentMerges = args.getInt("-maxConcurrentMerges");
    final boolean addDVFields = args.getFlag("-dvfields");
    final boolean doRandomCommit = args.getFlag("-randomCommit");
    final boolean useCMS = args.getFlag("-useCMS");
    final boolean disableIOThrottle = args.getFlag("-disableIOThrottle");

    if (waitForCommit == false && waitForMerges) {
        throw new RuntimeException("pass -waitForCommit if you pass -waitForMerges");
    }

    if (waitForCommit == false && doForceMerge) {
        throw new RuntimeException("pass -waitForCommit if you pass -forceMerge");
    }

    if (waitForCommit == false && doDeletions) {
        throw new RuntimeException("pass -waitForCommit if you pass -deletions");
    }

    if (useCMS == false && disableIOThrottle) {
        throw new RuntimeException("-disableIOThrottle only makes sense with -useCMS");
    }

    final double nrtEverySec;
    if (args.hasArg("-nrtEverySec")) {
        nrtEverySec = args.getDouble("-nrtEverySec");
    } else {
        nrtEverySec = -1.0;
    }

    // True to start back at the beginning if we run out of
    // docs from the line file source:
    final boolean repeatDocs = args.getFlag("-repeatDocs");

    final String facetDVFormatName;
    if (facetFields.isEmpty()) {
        facetDVFormatName = "Lucene54";
    } else {
        facetDVFormatName = args.getString("-facetDVFormat");
    }

    if (addGroupingFields && docCountLimit == -1) {
        a.close();
        throw new RuntimeException("cannot add grouping fields unless docCount is set");
    }

    args.check();

    System.out.println("Dir: " + dirImpl);
    System.out.println("Index path: " + dirPath);
    System.out.println("Analyzer: " + analyzer);
    System.out.println("Line file: " + lineFile);
    System.out.println("Doc count limit: " + (docCountLimit == -1 ? "all docs" : "" + docCountLimit));
    System.out.println("Threads: " + numThreads);
    System.out.println("Force merge: " + (doForceMerge ? "yes" : "no"));
    System.out.println("Verbose: " + (verbose ? "yes" : "no"));
    System.out.println("RAM Buffer MB: " + ramBufferSizeMB);
    System.out.println("Max buffered docs: " + maxBufferedDocs);
    System.out.println("Default postings format: " + defaultPostingsFormat);
    System.out.println("Do deletions: " + (doDeletions ? "yes" : "no"));
    System.out.println("Wait for merges: " + (waitForMerges ? "yes" : "no"));
    System.out.println("Wait for commit: " + (waitForCommit ? "yes" : "no"));
    System.out.println("IO throttle: " + (disableIOThrottle ? "no" : "yes"));
    System.out.println("Merge policy: " + mergePolicy);
    System.out.println("Mode: " + mode);
    if (mode == Mode.UPDATE) {
        System.out.println("DocIDMax: " + randomDocIDMax);
    }
    System.out.println("ID field postings format: " + idFieldPostingsFormat);
    System.out.println("Add grouping fields: " + (addGroupingFields ? "yes" : "no"));
    System.out.println("Compound file format: " + (useCFS ? "yes" : "no"));
    System.out.println("Store body field: " + (storeBody ? "yes" : "no"));
    System.out.println("Term vectors for body field: " + (tvsBody ? "yes" : "no"));
    System.out.println("Facet DV Format: " + facetDVFormatName);
    System.out.println("Facet fields: " + facetFields);
    System.out.println("Body postings offsets: " + (bodyPostingsOffsets ? "yes" : "no"));
    System.out.println("Max concurrent merges: " + maxConcurrentMerges);
    System.out.println("Add DocValues fields: " + addDVFields);
    System.out.println("Use ConcurrentMergeScheduler: " + useCMS);
    if (nrtEverySec > 0.0) {
        System.out.println("Open & close NRT reader every: " + nrtEverySec + " sec");
    } else {
        System.out.println("Open & close NRT reader every: never");
    }
    System.out.println("Repeat docs: " + repeatDocs);

    if (verbose) {
        InfoStream.setDefault(new PrintStreamInfoStream(System.out));
    }

    final IndexWriterConfig iwc = new IndexWriterConfig(a);

    if (indexSortField != null) {
        iwc.setIndexSort(new Sort(new SortField(indexSortField, indexSortType)));
    }

    if (mode == Mode.UPDATE) {
        iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
    } else {
        iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
    }

    iwc.setMaxBufferedDocs(maxBufferedDocs);
    iwc.setRAMBufferSizeMB(ramBufferSizeMB);

    // So flushed segments do/don't use CFS:
    iwc.setUseCompoundFile(useCFS);

    final AtomicBoolean indexingFailed = new AtomicBoolean();

    iwc.setMergeScheduler(getMergeScheduler(indexingFailed, useCMS, maxConcurrentMerges, disableIOThrottle));
    iwc.setMergePolicy(getMergePolicy(mergePolicy, useCFS));

    // Keep all commit points:
    if (doDeletions || doForceMerge) {
        iwc.setIndexDeletionPolicy(NoDeletionPolicy.INSTANCE);
    }

    final Codec codec = new Lucene62Codec() {
        @Override
        public PostingsFormat getPostingsFormatForField(String field) {
            return PostingsFormat.forName(field.equals("id") ? idFieldPostingsFormat : defaultPostingsFormat);
        }

        private final DocValuesFormat facetsDVFormat = DocValuesFormat.forName(facetDVFormatName);
        //private final DocValuesFormat lucene42DVFormat = DocValuesFormat.forName("Lucene42");
        //private final DocValuesFormat diskDVFormat = DocValuesFormat.forName("Disk");
        //        private final DocValuesFormat lucene45DVFormat = DocValuesFormat.forName("Lucene45");
        private final DocValuesFormat directDVFormat = DocValuesFormat.forName("Direct");

        @Override
        public DocValuesFormat getDocValuesFormatForField(String field) {
            if (facetFields.contains(field) || field.equals("$facets")) {
                return facetsDVFormat;
                //} else if (field.equals("$facets_sorted_doc_values")) {
                //return diskDVFormat;
            } else {
                // Use default DVFormat for all else:
                // System.out.println("DV: field=" + field + " format=" + super.getDocValuesFormatForField(field));
                return super.getDocValuesFormatForField(field);
            }
        }
    };

    iwc.setCodec(codec);

    System.out.println("IW config=" + iwc);

    IndexWriter w = new IndexWriter(dir, iwc);

    System.out.println("Index has " + w.maxDoc() + " docs");

    final TaxonomyWriter taxoWriter;
    if (facetFields.isEmpty() == false) {
        taxoWriter = new DirectoryTaxonomyWriter(od.open(Paths.get(args.getString("-indexPath"), "facets")),
                IndexWriterConfig.OpenMode.CREATE);
    } else {
        taxoWriter = null;
    }

    // Fixed seed so group field values are always consistent:
    final Random random = new Random(17);

    LineFileDocs lineFileDocs = new LineFileDocs(lineFile, repeatDocs, storeBody, tvsBody, bodyPostingsOffsets,
            false, taxoWriter, facetFields, facetsConfig, addDVFields);

    float docsPerSecPerThread = -1f;
    //float docsPerSecPerThread = 100f;

    IndexThreads threads = new IndexThreads(random, w, indexingFailed, lineFileDocs, numThreads, docCountLimit,
            addGroupingFields, printDPS, mode, docsPerSecPerThread, null, nrtEverySec, randomDocIDMax);

    System.out.println("\nIndexer: start");
    final long t0 = System.currentTimeMillis();

    threads.start();

    while (!threads.done() && indexingFailed.get() == false) {
        Thread.sleep(100);

        // Commits once per minute on average:
        if (doRandomCommit && random.nextInt(600) == 17) {
            System.out.println("Indexer: now commit");
            long commitStartNS = System.nanoTime();
            w.commit();
            System.out.println(String.format(Locale.ROOT, "Indexer: commit took %.1f msec",
                    (System.nanoTime() - commitStartNS) / 1000000.));
        }
    }

    threads.stop();

    final long t1 = System.currentTimeMillis();
    System.out.println("\nIndexer: indexing done (" + (t1 - t0) + " msec); total " + w.maxDoc() + " docs");
    // in update mode we cannot tell how many docs were actually indexed
    if (threads.failed.get()) {
        throw new RuntimeException("exceptions during indexing");
    }
    if (mode != Mode.UPDATE && docCountLimit != -1 && w.maxDoc() != docCountLimit) {
        throw new RuntimeException("w.maxDoc()=" + w.maxDoc() + " but expected " + docCountLimit);
    }

    final Map<String, String> commitData = new HashMap<String, String>();

    if (waitForMerges) {
        w.close();
        IndexWriterConfig iwc2 = new IndexWriterConfig(a);
        iwc2.setMergeScheduler(
                getMergeScheduler(indexingFailed, useCMS, maxConcurrentMerges, disableIOThrottle));
        iwc2.setMergePolicy(getMergePolicy(mergePolicy, useCFS));
        iwc2.setCodec(codec);
        iwc2.setUseCompoundFile(useCFS);
        iwc2.setMaxBufferedDocs(maxBufferedDocs);
        iwc2.setRAMBufferSizeMB(ramBufferSizeMB);
        if (indexSortField != null) {
            iwc2.setIndexSort(new Sort(new SortField(indexSortField, indexSortType)));
        }

        w = new IndexWriter(dir, iwc2);
        long t2 = System.currentTimeMillis();
        System.out.println("\nIndexer: waitForMerges done (" + (t2 - t1) + " msec)");
    }

    if (waitForCommit) {
        commitData.put("userData", "multi");
        w.setLiveCommitData(commitData.entrySet());
        long t2 = System.currentTimeMillis();
        w.commit();
        long t3 = System.currentTimeMillis();
        System.out.println("\nIndexer: commit multi (took " + (t3 - t2) + " msec)");
    } else {
        w.rollback();
        w = null;
    }

    if (doForceMerge) {
        long forceMergeStartMSec = System.currentTimeMillis();
        w.forceMerge(1);
        long forceMergeEndMSec = System.currentTimeMillis();
        System.out.println(
                "\nIndexer: force merge done (took " + (forceMergeEndMSec - forceMergeStartMSec) + " msec)");

        commitData.put("userData", "single");
        w.setLiveCommitData(commitData.entrySet());
        w.commit();
        final long t5 = System.currentTimeMillis();
        System.out.println("\nIndexer: commit single done (took " + (t5 - forceMergeEndMSec) + " msec)");
    }

    if (doDeletions) {
        final long t5 = System.currentTimeMillis();
        // Randomly delete 5% of the docs
        final Set<Integer> deleted = new HashSet<Integer>();
        final int maxDoc = w.maxDoc();
        final int toDeleteCount = (int) (maxDoc * 0.05);
        System.out.println("\nIndexer: delete " + toDeleteCount + " docs");
        while (deleted.size() < toDeleteCount) {
            final int id = random.nextInt(maxDoc);
            if (!deleted.contains(id)) {
                deleted.add(id);
                w.deleteDocuments(new Term("id", LineFileDocs.intToID(id)));
            }
        }
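        // note: the Term... overload could batch these into a single call, e.g.
        // w.deleteDocuments(termList.toArray(new Term[0])) for a hypothetical termList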
        final long t6 = System.currentTimeMillis();
        System.out.println("\nIndexer: deletes done (took " + (t6 - t5) + " msec)");

        commitData.put("userData", doForceMerge ? "delsingle" : "delmulti");
        w.setLiveCommitData(commitData.entrySet());
        w.commit();
        final long t7 = System.currentTimeMillis();
        System.out.println("\nIndexer: commit delmulti done (took " + (t7 - t6) + " msec)");

        if (doUpdate || w.numDocs() != maxDoc - toDeleteCount) {
            throw new RuntimeException(
                    "count mismatch: w.numDocs()=" + w.numDocs() + " but expected " + (maxDoc - toDeleteCount));
        }
    }

    if (taxoWriter != null) {
        System.out.println("Taxonomy has " + taxoWriter.getSize() + " ords");
        taxoWriter.commit();
        taxoWriter.close();
    }

    final long tCloseStart = System.currentTimeMillis();
    if (w != null) {
        w.close();
        w = null;
    }
    if (waitForCommit) {
        System.out.println("\nIndexer: at close: " + SegmentInfos.readLatestCommit(dir));
        System.out.println("\nIndexer: close took " + (System.currentTimeMillis() - tCloseStart) + " msec");
    }

    dir.close();
    final long tFinal = System.currentTimeMillis();
    System.out.println("\nIndexer: net bytes indexed " + threads.getBytesIndexed());

    final long indexingTime;
    if (waitForCommit) {
        indexingTime = tFinal - t0;
        System.out.println("\nIndexer: finished (" + indexingTime + " msec)");
    } else {
        indexingTime = t1 - t0;
        System.out.println("\nIndexer: finished (" + indexingTime + " msec), excluding commit");
    }
    System.out.println(
            "\nIndexer: " + (threads.getBytesIndexed() / 1024. / 1024. / 1024. / (indexingTime / 3600000.))
                    + " GB/hour plain text");
}