Example usage for org.apache.lucene.index NoDeletionPolicy INSTANCE

List of usage examples for org.apache.lucene.index NoDeletionPolicy INSTANCE

Introduction

In this page you can find the example usage for org.apache.lucene.index NoDeletionPolicy INSTANCE.

Prototype

IndexDeletionPolicy INSTANCE

To view the source code for org.apache.lucene.index NoDeletionPolicy INSTANCE.

Click Source Link

Document

The single instance of this class.

Usage

From source file:io.anserini.IndexerCW09B.java

License:Apache License

public int indexWithThreads(int numThreads) throws IOException, InterruptedException {

    System.out.println(//from w w w  . jav a2  s.co  m
            "Indexing with " + numThreads + " threads to directory '" + indexPath.toAbsolutePath() + "'...");

    final Directory dir = FSDirectory.open(indexPath);

    final IndexWriterConfig iwc = new IndexWriterConfig(analyzer());

    iwc.setSimilarity(new BM25Similarity());
    iwc.setIndexDeletionPolicy(NoDeletionPolicy.INSTANCE);
    iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
    iwc.setRAMBufferSizeMB(256.0);
    iwc.setUseCompoundFile(false);
    iwc.setMergeScheduler(new ConcurrentMergeScheduler());

    final IndexWriter writer = new IndexWriter(dir, iwc);

    final ExecutorService executor = Executors.newFixedThreadPool(numThreads);

    for (Path f : discoverWarcFiles(docDir))
        executor.execute(new IndexerThread(writer, f));

    //add some delay to let some threads spawn by scheduler
    Thread.sleep(30000);
    executor.shutdown(); // Disable new tasks from being submitted

    try {
        // Wait for existing tasks to terminate
        while (!executor.awaitTermination(5, TimeUnit.MINUTES)) {
            Thread.sleep(1000);
        }
    } catch (InterruptedException ie) {
        // (Re-)Cancel if current thread also interrupted
        executor.shutdownNow();
        // Preserve interrupt status
        Thread.currentThread().interrupt();
    }

    int numIndexed = writer.maxDoc();

    try {
        writer.commit();
    } finally {
        writer.close();
    }

    return numIndexed;
}

From source file:io.druid.extension.lucene.LuceneDruidSegment.java

License:Apache License

private static IndexWriter buildRamWriter(RAMDirectory dir, Analyzer analyzer, int maxDocsPerSegment)
        throws IOException {
    IndexWriterConfig writerConfig = new IndexWriterConfig(analyzer);
    writerConfig.setOpenMode(OpenMode.CREATE_OR_APPEND);
    // some arbitrary large numbers
    writerConfig.setMaxBufferedDocs(maxDocsPerSegment * 2);
    writerConfig.setRAMBufferSizeMB(5000);
    writerConfig.setUseCompoundFile(false);
    writerConfig.setCommitOnClose(true);
    writerConfig.setIndexDeletionPolicy(NoDeletionPolicy.INSTANCE);
    writerConfig.setMergePolicy(NoMergePolicy.INSTANCE);
    writerConfig.setMergeScheduler(NoMergeScheduler.INSTANCE);
    return new IndexWriter(dir, writerConfig);
}

From source file:org.elasticsearch.common.lucene.LuceneTests.java

License:Apache License

public void testWaitForIndex() throws Exception {
    final MockDirectoryWrapper dir = newMockDirectory();

    final AtomicBoolean succeeded = new AtomicBoolean(false);
    final CountDownLatch latch = new CountDownLatch(1);

    // Create a shadow Engine, which will freak out because there is no
    // index yet// www.  j a va2 s . c  o m
    Thread t = new Thread(new Runnable() {
        @Override
        public void run() {
            try {
                latch.await();
                if (Lucene.waitForIndex(dir, 5000)) {
                    succeeded.set(true);
                } else {
                    fail("index should have eventually existed!");
                }
            } catch (InterruptedException e) {
                // ignore interruptions
            } catch (Exception e) {
                fail("should have been able to create the engine! " + e.getMessage());
            }
        }
    });
    t.start();

    // count down latch
    // now shadow engine should try to be created
    latch.countDown();

    dir.setEnableVirusScanner(false);
    IndexWriterConfig iwc = newIndexWriterConfig();
    iwc.setIndexDeletionPolicy(NoDeletionPolicy.INSTANCE);
    iwc.setMergePolicy(NoMergePolicy.INSTANCE);
    iwc.setMaxBufferedDocs(2);
    IndexWriter writer = new IndexWriter(dir, iwc);
    Document doc = new Document();
    doc.add(new TextField("id", "1", random().nextBoolean() ? Field.Store.YES : Field.Store.NO));
    writer.addDocument(doc);
    writer.commit();

    t.join();

    writer.close();
    dir.close();
    assertTrue("index should have eventually existed", succeeded.get());
}

From source file:org.elasticsearch.common.lucene.LuceneTests.java

License:Apache License

public void testCleanIndex() throws IOException {
    MockDirectoryWrapper dir = newMockDirectory();
    dir.setEnableVirusScanner(false);/* ww w. ja v a2s  .co m*/
    IndexWriterConfig iwc = newIndexWriterConfig();
    iwc.setIndexDeletionPolicy(NoDeletionPolicy.INSTANCE);
    iwc.setMergePolicy(NoMergePolicy.INSTANCE);
    iwc.setMaxBufferedDocs(2);
    IndexWriter writer = new IndexWriter(dir, iwc);
    Document doc = new Document();
    doc.add(new TextField("id", "1", random().nextBoolean() ? Field.Store.YES : Field.Store.NO));
    writer.addDocument(doc);
    writer.commit();

    doc = new Document();
    doc.add(new TextField("id", "2", random().nextBoolean() ? Field.Store.YES : Field.Store.NO));
    writer.addDocument(doc);

    doc = new Document();
    doc.add(new TextField("id", "3", random().nextBoolean() ? Field.Store.YES : Field.Store.NO));
    writer.addDocument(doc);

    writer.commit();
    doc = new Document();
    doc.add(new TextField("id", "4", random().nextBoolean() ? Field.Store.YES : Field.Store.NO));
    writer.addDocument(doc);

    writer.deleteDocuments(new Term("id", "2"));
    writer.commit();
    try (DirectoryReader open = DirectoryReader.open(writer, true)) {
        assertEquals(3, open.numDocs());
        assertEquals(1, open.numDeletedDocs());
        assertEquals(4, open.maxDoc());
    }
    writer.close();
    if (random().nextBoolean()) {
        for (String file : dir.listAll()) {
            if (file.startsWith("_1")) {
                // delete a random file
                dir.deleteFile(file);
                break;
            }
        }
    }
    Lucene.cleanLuceneIndex(dir);
    if (dir.listAll().length > 0) {
        for (String file : dir.listAll()) {
            if (file.startsWith("extra") == false) {
                assertEquals(file, "write.lock");
            }
        }
    }
    dir.close();
}

From source file:org.elasticsearch.common.lucene.LuceneTests.java

License:Apache License

public void testPruneUnreferencedFiles() throws IOException {
    MockDirectoryWrapper dir = newMockDirectory();
    dir.setEnableVirusScanner(false);//w w  w  .  jav  a2  s  .c o m
    IndexWriterConfig iwc = newIndexWriterConfig();
    iwc.setIndexDeletionPolicy(NoDeletionPolicy.INSTANCE);
    iwc.setMergePolicy(NoMergePolicy.INSTANCE);
    iwc.setMaxBufferedDocs(2);
    IndexWriter writer = new IndexWriter(dir, iwc);
    Document doc = new Document();
    doc.add(new TextField("id", "1", random().nextBoolean() ? Field.Store.YES : Field.Store.NO));
    writer.addDocument(doc);
    writer.commit();

    doc = new Document();
    doc.add(new TextField("id", "2", random().nextBoolean() ? Field.Store.YES : Field.Store.NO));
    writer.addDocument(doc);

    doc = new Document();
    doc.add(new TextField("id", "3", random().nextBoolean() ? Field.Store.YES : Field.Store.NO));
    writer.addDocument(doc);

    writer.commit();
    SegmentInfos segmentCommitInfos = Lucene.readSegmentInfos(dir);

    doc = new Document();
    doc.add(new TextField("id", "4", random().nextBoolean() ? Field.Store.YES : Field.Store.NO));
    writer.addDocument(doc);

    writer.deleteDocuments(new Term("id", "2"));
    writer.commit();
    DirectoryReader open = DirectoryReader.open(writer, true);
    assertEquals(3, open.numDocs());
    assertEquals(1, open.numDeletedDocs());
    assertEquals(4, open.maxDoc());
    open.close();
    writer.close();
    SegmentInfos si = Lucene.pruneUnreferencedFiles(segmentCommitInfos.getSegmentsFileName(), dir);
    assertEquals(si.getSegmentsFileName(), segmentCommitInfos.getSegmentsFileName());
    open = DirectoryReader.open(dir);
    assertEquals(3, open.numDocs());
    assertEquals(0, open.numDeletedDocs());
    assertEquals(3, open.maxDoc());

    IndexSearcher s = new IndexSearcher(open);
    assertEquals(s.search(new TermQuery(new Term("id", "1")), 1).totalHits, 1);
    assertEquals(s.search(new TermQuery(new Term("id", "2")), 1).totalHits, 1);
    assertEquals(s.search(new TermQuery(new Term("id", "3")), 1).totalHits, 1);
    assertEquals(s.search(new TermQuery(new Term("id", "4")), 1).totalHits, 0);

    for (String file : dir.listAll()) {
        assertFalse("unexpected file: " + file, file.equals("segments_3") || file.startsWith("_2"));
    }
    open.close();
    dir.close();

}

From source file:org.elasticsearch.index.store.StoreTest.java

License:Apache License

@Test
public void testCleanupFromSnapshot() throws IOException {
    final ShardId shardId = new ShardId(new Index("index"), 1);
    DirectoryService directoryService = new LuceneManagedDirectoryService(random());
    Store store = new Store(shardId, ImmutableSettings.EMPTY, directoryService,
            randomDistributor(directoryService), new DummyShardLock(shardId));
    // this time random codec....
    IndexWriterConfig indexWriterConfig = newIndexWriterConfig(random(), TEST_VERSION_CURRENT,
            new MockAnalyzer(random())).setCodec(actualDefaultCodec());
    // we keep all commits and that allows us clean based on multiple snapshots
    indexWriterConfig.setIndexDeletionPolicy(NoDeletionPolicy.INSTANCE);
    IndexWriter writer = new IndexWriter(store.directory(), indexWriterConfig);
    int docs = 1 + random().nextInt(100);
    int numCommits = 0;
    for (int i = 0; i < docs; i++) {
        if (i > 0 && randomIntBetween(0, 10) == 0) {
            writer.commit();/* w ww .  jav a2s  . c  o  m*/
            numCommits++;
        }
        Document doc = new Document();
        doc.add(new TextField("id", "" + i, random().nextBoolean() ? Field.Store.YES : Field.Store.NO));
        doc.add(new TextField("body", TestUtil.randomRealisticUnicodeString(random()),
                random().nextBoolean() ? Field.Store.YES : Field.Store.NO));
        doc.add(new SortedDocValuesField("dv", new BytesRef(TestUtil.randomRealisticUnicodeString(random()))));
        writer.addDocument(doc);

    }
    if (numCommits < 1) {
        writer.commit();
        Document doc = new Document();
        doc.add(new TextField("id", "" + docs++, random().nextBoolean() ? Field.Store.YES : Field.Store.NO));
        doc.add(new TextField("body", TestUtil.randomRealisticUnicodeString(random()),
                random().nextBoolean() ? Field.Store.YES : Field.Store.NO));
        doc.add(new SortedDocValuesField("dv", new BytesRef(TestUtil.randomRealisticUnicodeString(random()))));
        writer.addDocument(doc);
    }

    Store.MetadataSnapshot firstMeta = store.getMetadata();

    if (random().nextBoolean()) {
        for (int i = 0; i < docs; i++) {
            if (random().nextBoolean()) {
                Document doc = new Document();
                doc.add(new TextField("id", "" + i, random().nextBoolean() ? Field.Store.YES : Field.Store.NO));
                doc.add(new TextField("body", TestUtil.randomRealisticUnicodeString(random()),
                        random().nextBoolean() ? Field.Store.YES : Field.Store.NO));
                writer.updateDocument(new Term("id", "" + i), doc);
            }
        }
    }
    writer.commit();
    writer.close();

    Store.MetadataSnapshot secondMeta = store.getMetadata();

    Store.LegacyChecksums checksums = new Store.LegacyChecksums();
    Map<String, StoreFileMetaData> legacyMeta = new HashMap<>();
    for (String file : store.directory().listAll()) {
        if (file.equals("write.lock") || file.equals(IndexFileNames.SEGMENTS_GEN)) {
            continue;
        }
        BytesRef hash = new BytesRef();
        if (file.startsWith("segments")) {
            hash = Store.MetadataSnapshot.hashFile(store.directory(), file);
        }
        StoreFileMetaData storeFileMetaData = new StoreFileMetaData(file, store.directory().fileLength(file),
                file + "checksum", null, hash);
        legacyMeta.put(file, storeFileMetaData);
        checksums.add(storeFileMetaData);
    }
    checksums.write(store); // write one checksum file here - we expect it to survive all the cleanups

    if (randomBoolean()) {
        store.cleanupAndVerify("test", firstMeta);
        String[] strings = store.directory().listAll();
        int numChecksums = 0;
        int numNotFound = 0;
        for (String file : strings) {
            assertTrue(firstMeta.contains(file) || Store.isChecksum(file) || file.equals("write.lock"));
            if (Store.isChecksum(file)) {
                numChecksums++;
            } else if (secondMeta.contains(file) == false) {
                numNotFound++;
            }

        }
        assertTrue("at least one file must not be in here since we have two commits?", numNotFound > 0);
        assertEquals("we wrote one checksum but it's gone now? - checksums are supposed to be kept",
                numChecksums, 1);
    } else {
        store.cleanupAndVerify("test", secondMeta);
        String[] strings = store.directory().listAll();
        int numChecksums = 0;
        int numNotFound = 0;
        for (String file : strings) {
            assertTrue(file, secondMeta.contains(file) || Store.isChecksum(file) || file.equals("write.lock"));
            if (Store.isChecksum(file)) {
                numChecksums++;
            } else if (firstMeta.contains(file) == false) {
                numNotFound++;
            }

        }
        assertTrue("at least one file must not be in here since we have two commits?", numNotFound > 0);
        assertEquals("we wrote one checksum but it's gone now? - checksums are supposed to be kept",
                numChecksums, 1);
    }

    deleteContent(store.directory());
    IOUtils.close(store);
}

From source file:org.elasticsearch.index.store.StoreTests.java

License:Apache License

@Test
public void testCleanupFromSnapshot() throws IOException {
    final ShardId shardId = new ShardId(new Index("index"), 1);
    DirectoryService directoryService = new LuceneManagedDirectoryService(random());
    Store store = new Store(shardId, Settings.EMPTY, directoryService, new DummyShardLock(shardId));
    // this time random codec....
    IndexWriterConfig indexWriterConfig = newIndexWriterConfig(random(), new MockAnalyzer(random()))
            .setCodec(TestUtil.getDefaultCodec());
    // we keep all commits and that allows us clean based on multiple snapshots
    indexWriterConfig.setIndexDeletionPolicy(NoDeletionPolicy.INSTANCE);
    IndexWriter writer = new IndexWriter(store.directory(), indexWriterConfig);
    int docs = 1 + random().nextInt(100);
    int numCommits = 0;
    for (int i = 0; i < docs; i++) {
        if (i > 0 && randomIntBetween(0, 10) == 0) {
            writer.commit();//from  w  w w  .ja  v  a2s.c  o m
            numCommits++;
        }
        Document doc = new Document();
        doc.add(new TextField("id", "" + i, random().nextBoolean() ? Field.Store.YES : Field.Store.NO));
        doc.add(new TextField("body", TestUtil.randomRealisticUnicodeString(random()),
                random().nextBoolean() ? Field.Store.YES : Field.Store.NO));
        doc.add(new SortedDocValuesField("dv", new BytesRef(TestUtil.randomRealisticUnicodeString(random()))));
        writer.addDocument(doc);

    }
    if (numCommits < 1) {
        writer.commit();
        Document doc = new Document();
        doc.add(new TextField("id", "" + docs++, random().nextBoolean() ? Field.Store.YES : Field.Store.NO));
        doc.add(new TextField("body", TestUtil.randomRealisticUnicodeString(random()),
                random().nextBoolean() ? Field.Store.YES : Field.Store.NO));
        doc.add(new SortedDocValuesField("dv", new BytesRef(TestUtil.randomRealisticUnicodeString(random()))));
        writer.addDocument(doc);
    }

    Store.MetadataSnapshot firstMeta = store.getMetadata();

    if (random().nextBoolean()) {
        for (int i = 0; i < docs; i++) {
            if (random().nextBoolean()) {
                Document doc = new Document();
                doc.add(new TextField("id", "" + i, random().nextBoolean() ? Field.Store.YES : Field.Store.NO));
                doc.add(new TextField("body", TestUtil.randomRealisticUnicodeString(random()),
                        random().nextBoolean() ? Field.Store.YES : Field.Store.NO));
                writer.updateDocument(new Term("id", "" + i), doc);
            }
        }
    }
    writer.commit();
    writer.close();

    Store.MetadataSnapshot secondMeta = store.getMetadata();

    Store.LegacyChecksums checksums = new Store.LegacyChecksums();
    Map<String, StoreFileMetaData> legacyMeta = new HashMap<>();
    for (String file : store.directory().listAll()) {
        if (file.equals("write.lock") || file.equals(IndexFileNames.OLD_SEGMENTS_GEN)
                || file.startsWith("extra")) {
            continue;
        }
        BytesRef hash = new BytesRef();
        if (file.startsWith("segments")) {
            hash = Store.MetadataSnapshot.hashFile(store.directory(), file);
        }
        StoreFileMetaData storeFileMetaData = new StoreFileMetaData(file, store.directory().fileLength(file),
                file + "checksum", null, hash);
        legacyMeta.put(file, storeFileMetaData);
        checksums.add(storeFileMetaData);
    }
    checksums.write(store); // write one checksum file here - we expect it to survive all the cleanups

    if (randomBoolean()) {
        store.cleanupAndVerify("test", firstMeta);
        String[] strings = store.directory().listAll();
        int numChecksums = 0;
        int numNotFound = 0;
        for (String file : strings) {
            if (file.startsWith("extra")) {
                continue;
            }
            assertTrue(firstMeta.contains(file) || Store.isChecksum(file) || file.equals("write.lock"));
            if (Store.isChecksum(file)) {
                numChecksums++;
            } else if (secondMeta.contains(file) == false) {
                numNotFound++;
            }

        }
        assertTrue("at least one file must not be in here since we have two commits?", numNotFound > 0);
        assertEquals("we wrote one checksum but it's gone now? - checksums are supposed to be kept",
                numChecksums, 1);
    } else {
        store.cleanupAndVerify("test", secondMeta);
        String[] strings = store.directory().listAll();
        int numChecksums = 0;
        int numNotFound = 0;
        for (String file : strings) {
            if (file.startsWith("extra")) {
                continue;
            }
            assertTrue(file, secondMeta.contains(file) || Store.isChecksum(file) || file.equals("write.lock"));
            if (Store.isChecksum(file)) {
                numChecksums++;
            } else if (firstMeta.contains(file) == false) {
                numNotFound++;
            }

        }
        assertTrue("at least one file must not be in here since we have two commits?", numNotFound > 0);
        assertEquals("we wrote one checksum but it's gone now? - checksums are supposed to be kept",
                numChecksums, 1);
    }

    deleteContent(store.directory());
    IOUtils.close(store);
}

From source file:perf.Indexer.java

License:Apache License

private static void _main(String[] clArgs) throws Exception {

    Args args = new Args(clArgs);

    // EG: -facets Date -facets characterCount ...
    FacetsConfig facetsConfig = new FacetsConfig();
    facetsConfig.setHierarchical("Date", true);
    final Set<String> facetFields = new HashSet<String>();
    if (args.hasArg("-facets")) {
        for (String arg : args.getStrings("-facets")) {
            facetFields.add(arg);/*from   w w  w  . j a va2s .  c o m*/
        }
    }

    final String dirImpl = args.getString("-dirImpl");
    final String dirPath = args.getString("-indexPath") + "/index";

    final Directory dir;
    OpenDirectory od = OpenDirectory.get(dirImpl);

    dir = od.open(Paths.get(dirPath));

    final String analyzer = args.getString("-analyzer");
    final Analyzer a;
    if (analyzer.equals("EnglishAnalyzer")) {
        a = new EnglishAnalyzer();
    } else if (analyzer.equals("StandardAnalyzer")) {
        a = new StandardAnalyzer();
    } else if (analyzer.equals("StandardAnalyzerNoStopWords")) {
        a = new StandardAnalyzer(CharArraySet.EMPTY_SET);
    } else if (analyzer.equals("ShingleStandardAnalyzer")) {
        a = new ShingleAnalyzerWrapper(new StandardAnalyzer(), 2, 2);
    } else if (analyzer.equals("ShingleStandardAnalyzerNoStopWords")) {
        a = new ShingleAnalyzerWrapper(new StandardAnalyzer(CharArraySet.EMPTY_SET), 2, 2);
    } else {
        throw new RuntimeException("unknown analyzer " + analyzer);
    }

    final String lineFile = args.getString("-lineDocsFile");

    // -1 means all docs in the line file:
    final int docCountLimit = args.getInt("-docCountLimit");
    final int numThreads = args.getInt("-threadCount");

    final boolean doForceMerge = args.getFlag("-forceMerge");
    final boolean verbose = args.getFlag("-verbose");

    String indexSortField = null;
    SortField.Type indexSortType = null;

    if (args.hasArg("-indexSort")) {
        indexSortField = args.getString("-indexSort");

        int i = indexSortField.indexOf(':');
        if (i == -1) {
            throw new IllegalArgumentException(
                    "-indexSort should have form field:type; got: " + indexSortField);
        }
        String typeString = indexSortField.substring(i + 1, indexSortField.length());
        if (typeString.equals("long")) {
            indexSortType = SortField.Type.LONG;
        } else if (typeString.equals("string")) {
            indexSortType = SortField.Type.STRING;
        } else {
            throw new IllegalArgumentException("-indexSort can only handle 'long' sort; got: " + typeString);
        }
        indexSortField = indexSortField.substring(0, i);
    } else {
        indexSortType = null;
    }

    final double ramBufferSizeMB = args.getDouble("-ramBufferMB");
    final int maxBufferedDocs = args.getInt("-maxBufferedDocs");

    final String defaultPostingsFormat = args.getString("-postingsFormat");
    final boolean doDeletions = args.getFlag("-deletions");
    final boolean printDPS = args.getFlag("-printDPS");
    final boolean waitForMerges = args.getFlag("-waitForMerges");
    final boolean waitForCommit = args.getFlag("-waitForCommit");
    final String mergePolicy = args.getString("-mergePolicy");
    final Mode mode;
    final boolean doUpdate = args.getFlag("-update");
    if (doUpdate) {
        mode = Mode.UPDATE;
    } else {
        mode = Mode.valueOf(args.getString("-mode", "add").toUpperCase(Locale.ROOT));
    }
    int randomDocIDMax;
    if (mode == Mode.UPDATE) {
        randomDocIDMax = args.getInt("-randomDocIDMax");
    } else {
        randomDocIDMax = -1;
    }
    final String idFieldPostingsFormat = args.getString("-idFieldPostingsFormat");
    final boolean addGroupingFields = args.getFlag("-grouping");
    final boolean useCFS = args.getFlag("-cfs");
    final boolean storeBody = args.getFlag("-store");
    final boolean tvsBody = args.getFlag("-tvs");
    final boolean bodyPostingsOffsets = args.getFlag("-bodyPostingsOffsets");
    final int maxConcurrentMerges = args.getInt("-maxConcurrentMerges");
    final boolean addDVFields = args.getFlag("-dvfields");
    final boolean doRandomCommit = args.getFlag("-randomCommit");
    final boolean useCMS = args.getFlag("-useCMS");
    final boolean disableIOThrottle = args.getFlag("-disableIOThrottle");

    if (waitForCommit == false && waitForMerges) {
        throw new RuntimeException("pass -waitForCommit if you pass -waitForMerges");
    }

    if (waitForCommit == false && doForceMerge) {
        throw new RuntimeException("pass -waitForCommit if you pass -forceMerge");
    }

    if (waitForCommit == false && doDeletions) {
        throw new RuntimeException("pass -waitForCommit if you pass -deletions");
    }

    if (useCMS == false && disableIOThrottle) {
        throw new RuntimeException("-disableIOThrottle only makes sense with -useCMS");
    }

    final double nrtEverySec;
    if (args.hasArg("-nrtEverySec")) {
        nrtEverySec = args.getDouble("-nrtEverySec");
    } else {
        nrtEverySec = -1.0;
    }

    // True to start back at the beginning if we run out of
    // docs from the line file source:
    final boolean repeatDocs = args.getFlag("-repeatDocs");

    final String facetDVFormatName;
    if (facetFields.isEmpty()) {
        facetDVFormatName = "Lucene54";
    } else {
        facetDVFormatName = args.getString("-facetDVFormat");
    }

    if (addGroupingFields && docCountLimit == -1) {
        a.close();
        throw new RuntimeException("cannot add grouping fields unless docCount is set");
    }

    args.check();

    System.out.println("Dir: " + dirImpl);
    System.out.println("Index path: " + dirPath);
    System.out.println("Analyzer: " + analyzer);
    System.out.println("Line file: " + lineFile);
    System.out.println("Doc count limit: " + (docCountLimit == -1 ? "all docs" : "" + docCountLimit));
    System.out.println("Threads: " + numThreads);
    System.out.println("Force merge: " + (doForceMerge ? "yes" : "no"));
    System.out.println("Verbose: " + (verbose ? "yes" : "no"));
    System.out.println("RAM Buffer MB: " + ramBufferSizeMB);
    System.out.println("Max buffered docs: " + maxBufferedDocs);
    System.out.println("Default postings format: " + defaultPostingsFormat);
    System.out.println("Do deletions: " + (doDeletions ? "yes" : "no"));
    System.out.println("Wait for merges: " + (waitForMerges ? "yes" : "no"));
    System.out.println("Wait for commit: " + (waitForCommit ? "yes" : "no"));
    System.out.println("IO throttle: " + (disableIOThrottle ? "no" : "yes"));
    System.out.println("Merge policy: " + mergePolicy);
    System.out.println("Mode: " + mode);
    if (mode == Mode.UPDATE) {
        System.out.println("DocIDMax: " + randomDocIDMax);
    }
    System.out.println("ID field postings format: " + idFieldPostingsFormat);
    System.out.println("Add grouping fields: " + (addGroupingFields ? "yes" : "no"));
    System.out.println("Compound file format: " + (useCFS ? "yes" : "no"));
    System.out.println("Store body field: " + (storeBody ? "yes" : "no"));
    System.out.println("Term vectors for body field: " + (tvsBody ? "yes" : "no"));
    System.out.println("Facet DV Format: " + facetDVFormatName);
    System.out.println("Facet fields: " + facetFields);
    System.out.println("Body postings offsets: " + (bodyPostingsOffsets ? "yes" : "no"));
    System.out.println("Max concurrent merges: " + maxConcurrentMerges);
    System.out.println("Add DocValues fields: " + addDVFields);
    System.out.println("Use ConcurrentMergeScheduler: " + useCMS);
    if (nrtEverySec > 0.0) {
        System.out.println("Open & close NRT reader every: " + nrtEverySec + " sec");
    } else {
        System.out.println("Open & close NRT reader every: never");
    }
    System.out.println("Repeat docs: " + repeatDocs);

    if (verbose) {
        InfoStream.setDefault(new PrintStreamInfoStream(System.out));
    }

    final IndexWriterConfig iwc = new IndexWriterConfig(a);

    if (indexSortField != null) {
        iwc.setIndexSort(new Sort(new SortField(indexSortField, indexSortType)));
    }

    if (mode == Mode.UPDATE) {
        iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
    } else {
        iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
    }

    iwc.setMaxBufferedDocs(maxBufferedDocs);
    iwc.setRAMBufferSizeMB(ramBufferSizeMB);

    // So flushed segments do/don't use CFS:
    iwc.setUseCompoundFile(useCFS);

    final AtomicBoolean indexingFailed = new AtomicBoolean();

    iwc.setMergeScheduler(getMergeScheduler(indexingFailed, useCMS, maxConcurrentMerges, disableIOThrottle));
    iwc.setMergePolicy(getMergePolicy(mergePolicy, useCFS));

    // Keep all commit points:
    if (doDeletions || doForceMerge) {
        iwc.setIndexDeletionPolicy(NoDeletionPolicy.INSTANCE);
    }

    final Codec codec = new Lucene62Codec() {
        @Override
        public PostingsFormat getPostingsFormatForField(String field) {
            return PostingsFormat.forName(field.equals("id") ? idFieldPostingsFormat : defaultPostingsFormat);
        }

        private final DocValuesFormat facetsDVFormat = DocValuesFormat.forName(facetDVFormatName);
        //private final DocValuesFormat lucene42DVFormat = DocValuesFormat.forName("Lucene42");
        //private final DocValuesFormat diskDVFormat = DocValuesFormat.forName("Disk");
        //        private final DocValuesFormat lucene45DVFormat = DocValuesFormat.forName("Lucene45");
        private final DocValuesFormat directDVFormat = DocValuesFormat.forName("Direct");

        @Override
        public DocValuesFormat getDocValuesFormatForField(String field) {
            if (facetFields.contains(field) || field.equals("$facets")) {
                return facetsDVFormat;
                //} else if (field.equals("$facets_sorted_doc_values")) {
                //return diskDVFormat;
            } else {
                // Use default DVFormat for all else:
                // System.out.println("DV: field=" + field + " format=" + super.getDocValuesFormatForField(field));
                return super.getDocValuesFormatForField(field);
            }
        }
    };

    iwc.setCodec(codec);

    System.out.println("IW config=" + iwc);

    IndexWriter w = new IndexWriter(dir, iwc);

    System.out.println("Index has " + w.maxDoc() + " docs");

    final TaxonomyWriter taxoWriter;
    if (facetFields.isEmpty() == false) {
        taxoWriter = new DirectoryTaxonomyWriter(od.open(Paths.get(args.getString("-indexPath"), "facets")),
                IndexWriterConfig.OpenMode.CREATE);
    } else {
        taxoWriter = null;
    }

    // Fixed seed so group field values are always consistent:
    final Random random = new Random(17);

    LineFileDocs lineFileDocs = new LineFileDocs(lineFile, repeatDocs, storeBody, tvsBody, bodyPostingsOffsets,
            false, taxoWriter, facetFields, facetsConfig, addDVFields);

    float docsPerSecPerThread = -1f;
    //float docsPerSecPerThread = 100f;

    IndexThreads threads = new IndexThreads(random, w, indexingFailed, lineFileDocs, numThreads, docCountLimit,
            addGroupingFields, printDPS, mode, docsPerSecPerThread, null, nrtEverySec, randomDocIDMax);

    System.out.println("\nIndexer: start");
    final long t0 = System.currentTimeMillis();

    threads.start();

    while (!threads.done() && indexingFailed.get() == false) {
        Thread.sleep(100);

        // Commits once per minute on average:
        if (doRandomCommit && random.nextInt(600) == 17) {
            System.out.println("Indexer: now commit");
            long commitStartNS = System.nanoTime();
            w.commit();
            System.out.println(String.format(Locale.ROOT, "Indexer: commit took %.1f msec",
                    (System.nanoTime() - commitStartNS) / 1000000.));
        }
    }

    threads.stop();

    final long t1 = System.currentTimeMillis();
    System.out.println("\nIndexer: indexing done (" + (t1 - t0) + " msec); total " + w.maxDoc() + " docs");
    // if we update we can not tell how many docs
    if (threads.failed.get()) {
        throw new RuntimeException("exceptions during indexing");
    }
    if (mode != Mode.UPDATE && docCountLimit != -1 && w.maxDoc() != docCountLimit) {
        throw new RuntimeException("w.maxDoc()=" + w.maxDoc() + " but expected " + docCountLimit);
    }

    final Map<String, String> commitData = new HashMap<String, String>();

    if (waitForMerges) {
        w.close();
        IndexWriterConfig iwc2 = new IndexWriterConfig(a);
        iwc2.setMergeScheduler(
                getMergeScheduler(indexingFailed, useCMS, maxConcurrentMerges, disableIOThrottle));
        iwc2.setMergePolicy(getMergePolicy(mergePolicy, useCFS));
        iwc2.setCodec(codec);
        iwc2.setUseCompoundFile(useCFS);
        iwc2.setMaxBufferedDocs(maxBufferedDocs);
        iwc2.setRAMBufferSizeMB(ramBufferSizeMB);
        if (indexSortField != null) {
            iwc2.setIndexSort(new Sort(new SortField(indexSortField, indexSortType)));
        }

        w = new IndexWriter(dir, iwc2);
        long t2 = System.currentTimeMillis();
        System.out.println("\nIndexer: waitForMerges done (" + (t2 - t1) + " msec)");
    }

    if (waitForCommit) {
        commitData.put("userData", "multi");
        w.setLiveCommitData(commitData.entrySet());
        long t2 = System.currentTimeMillis();
        w.commit();
        long t3 = System.currentTimeMillis();
        System.out.println("\nIndexer: commit multi (took " + (t3 - t2) + " msec)");
    } else {
        w.rollback();
        w = null;
    }

    if (doForceMerge) {
        long forceMergeStartMSec = System.currentTimeMillis();
        w.forceMerge(1);
        long forceMergeEndMSec = System.currentTimeMillis();
        System.out.println(
                "\nIndexer: force merge done (took " + (forceMergeEndMSec - forceMergeStartMSec) + " msec)");

        commitData.put("userData", "single");
        w.setLiveCommitData(commitData.entrySet());
        w.commit();
        final long t5 = System.currentTimeMillis();
        System.out.println("\nIndexer: commit single done (took " + (t5 - forceMergeEndMSec) + " msec)");
    }

    if (doDeletions) {
        final long t5 = System.currentTimeMillis();
        // Randomly delete 5% of the docs
        final Set<Integer> deleted = new HashSet<Integer>();
        final int maxDoc = w.maxDoc();
        final int toDeleteCount = (int) (maxDoc * 0.05);
        System.out.println("\nIndexer: delete " + toDeleteCount + " docs");
        while (deleted.size() < toDeleteCount) {
            final int id = random.nextInt(maxDoc);
            if (!deleted.contains(id)) {
                deleted.add(id);
                w.deleteDocuments(new Term("id", LineFileDocs.intToID(id)));
            }
        }
        final long t6 = System.currentTimeMillis();
        System.out.println("\nIndexer: deletes done (took " + (t6 - t5) + " msec)");

        commitData.put("userData", doForceMerge ? "delsingle" : "delmulti");
        w.setLiveCommitData(commitData.entrySet());
        w.commit();
        final long t7 = System.currentTimeMillis();
        System.out.println("\nIndexer: commit delmulti done (took " + (t7 - t6) + " msec)");

        if (doUpdate || w.numDocs() != maxDoc - toDeleteCount) {
            throw new RuntimeException(
                    "count mismatch: w.numDocs()=" + w.numDocs() + " but expected " + (maxDoc - toDeleteCount));
        }
    }

    if (taxoWriter != null) {
        System.out.println("Taxonomy has " + taxoWriter.getSize() + " ords");
        taxoWriter.commit();
        taxoWriter.close();
    }

    final long tCloseStart = System.currentTimeMillis();
    if (w != null) {
        w.close();
        w = null;
    }
    if (waitForCommit) {
        System.out.println("\nIndexer: at close: " + SegmentInfos.readLatestCommit(dir));
        System.out.println("\nIndexer: close took " + (System.currentTimeMillis() - tCloseStart) + " msec");
    }

    dir.close();
    final long tFinal = System.currentTimeMillis();
    System.out.println("\nIndexer: net bytes indexed " + threads.getBytesIndexed());

    final long indexingTime;
    if (waitForCommit) {
        indexingTime = tFinal - t0;
        System.out.println("\nIndexer: finished (" + indexingTime + " msec)");
    } else {
        indexingTime = t1 - t0;
        System.out.println("\nIndexer: finished (" + indexingTime + " msec), excluding commit");
    }
    System.out.println(
            "\nIndexer: " + (threads.getBytesIndexed() / 1024. / 1024. / 1024. / (indexingTime / 3600000.))
                    + " GB/hour plain text");
}

From source file:perf.NRTPerfTest.java

License:Apache License

public static void main(String[] args) throws Exception {

    final String dirImpl = args[0];
    final String dirPath = args[1];
    final String commit = args[2];
    final String lineDocFile = args[3];
    final long seed = Long.parseLong(args[4]);
    final double docsPerSec = Double.parseDouble(args[5]);
    final double runTimeSec = Double.parseDouble(args[6]);
    final int numSearchThreads = Integer.parseInt(args[7]);
    int numIndexThreads = Integer.parseInt(args[8]);
    if (numIndexThreads > docsPerSec) {
        System.out.println("INFO: numIndexThreads higher than docsPerSec, adjusting numIndexThreads");
        numIndexThreads = (int) Math.max(1, docsPerSec);
    }//from  w ww  . ja v a2s . c o  m
    final double reopenPerSec = Double.parseDouble(args[9]);
    final Mode mode = Mode.valueOf(args[10].toUpperCase(Locale.ROOT));
    statsEverySec = Integer.parseInt(args[11]);
    final boolean doCommit = args[12].equals("yes");
    final double mergeMaxWriteMBPerSec = Double.parseDouble(args[13]);
    if (mergeMaxWriteMBPerSec != 0.0) {
        throw new IllegalArgumentException("mergeMaxWriteMBPerSec must be 0.0 until LUCENE-3202 is done");
    }
    final String tasksFile = args[14];
    if (Files.notExists(Paths.get(tasksFile))) {
        throw new FileNotFoundException("tasks file not found " + tasksFile);
    }

    final boolean hasProcMemInfo = Files.exists(Paths.get("/proc/meminfo"));

    System.out.println("DIR=" + dirImpl);
    System.out.println("Index=" + dirPath);
    System.out.println("Commit=" + commit);
    System.out.println("LineDocs=" + lineDocFile);
    System.out.println("Docs/sec=" + docsPerSec);
    System.out.println("Run time sec=" + runTimeSec);
    System.out.println("NumSearchThreads=" + numSearchThreads);
    System.out.println("NumIndexThreads=" + numIndexThreads);
    System.out.println("Reopen/sec=" + reopenPerSec);
    System.out.println("Mode=" + mode);
    System.out.println("tasksFile=" + tasksFile);

    System.out.println("Record stats every " + statsEverySec + " seconds");
    final int count = (int) ((runTimeSec / statsEverySec) + 2);
    docsIndexedByTime = new AtomicInteger[count];
    searchesByTime = new AtomicInteger[count];
    totalUpdateTimeByTime = new AtomicLong[count];
    final AtomicInteger reopensByTime[] = new AtomicInteger[count];
    for (int i = 0; i < count; i++) {
        docsIndexedByTime[i] = new AtomicInteger();
        searchesByTime[i] = new AtomicInteger();
        totalUpdateTimeByTime[i] = new AtomicLong();
        reopensByTime[i] = new AtomicInteger();
    }

    System.out.println(
            "Max merge MB/sec = " + (mergeMaxWriteMBPerSec <= 0.0 ? "unlimited" : mergeMaxWriteMBPerSec));
    final Random random = new Random(seed);

    final LineFileDocs docs = new LineFileDocs(lineDocFile, true, false, false, false, false, null,
            new HashSet<String>(), null, true);

    final Directory dir0;
    if (dirImpl.equals("MMapDirectory")) {
        dir0 = new MMapDirectory(Paths.get(dirPath));
    } else if (dirImpl.equals("NIOFSDirectory")) {
        dir0 = new NIOFSDirectory(Paths.get(dirPath));
    } else if (dirImpl.equals("SimpleFSDirectory")) {
        dir0 = new SimpleFSDirectory(Paths.get(dirPath));
    } else {
        docs.close();
        throw new RuntimeException("unknown directory impl \"" + dirImpl + "\"");
    }
    //final NRTCachingDirectory dir = new NRTCachingDirectory(dir0, 10, 200.0, mergeMaxWriteMBPerSec);
    final NRTCachingDirectory dir = new NRTCachingDirectory(dir0, 20, 400.0);
    //final MergeScheduler ms = dir.getMergeScheduler();
    //final Directory dir = dir0;
    //final MergeScheduler ms = new ConcurrentMergeScheduler();

    final String field = "body";

    // Open an IW on the requested commit point, but, don't
    // delete other (past or future) commit points:
    // TODO take Analyzer as parameter
    StandardAnalyzer analyzer = new StandardAnalyzer(CharArraySet.EMPTY_SET);
    final IndexWriterConfig conf = new IndexWriterConfig(analyzer);
    conf.setIndexDeletionPolicy(NoDeletionPolicy.INSTANCE);
    conf.setRAMBufferSizeMB(256.0);
    //iwc.setMergeScheduler(ms);

    final Codec codec = new Lucene62Codec() {
        @Override
        public PostingsFormat getPostingsFormatForField(String field) {
            if (field.equals("id")) {
                return PostingsFormat.forName("Memory");
            } else {
                return PostingsFormat.forName("Lucene50");
            }
        }

        private final DocValuesFormat direct = DocValuesFormat.forName("Direct");

        @Override
        public DocValuesFormat getDocValuesFormatForField(String field) {
            return direct;
        }
    };

    conf.setCodec(codec);

    /*
    iwc.setMergePolicy(new LogByteSizeMergePolicy());
    ((LogMergePolicy) iwc.getMergePolicy()).setUseCompoundFile(false);
    ((LogMergePolicy) iwc.getMergePolicy()).setMergeFactor(30);
    ((LogByteSizeMergePolicy) iwc.getMergePolicy()).setMaxMergeMB(10000.0);
    System.out.println("USING LOG BS MP");
     */

    TieredMergePolicy tmp = new TieredMergePolicy();
    tmp.setNoCFSRatio(0.0);
    tmp.setMaxMergedSegmentMB(1000000.0);
    //tmp.setReclaimDeletesWeight(3.0);
    //tmp.setMaxMergedSegmentMB(7000.0);
    conf.setMergePolicy(tmp);

    if (!commit.equals("none")) {
        conf.setIndexCommit(PerfUtils.findCommitPoint(commit, dir));
    }

    // Make sure merges run @ higher prio than indexing:
    final ConcurrentMergeScheduler cms = (ConcurrentMergeScheduler) conf.getMergeScheduler();
    cms.setMaxMergesAndThreads(4, 1);

    conf.setMergedSegmentWarmer(new MergedReaderWarmer(field));

    final IndexWriter w = new IndexWriter(dir, conf);
    // w.setInfoStream(System.out);

    IndexThreads.UpdatesListener updatesListener = new IndexThreads.UpdatesListener() {
        long startTimeNS;

        @Override
        public void beforeUpdate() {
            startTimeNS = System.nanoTime();
        }

        @Override
        public void afterUpdate() {
            int idx = currentQT.get();
            totalUpdateTimeByTime[idx].addAndGet(System.nanoTime() - startTimeNS);
            docsIndexedByTime[idx].incrementAndGet();
        }
    };
    IndexThreads indexThreads = new IndexThreads(random, w, new AtomicBoolean(false), docs, numIndexThreads, -1,
            false, false, mode, (float) (docsPerSec / numIndexThreads), updatesListener, -1.0, w.maxDoc());

    // NativePosixUtil.mlockTermsDict(startR, "id");
    final SearcherManager manager = new SearcherManager(w, null);
    IndexSearcher s = manager.acquire();
    try {
        System.out.println("Reader=" + s.getIndexReader());
    } finally {
        manager.release(s);
    }

    final DirectSpellChecker spellChecker = new DirectSpellChecker();
    final IndexState indexState = new IndexState(manager, null, field, spellChecker, "PostingsHighlighter",
            null);
    final QueryParser qp = new QueryParser(field, analyzer);
    TaskParser taskParser = new TaskParser(indexState, qp, field, 10, random, true);
    final TaskSource tasks = new RandomTaskSource(taskParser, tasksFile, random) {
        @Override
        public void taskDone(Task task, long queueTimeNS, int toalHitCount) {
            searchesByTime[currentQT.get()].incrementAndGet();
        }
    };
    System.out.println("Task repeat count 1");
    System.out.println("Tasks file " + tasksFile);
    System.out.println("Num task per cat 20");
    final TaskThreads taskThreads = new TaskThreads(tasks, indexState, numSearchThreads);

    final ReopenThread reopenThread = new ReopenThread(reopenPerSec, manager, reopensByTime, runTimeSec);
    reopenThread.setName("ReopenThread");
    reopenThread.setPriority(4 + Thread.currentThread().getPriority());
    System.out.println("REOPEN PRI " + reopenThread.getPriority());

    indexThreads.start();
    reopenThread.start();
    taskThreads.start();

    Thread.currentThread().setPriority(5 + Thread.currentThread().getPriority());
    System.out.println("TIMER PRI " + Thread.currentThread().getPriority());

    //System.out.println("Start: " + new Date());

    final long startMS = System.currentTimeMillis();
    final long stopMS = startMS + (long) (runTimeSec * 1000);
    int lastQT = -1;
    while (true) {
        final long t = System.currentTimeMillis();
        if (t >= stopMS) {
            break;
        }
        final int qt = (int) ((t - startMS) / statsEverySec / 1000);
        currentQT.set(qt);
        if (qt != lastQT) {
            final int prevQT = lastQT;
            lastQT = qt;
            if (prevQT > 0) {
                final String other;
                if (hasProcMemInfo) {
                    other = " D=" + getLinuxDirtyBytes();
                } else {
                    other = "";
                }
                int prev = prevQT - 1;
                System.out.println(String.format("QT %d searches=%d docs=%d reopens=%s totUpdateTime=%d", prev,
                        searchesByTime[prev].get(), docsIndexedByTime[prev].get(),
                        reopensByTime[prev].get() + other,
                        TimeUnit.NANOSECONDS.toMillis(totalUpdateTimeByTime[prev].get())));
            }
        }
        Thread.sleep(25);
    }

    taskThreads.stop();
    reopenThread.join();
    indexThreads.stop();

    System.out.println("By time:");
    for (int i = 0; i < searchesByTime.length - 2; i++) {
        System.out.println(String.format("  %d searches=%d docs=%d reopens=%d totUpdateTime=%d",
                i * statsEverySec, searchesByTime[i].get(), docsIndexedByTime[i].get(), reopensByTime[i].get(),
                TimeUnit.NANOSECONDS.toMillis(totalUpdateTimeByTime[i].get())));
    }

    manager.close();
    if (doCommit) {
        w.close();
    } else {
        w.rollback();
    }
}

From source file:perf.SearchPerfTest.java

License:Apache License

private static void _main(String[] clArgs) throws Exception {

    // args: dirImpl indexPath numThread numIterPerThread
    // eg java SearchPerfTest /path/to/index 4 100
    final Args args = new Args(clArgs);

    Directory dir0;/* ww w  . j a v a 2 s . co m*/
    final String dirPath = args.getString("-indexPath") + "/index";
    final String dirImpl = args.getString("-dirImpl");

    OpenDirectory od = OpenDirectory.get(dirImpl);

    /*
    } else if (dirImpl.equals("NativePosixMMapDirectory")) {
      dir0 = new NativePosixMMapDirectory(new File(dirPath));
      ramDir = null;
      if (doFacets) {
        facetsDir = new NativePosixMMapDirectory(new File(facetsDirPath));
      }
    } else if (dirImpl.equals("CachingDirWrapper")) {
      dir0 = new CachingRAMDirectory(new MMapDirectory(new File(dirPath)));
      ramDir = null;
    } else if (dirImpl.equals("RAMExceptDirectPostingsDirectory")) {
      // Load only non-postings files into RAMDir (assumes
      // Lucene40PF is the wrapped PF):
      Set<String> postingsExtensions = new HashSet<String>();
      postingsExtensions.add("frq");
      postingsExtensions.add("prx");
      postingsExtensions.add("tip");
      postingsExtensions.add("tim");
              
      ramDir =  new RAMDirectory();
      Directory fsDir = new MMapDirectory(new File(dirPath));
      for (String file : fsDir.listAll()) {
        int idx = file.indexOf('.');
        if (idx != -1 && postingsExtensions.contains(file.substring(idx+1, file.length()))) {
          continue;
        }
            
        fsDir.copy(ramDir, file, file, IOContext.READ);
      }
      dir0 = new FileSwitchDirectory(postingsExtensions,
                             fsDir,
                             ramDir,
                             true);
      if (doFacets) {
        facetsDir = new RAMDirectory(new SimpleFSDirectory(new File(facetsDirPath)), IOContext.READ);
      }
      */

    final RAMDirectory ramDir;
    dir0 = od.open(Paths.get(dirPath));
    if (dir0 instanceof RAMDirectory) {
        ramDir = (RAMDirectory) dir0;
    } else {
        ramDir = null;
    }

    // TODO: NativeUnixDir?

    final String analyzer = args.getString("-analyzer");
    final String tasksFile = args.getString("-taskSource");
    final int searchThreadCount = args.getInt("-searchThreadCount");
    final String fieldName = args.getString("-field");
    final boolean printHeap = args.getFlag("-printHeap");
    final boolean doPKLookup = args.getFlag("-pk");
    final int topN = args.getInt("-topN");
    final boolean doStoredLoads = args.getFlag("-loadStoredFields");

    // Used to choose which random subset of tasks we will
    // run, to generate the PKLookup tasks, and to generate
    // any random pct filters:
    final long staticRandomSeed = args.getLong("-staticSeed");

    // Used to shuffle the random subset of tasks:
    final long randomSeed = args.getLong("-seed");

    // TODO: this could be way better.
    final String similarity = args.getString("-similarity");
    // now reflect
    final Class<? extends Similarity> simClazz = Class
            .forName("org.apache.lucene.search.similarities." + similarity).asSubclass(Similarity.class);
    final Similarity sim = simClazz.newInstance();

    System.out.println("Using dir impl " + dir0.getClass().getName());
    System.out.println("Analyzer " + analyzer);
    System.out.println("Similarity " + similarity);
    System.out.println("Search thread count " + searchThreadCount);
    System.out.println("topN " + topN);
    System.out.println("JVM " + (Constants.JRE_IS_64BIT ? "is" : "is not") + " 64bit");
    System.out.println("Pointer is " + RamUsageEstimator.NUM_BYTES_OBJECT_REF + " bytes");

    final Analyzer a;
    if (analyzer.equals("EnglishAnalyzer")) {
        a = new EnglishAnalyzer();
    } else if (analyzer.equals("ClassicAnalyzer")) {
        a = new ClassicAnalyzer();
    } else if (analyzer.equals("StandardAnalyzer")) {
        a = new StandardAnalyzer();
    } else if (analyzer.equals("StandardAnalyzerNoStopWords")) {
        a = new StandardAnalyzer(CharArraySet.EMPTY_SET);
    } else if (analyzer.equals("ShingleStandardAnalyzer")) {
        a = new ShingleAnalyzerWrapper(new StandardAnalyzer(CharArraySet.EMPTY_SET), 2, 2,
                ShingleFilter.DEFAULT_TOKEN_SEPARATOR, true, true, ShingleFilter.DEFAULT_FILLER_TOKEN);
    } else {
        throw new RuntimeException("unknown analyzer " + analyzer);
    }

    final ReferenceManager<IndexSearcher> mgr;
    final IndexWriter writer;
    final Directory dir;

    final String commit = args.getString("-commit");
    final String hiliteImpl = args.getString("-hiliteImpl");

    final String logFile = args.getString("-log");

    final long tSearcherStart = System.currentTimeMillis();

    final boolean verifyCheckSum = !args.getFlag("-skipVerifyChecksum");
    final boolean recacheFilterDeletes = args.getFlag("-recacheFilterDeletes");

    if (recacheFilterDeletes) {
        throw new UnsupportedOperationException("recacheFilterDeletes was deprecated");
    }

    if (args.getFlag("-nrt")) {
        // TODO: get taxoReader working here too
        // TODO: factor out & share this CL processing w/ Indexer
        final int indexThreadCount = args.getInt("-indexThreadCount");
        final String lineDocsFile = args.getString("-lineDocsFile");
        final float docsPerSecPerThread = args.getFloat("-docsPerSecPerThread");
        final float reopenEverySec = args.getFloat("-reopenEverySec");
        final boolean storeBody = args.getFlag("-store");
        final boolean tvsBody = args.getFlag("-tvs");
        final boolean useCFS = args.getFlag("-cfs");
        final String defaultPostingsFormat = args.getString("-postingsFormat");
        final String idFieldPostingsFormat = args.getString("-idFieldPostingsFormat");
        final boolean verbose = args.getFlag("-verbose");
        final boolean cloneDocs = args.getFlag("-cloneDocs");
        final Mode mode = Mode.valueOf(args.getString("-mode", "update").toUpperCase(Locale.ROOT));

        final long reopenEveryMS = (long) (1000 * reopenEverySec);

        if (verbose) {
            InfoStream.setDefault(new PrintStreamInfoStream(System.out));
        }

        if (!dirImpl.equals("RAMDirectory") && !dirImpl.equals("RAMExceptDirectPostingsDirectory")) {
            System.out.println("Wrap NRTCachingDirectory");
            dir0 = new NRTCachingDirectory(dir0, 20, 400.0);
        }

        dir = dir0;

        final IndexWriterConfig iwc = new IndexWriterConfig(a);
        iwc.setOpenMode(IndexWriterConfig.OpenMode.APPEND);
        iwc.setRAMBufferSizeMB(256.0);
        iwc.setIndexDeletionPolicy(NoDeletionPolicy.INSTANCE);

        // TODO: also RAMDirExceptDirect...?  need to
        // ... block deletes against wrapped FSDir?
        if (dirImpl.equals("RAMDirectory")) {
            // Let IW remove files only referenced by starting commit:
            iwc.setIndexDeletionPolicy(new KeepNoCommitsDeletionPolicy());
        }

        if (commit != null && commit.length() > 0) {
            System.out.println("Opening writer on commit=" + commit);
            iwc.setIndexCommit(PerfUtils.findCommitPoint(commit, dir));
        }

        ((TieredMergePolicy) iwc.getMergePolicy()).setNoCFSRatio(useCFS ? 1.0 : 0.0);
        //((TieredMergePolicy) iwc.getMergePolicy()).setMaxMergedSegmentMB(1024);
        //((TieredMergePolicy) iwc.getMergePolicy()).setReclaimDeletesWeight(3.0);
        //((TieredMergePolicy) iwc.getMergePolicy()).setMaxMergeAtOnce(4);

        final Codec codec = new Lucene62Codec() {
            @Override
            public PostingsFormat getPostingsFormatForField(String field) {
                return PostingsFormat
                        .forName(field.equals("id") ? idFieldPostingsFormat : defaultPostingsFormat);
            }
        };
        iwc.setCodec(codec);

        final ConcurrentMergeScheduler cms = (ConcurrentMergeScheduler) iwc.getMergeScheduler();
        // Only let one merge run at a time...
        // ... but queue up up to 4, before index thread is stalled:
        cms.setMaxMergesAndThreads(4, 1);

        iwc.setMergedSegmentWarmer(new IndexWriter.IndexReaderWarmer() {
            @Override
            public void warm(LeafReader reader) throws IOException {
                final long t0 = System.currentTimeMillis();
                //System.out.println("DO WARM: " + reader);
                IndexSearcher s = new IndexSearcher(reader);
                s.setQueryCache(null); // don't bench the cache
                s.search(new TermQuery(new Term(fieldName, "united")), 10);
                final long t1 = System.currentTimeMillis();
                System.out.println("warm segment=" + reader + " numDocs=" + reader.numDocs() + ": took "
                        + (t1 - t0) + " msec");
            }
        });

        writer = new IndexWriter(dir, iwc);
        System.out.println("Initial writer.maxDoc()=" + writer.maxDoc());

        // TODO: add -nrtBodyPostingsOffsets instead of
        // hardwired false:
        boolean addDVFields = mode == Mode.BDV_UPDATE || mode == Mode.NDV_UPDATE;
        LineFileDocs lineFileDocs = new LineFileDocs(lineDocsFile, false, storeBody, tvsBody, false, cloneDocs,
                null, null, null, addDVFields);
        IndexThreads threads = new IndexThreads(new Random(17), writer, new AtomicBoolean(false), lineFileDocs,
                indexThreadCount, -1, false, false, mode, docsPerSecPerThread, null, -1.0, -1);
        threads.start();

        mgr = new SearcherManager(writer, new SearcherFactory() {
            @Override
            public IndexSearcher newSearcher(IndexReader reader, IndexReader previous) {
                IndexSearcher s = new IndexSearcher(reader);
                s.setQueryCache(null); // don't bench the cache
                s.setSimilarity(sim);
                return s;
            }
        });

        System.out.println("reopen every " + reopenEverySec);

        Thread reopenThread = new Thread() {
            @Override
            public void run() {
                try {
                    final long startMS = System.currentTimeMillis();

                    int reopenCount = 1;
                    while (true) {
                        final long sleepMS = startMS + (reopenCount * reopenEveryMS)
                                - System.currentTimeMillis();
                        if (sleepMS < 0) {
                            System.out.println("WARNING: reopen fell behind by " + Math.abs(sleepMS) + " ms");
                        } else {
                            Thread.sleep(sleepMS);
                        }

                        Thread.sleep(sleepMS);
                        mgr.maybeRefresh();
                        reopenCount++;
                        IndexSearcher s = mgr.acquire();
                        try {
                            if (ramDir != null) {
                                System.out.println(String.format(Locale.ENGLISH,
                                        "%.1fs: index: %d bytes in RAMDir; writer.maxDoc()=%d; searcher.maxDoc()=%d; searcher.numDocs()=%d",
                                        (System.currentTimeMillis() - startMS) / 1000.0, ramDir.ramBytesUsed(),
                                        writer.maxDoc(), s.getIndexReader().maxDoc(),
                                        s.getIndexReader().numDocs()));
                                //String[] l = ramDir.listAll();
                                //Arrays.sort(l);
                                //for(String f : l) {
                                //System.out.println("  " + f + ": " + ramDir.fileLength(f));
                                //}
                            } else {
                                System.out.println(String.format(Locale.ENGLISH,
                                        "%.1fs: done reopen; writer.maxDoc()=%d; searcher.maxDoc()=%d; searcher.numDocs()=%d",
                                        (System.currentTimeMillis() - startMS) / 1000.0, writer.maxDoc(),
                                        s.getIndexReader().maxDoc(), s.getIndexReader().numDocs()));
                            }
                        } finally {
                            mgr.release(s);
                        }
                    }
                } catch (Exception e) {
                    throw new RuntimeException(e);
                }
            }
        };
        reopenThread.setName("ReopenThread");
        reopenThread.setPriority(4 + Thread.currentThread().getPriority());
        reopenThread.start();

    } else {
        dir = dir0;
        writer = null;
        final DirectoryReader reader;
        if (commit != null && commit.length() > 0) {
            System.out.println("Opening searcher on commit=" + commit);
            reader = DirectoryReader.open(PerfUtils.findCommitPoint(commit, dir));
        } else {
            // open last commit
            reader = DirectoryReader.open(dir);
        }
        IndexSearcher s = new IndexSearcher(reader);
        s.setQueryCache(null); // don't bench the cache
        s.setSimilarity(sim);
        System.out.println("maxDoc=" + reader.maxDoc() + " numDocs=" + reader.numDocs() + " %tg deletes="
                + (100. * reader.maxDoc() / reader.numDocs()));

        mgr = new SingleIndexSearcher(s);
    }

    System.out.println((System.currentTimeMillis() - tSearcherStart) + " msec to init searcher/NRT");

    {
        IndexSearcher s = mgr.acquire();
        try {
            System.out.println("Searcher: numDocs=" + s.getIndexReader().numDocs() + " maxDoc="
                    + s.getIndexReader().maxDoc() + ": " + s);
        } finally {
            mgr.release(s);
        }
    }

    //System.out.println("searcher=" + searcher);

    FacetsConfig facetsConfig = new FacetsConfig();
    facetsConfig.setHierarchical("Date", true);

    TaxonomyReader taxoReader;
    Path taxoPath = Paths.get(args.getString("-indexPath"), "facets");
    Directory taxoDir = od.open(taxoPath);
    if (DirectoryReader.indexExists(taxoDir)) {
        taxoReader = new DirectoryTaxonomyReader(taxoDir);
        System.out.println("Taxonomy has " + taxoReader.getSize() + " ords");
    } else {
        taxoReader = null;
    }

    final Random staticRandom = new Random(staticRandomSeed);
    final Random random = new Random(randomSeed);

    final DirectSpellChecker spellChecker = new DirectSpellChecker();
    final IndexState indexState = new IndexState(mgr, taxoReader, fieldName, spellChecker, hiliteImpl,
            facetsConfig);

    final QueryParser queryParser = new QueryParser("body", a);
    TaskParser taskParser = new TaskParser(indexState, queryParser, fieldName, topN, staticRandom,
            doStoredLoads);

    final TaskSource tasks;

    if (tasksFile.startsWith("server:")) {
        int idx = tasksFile.indexOf(':', 8);
        if (idx == -1) {
            throw new RuntimeException(
                    "server is missing the port; should be server:interface:port (got: " + tasksFile + ")");
        }
        String iface = tasksFile.substring(7, idx);
        int port = Integer.valueOf(tasksFile.substring(1 + idx));
        RemoteTaskSource remoteTasks = new RemoteTaskSource(iface, port, searchThreadCount, taskParser);

        // nocommit must stop thread?
        tasks = remoteTasks;
    } else {
        // Load the tasks from a file:
        final int taskRepeatCount = args.getInt("-taskRepeatCount");
        final int numTaskPerCat = args.getInt("-tasksPerCat");
        tasks = new LocalTaskSource(indexState, taskParser, tasksFile, staticRandom, random, numTaskPerCat,
                taskRepeatCount, doPKLookup);
        System.out.println("Task repeat count " + taskRepeatCount);
        System.out.println("Tasks file " + tasksFile);
        System.out.println("Num task per cat " + numTaskPerCat);
    }

    args.check();

    // Evil respeller:
    //spellChecker.setMinPrefix(0);
    //spellChecker.setMaxInspections(1024);
    final TaskThreads taskThreads = new TaskThreads(tasks, indexState, searchThreadCount);
    Thread.sleep(10);

    final long startNanos = System.nanoTime();
    taskThreads.start();
    taskThreads.finish();
    final long endNanos = System.nanoTime();

    System.out.println("\n" + ((endNanos - startNanos) / 1000000.0) + " msec total");

    final List<Task> allTasks = tasks.getAllTasks();

    PrintStream out = new PrintStream(logFile);

    if (allTasks != null) {
        // Tasks were local: verify checksums:

        // indexState.setDocIDToID();

        final Map<Task, Task> tasksSeen = new HashMap<Task, Task>();

        out.println("\nResults for " + allTasks.size() + " tasks:");

        boolean fail = false;
        for (final Task task : allTasks) {
            if (verifyCheckSum) {
                final Task other = tasksSeen.get(task);
                if (other != null) {
                    if (task.checksum() != other.checksum()) {
                        System.out.println("\nTASK:");
                        task.printResults(System.out, indexState);
                        System.out.println("\nOTHER TASK:");
                        other.printResults(System.out, indexState);
                        fail = true;
                        //throw new RuntimeException("task " + task + " hit different checksums: " + task.checksum() + " vs " + other.checksum() + " other=" + other);
                    }
                } else {
                    tasksSeen.put(task, task);
                }
            }
            out.println("\nTASK: " + task);
            out.println("  " + (task.runTimeNanos / 1000000.0) + " msec");
            out.println("  thread " + task.threadID);
            task.printResults(out, indexState);
        }
        if (fail) {
            throw new RuntimeException("some tasks got different results across different threads");
        }

        allTasks.clear();
    }

    mgr.close();

    if (taxoReader != null) {
        taxoReader.close();
    }

    if (writer != null) {
        // Don't actually commit any index changes:
        writer.rollback();
    }

    dir.close();

    if (printHeap) {

        // Try to get RAM usage -- some ideas poached from http://www.javaworld.com/javaworld/javatips/jw-javatip130.html
        final Runtime runtime = Runtime.getRuntime();
        long usedMem1 = PerfUtils.usedMemory(runtime);
        long usedMem2 = Long.MAX_VALUE;
        for (int iter = 0; iter < 10; iter++) {
            runtime.runFinalization();
            runtime.gc();
            Thread.yield();
            Thread.sleep(100);
            usedMem2 = usedMem1;
            usedMem1 = PerfUtils.usedMemory(runtime);
        }
        out.println("\nHEAP: " + PerfUtils.usedMemory(runtime));
    }
    out.close();
}