Example usage for org.apache.lucene.index IndexWriterConfig setMaxBufferedDocs

List of usage examples for org.apache.lucene.index IndexWriterConfig setMaxBufferedDocs

Introduction

In this page you can find the example usage for org.apache.lucene.index IndexWriterConfig setMaxBufferedDocs.

Prototype

@Override
    public IndexWriterConfig setMaxBufferedDocs(int maxBufferedDocs) 

Source Link

Usage

From source file:perf.IDPerfTest.java

License:Apache License

private static Result testOne(String indexPath, String desc, IDIterator ids, final int minTermsInBlock,
        final int maxTermsInBlock) throws IOException {
    System.out.println("\ntest: " + desc + " termBlocks=" + minTermsInBlock + "/" + maxTermsInBlock);
    Directory dir = FSDirectory.open(new File(indexPath));
    //IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_48, new StandardAnalyzer(Version.LUCENE_48));
    IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_4_8, new StandardAnalyzer(Version.LUCENE_4_8));
    iwc.setMergeScheduler(new SerialMergeScheduler());
    iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
    // So I can walk the files and get the *.tip sizes:
    iwc.setUseCompoundFile(false);//from w ww  . j  a  v a 2s . c  o  m

    iwc.setCodec(new Lucene53Codec() {
        @Override
        public PostingsFormat getPostingsFormatForField(String field) {
            return new Lucene50PostingsFormat(minTermsInBlock, maxTermsInBlock);
        }
    });

    /// 7/7/7 segment structure:
    iwc.setMaxBufferedDocs(ID_COUNT / 777);
    iwc.setRAMBufferSizeMB(-1);
    //iwc.setInfoStream(new PrintStreamInfoStream(System.out));
    //iwc.setMergePolicy(new LogDocMergePolicy());
    ((TieredMergePolicy) iwc.getMergePolicy()).setFloorSegmentMB(.001);
    ((TieredMergePolicy) iwc.getMergePolicy()).setNoCFSRatio(0.0);
    //((LogDocMergePolicy) iwc.getMergePolicy()).setMinMergeDocs(1000);
    iwc.getMergePolicy().setNoCFSRatio(0.0);

    IndexWriter w = new IndexWriter(dir, iwc);
    Document doc = new Document();

    FieldType ft = new FieldType(StringField.TYPE_NOT_STORED);
    ft.setTokenized(true);
    ft.freeze();

    BytesRef idValue = new BytesRef(64);
    Field idField = new Field("id", new BinaryTokenStream(idValue), ft);
    doc.add(idField);

    long t0 = System.nanoTime();
    BytesRef[] lookupIDs = new BytesRef[ID_SEARCH_COUNT];
    Random random = new Random(17);
    int lookupCount = 0;
    double rate = 1.01 * ((double) ID_SEARCH_COUNT) / ID_COUNT;
    for (int i = 0; i < ID_COUNT; i++) {
        ids.next(idValue);
        if (lookupCount < lookupIDs.length && random.nextDouble() <= rate) {
            lookupIDs[lookupCount++] = BytesRef.deepCopyOf(idValue);
        }
        // Trickery: the idsIter changed the idValue which the BinaryTokenStream reuses for each added doc
        w.addDocument(doc);
    }

    if (lookupCount < lookupIDs.length) {
        throw new RuntimeException("didn't get enough lookup ids: " + lookupCount + " vs " + lookupIDs.length);
    }

    long indexTime = System.nanoTime() - t0;

    System.out.println("  indexing done; waitForMerges...");
    w.waitForMerges();

    IndexReader r = DirectoryReader.open(w, true);
    System.out.println("  reader=" + r);

    shuffle(random, lookupIDs);
    shuffle(random, lookupIDs);

    long bestTime = Long.MAX_VALUE;
    long checksum = 0;

    List<AtomicReaderContext> leaves = new ArrayList<>(r.leaves());
    // Sort largest to smallest:
    Collections.sort(leaves, new Comparator<AtomicReaderContext>() {
        @Override
        public int compare(AtomicReaderContext c1, AtomicReaderContext c2) {
            return c2.reader().maxDoc() - c1.reader().maxDoc();
        }
    });
    TermsEnum[] termsEnums = new TermsEnum[leaves.size()];
    DocsEnum[] docsEnums = new DocsEnum[leaves.size()];
    int[] docBases = new int[leaves.size()];
    for (int i = 0; i < leaves.size(); i++) {
        //System.out.println("i=" + i + " count=" + leaves.get(i).reader().maxDoc());
        termsEnums[i] = leaves.get(i).reader().fields().terms("id").iterator(null);
        docBases[i] = leaves.get(i).docBase;
    }

    long rawLookupCount = 0;

    int countx = 0;
    for (int iter = 0; iter < 5; iter++) {
        t0 = System.nanoTime();
        BlockTreeTermsReader.seekExactFastNotFound = 0;
        BlockTreeTermsReader.seekExactFastRootNotFound = 0;
        rawLookupCount = 0;
        for (BytesRef id : lookupIDs) {
            if (countx++ < 50) {
                System.out.println("    id=" + id);
            }
            boolean found = false;
            for (int seg = 0; seg < termsEnums.length; seg++) {
                rawLookupCount++;
                if (termsEnums[seg].seekExact(id)) {
                    docsEnums[seg] = termsEnums[seg].docs(null, docsEnums[seg], 0);
                    int docID = docsEnums[seg].nextDoc();
                    if (docID == DocsEnum.NO_MORE_DOCS) {
                        // uh-oh!
                        throw new RuntimeException("id not found: " + id);
                    }
                    // paranoia:
                    checksum += docID + docBases[seg];

                    found = true;

                    // Optimization vs MultiFields: we don't need to check any more segments since id is PK
                    break;
                }
            }
            if (found == false) {
                // uh-oh!
                throw new RuntimeException("id not found: " + id);
            }
        }
        long lookupTime = System.nanoTime() - t0;
        System.out.println(String.format(Locale.ROOT, "  iter=" + iter + " lookupTime=%.3f sec",
                lookupTime / 1000000000.0));
        if (lookupTime < bestTime) {
            bestTime = lookupTime;
            System.out.println("    **");
        }
    }

    long totalBytes = 0;
    long termsIndexTotalBytes = 0;
    for (String fileName : dir.listAll()) {
        long bytes = dir.fileLength(fileName);
        totalBytes += bytes;
        if (fileName.endsWith(".tip")) {
            termsIndexTotalBytes += bytes;
        }
    }

    r.close();
    w.rollback();
    dir.close();

    return new Result(desc, ID_COUNT / (indexTime / 1000000.0), lookupIDs.length / (bestTime / 1000000.0),
            totalBytes, termsIndexTotalBytes, checksum, BlockTreeTermsReader.seekExactFastNotFound,
            BlockTreeTermsReader.seekExactFastRootNotFound, rawLookupCount, minTermsInBlock, maxTermsInBlock);
}

From source file:perf.IndexAndSearchOpenStreetMaps.java

License:Apache License

private static void createIndex(boolean fast, boolean doForceMerge, boolean doDistanceSort)
        throws IOException, InterruptedException {

    CharsetDecoder decoder = StandardCharsets.UTF_8.newDecoder().onMalformedInput(CodingErrorAction.REPORT)
            .onUnmappableCharacter(CodingErrorAction.REPORT);

    int BUFFER_SIZE = 1 << 16; // 64K
    InputStream is;//from   w w w.j a va2s  .  c  om
    if (SMALL) {
        is = Files.newInputStream(Paths.get(DATA_LOCATION, "latlon.subsetPlusAllLondon.txt"));
    } else {
        is = Files.newInputStream(Paths.get(DATA_LOCATION, "latlon.txt"));
    }
    BufferedReader reader = new BufferedReader(new InputStreamReader(is, decoder), BUFFER_SIZE);

    int NUM_THREADS;
    if (fast) {
        NUM_THREADS = 4;
    } else {
        NUM_THREADS = 1;
    }

    int CHUNK = 10000;

    long t0 = System.nanoTime();
    AtomicLong totalCount = new AtomicLong();

    for (int part = 0; part < NUM_PARTS; part++) {
        Directory dir = FSDirectory.open(Paths.get(getName(part, doDistanceSort)));

        IndexWriterConfig iwc = new IndexWriterConfig(null);
        iwc.setCodec(getCodec(fast));
        iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
        if (fast) {
            ((TieredMergePolicy) iwc.getMergePolicy()).setMaxMergedSegmentMB(Double.POSITIVE_INFINITY);
            iwc.setRAMBufferSizeMB(1024);
        } else {
            iwc.setMaxBufferedDocs(109630);
            iwc.setMergePolicy(new LogDocMergePolicy());
            iwc.setMergeScheduler(new SerialMergeScheduler());
        }
        iwc.setInfoStream(new PrintStreamInfoStream(System.out));
        IndexWriter w = new IndexWriter(dir, iwc);

        Thread[] threads = new Thread[NUM_THREADS];
        AtomicBoolean finished = new AtomicBoolean();
        Object lock = new Object();

        final int finalPart = part;

        for (int t = 0; t < NUM_THREADS; t++) {
            threads[t] = new Thread() {
                @Override
                public void run() {
                    String[] lines = new String[CHUNK];
                    int chunkCount = 0;
                    while (finished.get() == false) {
                        try {
                            int count = CHUNK;
                            synchronized (lock) {
                                for (int i = 0; i < CHUNK; i++) {
                                    String line = reader.readLine();
                                    if (line == null) {
                                        count = i;
                                        finished.set(true);
                                        break;
                                    }
                                    lines[i] = line;
                                }
                                if (finalPart == 0 && totalCount.get() + count >= 2000000000) {
                                    finished.set(true);
                                }
                            }

                            for (int i = 0; i < count; i++) {
                                String[] parts = lines[i].split(",");
                                //long id = Long.parseLong(parts[0]);
                                double lat = Double.parseDouble(parts[1]);
                                double lon = Double.parseDouble(parts[2]);
                                Document doc = new Document();
                                if (useGeoPoint) {
                                    doc.add(new GeoPointField("point", lat, lon, Field.Store.NO));
                                } else if (useGeo3D || useGeo3DLarge) {
                                    doc.add(new Geo3DPoint("point", lat, lon));
                                } else {
                                    doc.add(new LatLonPoint("point", lat, lon));
                                    if (doDistanceSort) {
                                        doc.add(new LatLonDocValuesField("point", lat, lon));
                                    }
                                }
                                w.addDocument(doc);
                                long x = totalCount.incrementAndGet();
                                if (x % 1000000 == 0) {
                                    System.out.println(x + "...");
                                }
                            }
                            chunkCount++;
                            if (false && SMALL == false && chunkCount == 20000) {
                                System.out.println("NOW BREAK EARLY");
                                break;
                            }
                        } catch (IOException ioe) {
                            throw new RuntimeException(ioe);
                        }
                    }
                }
            };
            threads[t].start();
        }

        for (Thread thread : threads) {
            thread.join();
        }

        System.out.println("Part " + part + " is done: w.maxDoc()=" + w.maxDoc());
        w.commit();
        System.out.println("done commit");
        long t1 = System.nanoTime();
        System.out.println(((t1 - t0) / 1000000000.0) + " sec to index part " + part);
        if (doForceMerge) {
            w.forceMerge(1);
            long t2 = System.nanoTime();
            System.out.println(((t2 - t1) / 1000000000.0) + " sec to force merge part " + part);
        }
        w.close();
    }

    //System.out.println(totalCount.get() + " total docs");
    //System.out.println("Force merge...");
    //w.forceMerge(1);
    //long t2 = System.nanoTime();
    //System.out.println(((t2-t1)/1000000000.0) + " sec to force merge");

    //w.close();
    //long t3 = System.nanoTime();
    //System.out.println(((t3-t2)/1000000000.0) + " sec to close");
    //System.out.println(((t3-t2)/1000000000.0) + " sec to close");
}

From source file:perf.IndexAndSearchOpenStreetMapsGeo3D.java

License:Apache License

private static void createIndex() throws IOException {

    long t0 = System.nanoTime();

    CharsetDecoder decoder = StandardCharsets.UTF_8.newDecoder().onMalformedInput(CodingErrorAction.REPORT)
            .onUnmappableCharacter(CodingErrorAction.REPORT);

    int BUFFER_SIZE = 1 << 16; // 64K
    InputStream is = Files
            .newInputStream(Paths.get("/lucenedata/open-street-maps/latlon.subsetPlusAllLondon.txt"));
    BufferedReader reader = new BufferedReader(new InputStreamReader(is, decoder), BUFFER_SIZE);

    DocValuesFormat dvFormat = new Geo3DDocValuesFormat();
    Directory dir = FSDirectory.open(Paths.get("bkdtestgeo3d"));
    Codec codec = new Lucene53Codec() {
        @Override/*w  w w  .j  ava2 s  . c  om*/
        public DocValuesFormat getDocValuesFormatForField(String field) {
            return dvFormat;
        }
    };
    IndexWriterConfig iwc = new IndexWriterConfig(null);
    //iwc.setRAMBufferSizeMB(256);
    iwc.setMaxBufferedDocs(109630);
    iwc.setCodec(codec);
    iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
    IndexWriter w = new IndexWriter(dir, iwc);

    while (true) {
        String line = reader.readLine();
        if (line == null) {
            break;
        }

        String[] parts = line.split(",");
        //long id = Long.parseLong(parts[0]);
        double lat = toRadians(Double.parseDouble(parts[1]));
        double lon = toRadians(Double.parseDouble(parts[2]));
        Document doc = new Document();
        doc.add(new Geo3DPointField("point", PlanetModel.WGS84, lat, lon));
        w.addDocument(doc);
    }
    long t1 = System.nanoTime();
    System.out.println(((t1 - t0) / 1000000000.0) + " sec to build index");
    System.out.println(w.maxDoc() + " total docs");
    //w.forceMerge(1);

    w.close();
    long t2 = System.nanoTime();
    System.out.println(((t2 - t1) / 1000000000.0) + " sec to close");
}

From source file:perf.Indexer.java

License:Apache License

private static void _main(String[] clArgs) throws Exception {

    Args args = new Args(clArgs);

    // EG: -facets Date -facets characterCount ...
    FacetsConfig facetsConfig = new FacetsConfig();
    facetsConfig.setHierarchical("Date", true);
    final Set<String> facetFields = new HashSet<String>();
    if (args.hasArg("-facets")) {
        for (String arg : args.getStrings("-facets")) {
            facetFields.add(arg);/*w ww  .j  a va2  s  .co  m*/
        }
    }

    final String dirImpl = args.getString("-dirImpl");
    final String dirPath = args.getString("-indexPath") + "/index";

    final Directory dir;
    OpenDirectory od = OpenDirectory.get(dirImpl);

    dir = od.open(Paths.get(dirPath));

    final String analyzer = args.getString("-analyzer");
    final Analyzer a;
    if (analyzer.equals("EnglishAnalyzer")) {
        a = new EnglishAnalyzer();
    } else if (analyzer.equals("StandardAnalyzer")) {
        a = new StandardAnalyzer();
    } else if (analyzer.equals("StandardAnalyzerNoStopWords")) {
        a = new StandardAnalyzer(CharArraySet.EMPTY_SET);
    } else if (analyzer.equals("ShingleStandardAnalyzer")) {
        a = new ShingleAnalyzerWrapper(new StandardAnalyzer(), 2, 2);
    } else if (analyzer.equals("ShingleStandardAnalyzerNoStopWords")) {
        a = new ShingleAnalyzerWrapper(new StandardAnalyzer(CharArraySet.EMPTY_SET), 2, 2);
    } else {
        throw new RuntimeException("unknown analyzer " + analyzer);
    }

    final String lineFile = args.getString("-lineDocsFile");

    // -1 means all docs in the line file:
    final int docCountLimit = args.getInt("-docCountLimit");
    final int numThreads = args.getInt("-threadCount");

    final boolean doForceMerge = args.getFlag("-forceMerge");
    final boolean verbose = args.getFlag("-verbose");

    String indexSortField = null;
    SortField.Type indexSortType = null;

    if (args.hasArg("-indexSort")) {
        indexSortField = args.getString("-indexSort");

        int i = indexSortField.indexOf(':');
        if (i == -1) {
            throw new IllegalArgumentException(
                    "-indexSort should have form field:type; got: " + indexSortField);
        }
        String typeString = indexSortField.substring(i + 1, indexSortField.length());
        if (typeString.equals("long")) {
            indexSortType = SortField.Type.LONG;
        } else if (typeString.equals("string")) {
            indexSortType = SortField.Type.STRING;
        } else {
            throw new IllegalArgumentException("-indexSort can only handle 'long' sort; got: " + typeString);
        }
        indexSortField = indexSortField.substring(0, i);
    } else {
        indexSortType = null;
    }

    final double ramBufferSizeMB = args.getDouble("-ramBufferMB");
    final int maxBufferedDocs = args.getInt("-maxBufferedDocs");

    final String defaultPostingsFormat = args.getString("-postingsFormat");
    final boolean doDeletions = args.getFlag("-deletions");
    final boolean printDPS = args.getFlag("-printDPS");
    final boolean waitForMerges = args.getFlag("-waitForMerges");
    final boolean waitForCommit = args.getFlag("-waitForCommit");
    final String mergePolicy = args.getString("-mergePolicy");
    final Mode mode;
    final boolean doUpdate = args.getFlag("-update");
    if (doUpdate) {
        mode = Mode.UPDATE;
    } else {
        mode = Mode.valueOf(args.getString("-mode", "add").toUpperCase(Locale.ROOT));
    }
    int randomDocIDMax;
    if (mode == Mode.UPDATE) {
        randomDocIDMax = args.getInt("-randomDocIDMax");
    } else {
        randomDocIDMax = -1;
    }
    final String idFieldPostingsFormat = args.getString("-idFieldPostingsFormat");
    final boolean addGroupingFields = args.getFlag("-grouping");
    final boolean useCFS = args.getFlag("-cfs");
    final boolean storeBody = args.getFlag("-store");
    final boolean tvsBody = args.getFlag("-tvs");
    final boolean bodyPostingsOffsets = args.getFlag("-bodyPostingsOffsets");
    final int maxConcurrentMerges = args.getInt("-maxConcurrentMerges");
    final boolean addDVFields = args.getFlag("-dvfields");
    final boolean doRandomCommit = args.getFlag("-randomCommit");
    final boolean useCMS = args.getFlag("-useCMS");
    final boolean disableIOThrottle = args.getFlag("-disableIOThrottle");

    if (waitForCommit == false && waitForMerges) {
        throw new RuntimeException("pass -waitForCommit if you pass -waitForMerges");
    }

    if (waitForCommit == false && doForceMerge) {
        throw new RuntimeException("pass -waitForCommit if you pass -forceMerge");
    }

    if (waitForCommit == false && doDeletions) {
        throw new RuntimeException("pass -waitForCommit if you pass -deletions");
    }

    if (useCMS == false && disableIOThrottle) {
        throw new RuntimeException("-disableIOThrottle only makes sense with -useCMS");
    }

    final double nrtEverySec;
    if (args.hasArg("-nrtEverySec")) {
        nrtEverySec = args.getDouble("-nrtEverySec");
    } else {
        nrtEverySec = -1.0;
    }

    // True to start back at the beginning if we run out of
    // docs from the line file source:
    final boolean repeatDocs = args.getFlag("-repeatDocs");

    final String facetDVFormatName;
    if (facetFields.isEmpty()) {
        facetDVFormatName = "Lucene54";
    } else {
        facetDVFormatName = args.getString("-facetDVFormat");
    }

    if (addGroupingFields && docCountLimit == -1) {
        a.close();
        throw new RuntimeException("cannot add grouping fields unless docCount is set");
    }

    args.check();

    System.out.println("Dir: " + dirImpl);
    System.out.println("Index path: " + dirPath);
    System.out.println("Analyzer: " + analyzer);
    System.out.println("Line file: " + lineFile);
    System.out.println("Doc count limit: " + (docCountLimit == -1 ? "all docs" : "" + docCountLimit));
    System.out.println("Threads: " + numThreads);
    System.out.println("Force merge: " + (doForceMerge ? "yes" : "no"));
    System.out.println("Verbose: " + (verbose ? "yes" : "no"));
    System.out.println("RAM Buffer MB: " + ramBufferSizeMB);
    System.out.println("Max buffered docs: " + maxBufferedDocs);
    System.out.println("Default postings format: " + defaultPostingsFormat);
    System.out.println("Do deletions: " + (doDeletions ? "yes" : "no"));
    System.out.println("Wait for merges: " + (waitForMerges ? "yes" : "no"));
    System.out.println("Wait for commit: " + (waitForCommit ? "yes" : "no"));
    System.out.println("IO throttle: " + (disableIOThrottle ? "no" : "yes"));
    System.out.println("Merge policy: " + mergePolicy);
    System.out.println("Mode: " + mode);
    if (mode == Mode.UPDATE) {
        System.out.println("DocIDMax: " + randomDocIDMax);
    }
    System.out.println("ID field postings format: " + idFieldPostingsFormat);
    System.out.println("Add grouping fields: " + (addGroupingFields ? "yes" : "no"));
    System.out.println("Compound file format: " + (useCFS ? "yes" : "no"));
    System.out.println("Store body field: " + (storeBody ? "yes" : "no"));
    System.out.println("Term vectors for body field: " + (tvsBody ? "yes" : "no"));
    System.out.println("Facet DV Format: " + facetDVFormatName);
    System.out.println("Facet fields: " + facetFields);
    System.out.println("Body postings offsets: " + (bodyPostingsOffsets ? "yes" : "no"));
    System.out.println("Max concurrent merges: " + maxConcurrentMerges);
    System.out.println("Add DocValues fields: " + addDVFields);
    System.out.println("Use ConcurrentMergeScheduler: " + useCMS);
    if (nrtEverySec > 0.0) {
        System.out.println("Open & close NRT reader every: " + nrtEverySec + " sec");
    } else {
        System.out.println("Open & close NRT reader every: never");
    }
    System.out.println("Repeat docs: " + repeatDocs);

    if (verbose) {
        InfoStream.setDefault(new PrintStreamInfoStream(System.out));
    }

    final IndexWriterConfig iwc = new IndexWriterConfig(a);

    if (indexSortField != null) {
        iwc.setIndexSort(new Sort(new SortField(indexSortField, indexSortType)));
    }

    if (mode == Mode.UPDATE) {
        iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
    } else {
        iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
    }

    iwc.setMaxBufferedDocs(maxBufferedDocs);
    iwc.setRAMBufferSizeMB(ramBufferSizeMB);

    // So flushed segments do/don't use CFS:
    iwc.setUseCompoundFile(useCFS);

    final AtomicBoolean indexingFailed = new AtomicBoolean();

    iwc.setMergeScheduler(getMergeScheduler(indexingFailed, useCMS, maxConcurrentMerges, disableIOThrottle));
    iwc.setMergePolicy(getMergePolicy(mergePolicy, useCFS));

    // Keep all commit points:
    if (doDeletions || doForceMerge) {
        iwc.setIndexDeletionPolicy(NoDeletionPolicy.INSTANCE);
    }

    final Codec codec = new Lucene62Codec() {
        @Override
        public PostingsFormat getPostingsFormatForField(String field) {
            return PostingsFormat.forName(field.equals("id") ? idFieldPostingsFormat : defaultPostingsFormat);
        }

        private final DocValuesFormat facetsDVFormat = DocValuesFormat.forName(facetDVFormatName);
        //private final DocValuesFormat lucene42DVFormat = DocValuesFormat.forName("Lucene42");
        //private final DocValuesFormat diskDVFormat = DocValuesFormat.forName("Disk");
        //        private final DocValuesFormat lucene45DVFormat = DocValuesFormat.forName("Lucene45");
        private final DocValuesFormat directDVFormat = DocValuesFormat.forName("Direct");

        @Override
        public DocValuesFormat getDocValuesFormatForField(String field) {
            if (facetFields.contains(field) || field.equals("$facets")) {
                return facetsDVFormat;
                //} else if (field.equals("$facets_sorted_doc_values")) {
                //return diskDVFormat;
            } else {
                // Use default DVFormat for all else:
                // System.out.println("DV: field=" + field + " format=" + super.getDocValuesFormatForField(field));
                return super.getDocValuesFormatForField(field);
            }
        }
    };

    iwc.setCodec(codec);

    System.out.println("IW config=" + iwc);

    IndexWriter w = new IndexWriter(dir, iwc);

    System.out.println("Index has " + w.maxDoc() + " docs");

    final TaxonomyWriter taxoWriter;
    if (facetFields.isEmpty() == false) {
        taxoWriter = new DirectoryTaxonomyWriter(od.open(Paths.get(args.getString("-indexPath"), "facets")),
                IndexWriterConfig.OpenMode.CREATE);
    } else {
        taxoWriter = null;
    }

    // Fixed seed so group field values are always consistent:
    final Random random = new Random(17);

    LineFileDocs lineFileDocs = new LineFileDocs(lineFile, repeatDocs, storeBody, tvsBody, bodyPostingsOffsets,
            false, taxoWriter, facetFields, facetsConfig, addDVFields);

    float docsPerSecPerThread = -1f;
    //float docsPerSecPerThread = 100f;

    IndexThreads threads = new IndexThreads(random, w, indexingFailed, lineFileDocs, numThreads, docCountLimit,
            addGroupingFields, printDPS, mode, docsPerSecPerThread, null, nrtEverySec, randomDocIDMax);

    System.out.println("\nIndexer: start");
    final long t0 = System.currentTimeMillis();

    threads.start();

    while (!threads.done() && indexingFailed.get() == false) {
        Thread.sleep(100);

        // Commits once per minute on average:
        if (doRandomCommit && random.nextInt(600) == 17) {
            System.out.println("Indexer: now commit");
            long commitStartNS = System.nanoTime();
            w.commit();
            System.out.println(String.format(Locale.ROOT, "Indexer: commit took %.1f msec",
                    (System.nanoTime() - commitStartNS) / 1000000.));
        }
    }

    threads.stop();

    final long t1 = System.currentTimeMillis();
    System.out.println("\nIndexer: indexing done (" + (t1 - t0) + " msec); total " + w.maxDoc() + " docs");
    // if we update we can not tell how many docs
    if (threads.failed.get()) {
        throw new RuntimeException("exceptions during indexing");
    }
    if (mode != Mode.UPDATE && docCountLimit != -1 && w.maxDoc() != docCountLimit) {
        throw new RuntimeException("w.maxDoc()=" + w.maxDoc() + " but expected " + docCountLimit);
    }

    final Map<String, String> commitData = new HashMap<String, String>();

    if (waitForMerges) {
        w.close();
        IndexWriterConfig iwc2 = new IndexWriterConfig(a);
        iwc2.setMergeScheduler(
                getMergeScheduler(indexingFailed, useCMS, maxConcurrentMerges, disableIOThrottle));
        iwc2.setMergePolicy(getMergePolicy(mergePolicy, useCFS));
        iwc2.setCodec(codec);
        iwc2.setUseCompoundFile(useCFS);
        iwc2.setMaxBufferedDocs(maxBufferedDocs);
        iwc2.setRAMBufferSizeMB(ramBufferSizeMB);
        if (indexSortField != null) {
            iwc2.setIndexSort(new Sort(new SortField(indexSortField, indexSortType)));
        }

        w = new IndexWriter(dir, iwc2);
        long t2 = System.currentTimeMillis();
        System.out.println("\nIndexer: waitForMerges done (" + (t2 - t1) + " msec)");
    }

    if (waitForCommit) {
        commitData.put("userData", "multi");
        w.setLiveCommitData(commitData.entrySet());
        long t2 = System.currentTimeMillis();
        w.commit();
        long t3 = System.currentTimeMillis();
        System.out.println("\nIndexer: commit multi (took " + (t3 - t2) + " msec)");
    } else {
        w.rollback();
        w = null;
    }

    if (doForceMerge) {
        long forceMergeStartMSec = System.currentTimeMillis();
        w.forceMerge(1);
        long forceMergeEndMSec = System.currentTimeMillis();
        System.out.println(
                "\nIndexer: force merge done (took " + (forceMergeEndMSec - forceMergeStartMSec) + " msec)");

        commitData.put("userData", "single");
        w.setLiveCommitData(commitData.entrySet());
        w.commit();
        final long t5 = System.currentTimeMillis();
        System.out.println("\nIndexer: commit single done (took " + (t5 - forceMergeEndMSec) + " msec)");
    }

    if (doDeletions) {
        final long t5 = System.currentTimeMillis();
        // Randomly delete 5% of the docs
        final Set<Integer> deleted = new HashSet<Integer>();
        final int maxDoc = w.maxDoc();
        final int toDeleteCount = (int) (maxDoc * 0.05);
        System.out.println("\nIndexer: delete " + toDeleteCount + " docs");
        while (deleted.size() < toDeleteCount) {
            final int id = random.nextInt(maxDoc);
            if (!deleted.contains(id)) {
                deleted.add(id);
                w.deleteDocuments(new Term("id", LineFileDocs.intToID(id)));
            }
        }
        final long t6 = System.currentTimeMillis();
        System.out.println("\nIndexer: deletes done (took " + (t6 - t5) + " msec)");

        commitData.put("userData", doForceMerge ? "delsingle" : "delmulti");
        w.setLiveCommitData(commitData.entrySet());
        w.commit();
        final long t7 = System.currentTimeMillis();
        System.out.println("\nIndexer: commit delmulti done (took " + (t7 - t6) + " msec)");

        if (doUpdate || w.numDocs() != maxDoc - toDeleteCount) {
            throw new RuntimeException(
                    "count mismatch: w.numDocs()=" + w.numDocs() + " but expected " + (maxDoc - toDeleteCount));
        }
    }

    if (taxoWriter != null) {
        System.out.println("Taxonomy has " + taxoWriter.getSize() + " ords");
        taxoWriter.commit();
        taxoWriter.close();
    }

    final long tCloseStart = System.currentTimeMillis();
    if (w != null) {
        w.close();
        w = null;
    }
    if (waitForCommit) {
        System.out.println("\nIndexer: at close: " + SegmentInfos.readLatestCommit(dir));
        System.out.println("\nIndexer: close took " + (System.currentTimeMillis() - tCloseStart) + " msec");
    }

    dir.close();
    final long tFinal = System.currentTimeMillis();
    System.out.println("\nIndexer: net bytes indexed " + threads.getBytesIndexed());

    final long indexingTime;
    if (waitForCommit) {
        indexingTime = tFinal - t0;
        System.out.println("\nIndexer: finished (" + indexingTime + " msec)");
    } else {
        indexingTime = t1 - t0;
        System.out.println("\nIndexer: finished (" + indexingTime + " msec), excluding commit");
    }
    System.out.println(
            "\nIndexer: " + (threads.getBytesIndexed() / 1024. / 1024. / 1024. / (indexingTime / 3600000.))
                    + " GB/hour plain text");
}

From source file:perf.IndexGeoNames.java

License:Apache License

public static void main(String[] args) throws Exception {
    String geoNamesFile = args[0];
    File indexPath = new File(args[1]);
    int numThreads = Integer.parseInt(args[2]);
    int precStep = Integer.parseInt(args[3]);
    if (indexPath.exists()) {
        throw new IllegalArgumentException("please remove indexPath \"" + indexPath + "\" before running");
    }//from  w ww.  ja  v  a 2 s  .  c  om

    Directory dir = FSDirectory.open(indexPath);
    //IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_48, new StandardAnalyzer(Version.LUCENE_48));
    IndexWriterConfig iwc = new IndexWriterConfig(new StandardAnalyzer());
    //iwc.setRAMBufferSizeMB(350);
    iwc.setInfoStream(new PrintStreamInfoStream(System.out));
    if (normal == false) {
        iwc.setRAMBufferSizeMB(1024);
        iwc.setMergePolicy(NoMergePolicy.INSTANCE);
        //iwc.setMergePolicy(NoMergePolicy.NO_COMPOUND_FILES);
    } else {
        // 5/5 segments:
        iwc.setMaxBufferedDocs(157234);
        iwc.setRAMBufferSizeMB(-1);
    }
    //((ConcurrentMergeScheduler) iwc.getMergeScheduler()).setMaxMergesAndThreads(3, 1);
    final IndexWriter w = new IndexWriter(dir, iwc);

    final Field.Store store = Field.Store.NO;

    final FieldType doubleFieldType = new FieldType(
            store == Field.Store.NO ? DoubleField.TYPE_NOT_STORED : DoubleField.TYPE_STORED);
    doubleFieldType.setNumericPrecisionStep(precStep);
    doubleFieldType.freeze();

    final FieldType longFieldType = new FieldType(
            store == Field.Store.NO ? LongField.TYPE_NOT_STORED : LongField.TYPE_STORED);
    longFieldType.setNumericPrecisionStep(precStep);
    longFieldType.freeze();

    final FieldType intFieldType = new FieldType(
            store == Field.Store.NO ? IntField.TYPE_NOT_STORED : IntField.TYPE_STORED);
    intFieldType.setNumericPrecisionStep(precStep);
    intFieldType.freeze();

    // 64K buffer:
    InputStream is = new FileInputStream(geoNamesFile);
    final BufferedReader reader = new BufferedReader(new InputStreamReader(is, "UTF-8"), 1 << 16);
    final AtomicInteger docsIndexed = new AtomicInteger();

    final long startMS = System.currentTimeMillis();
    Thread[] threads = new Thread[numThreads];

    // With reuse it's ~ 38% faster (41.8 sec vs 67.0 sec):
    final boolean reuseDocAndFields = false;

    for (int i = 0; i < numThreads; i++) {
        threads[i] = new Thread() {
            @Override
            public void run() {
                ParsePosition datePos = new ParsePosition(0);
                SimpleDateFormat dateParser = new SimpleDateFormat("yyyy-MM-dd", Locale.US);

                if (reuseDocAndFields) {
                    Document doc = new Document();
                    IntField geoNameID = new IntField("geoNameID", 0, intFieldType);
                    doc.add(geoNameID);
                    TextField nameField = new TextField("name", "", store);
                    doc.add(nameField);
                    TextField asciiNameField = new TextField("asciiName", "", store);
                    doc.add(asciiNameField);
                    TextField alternateNameField = new TextField("alternateNames", "", store);
                    doc.add(alternateNameField);
                    StringField featureClassField = new StringField("featureClass", "", store);
                    doc.add(featureClassField);
                    StringField featureCodeField = new StringField("featureCode", "", store);
                    doc.add(featureCodeField);
                    StringField countryCodeField = new StringField("countryCode", "", store);
                    doc.add(countryCodeField);
                    StringField cc2Field = new StringField("cc2", "", store);
                    doc.add(cc2Field);
                    StringField admin1Field = new StringField("admin1", "", store);
                    doc.add(admin1Field);
                    StringField admin2Field = new StringField("admin2", "", store);
                    doc.add(admin2Field);
                    StringField admin3Field = new StringField("admin3", "", store);
                    doc.add(admin3Field);
                    StringField admin4Field = new StringField("admin4", "", store);
                    doc.add(admin4Field);
                    StringField tzField = new StringField("timezone", "", store);
                    doc.add(tzField);

                    while (true) {
                        try {

                            // Curiously BufferedReader.readLine seems to be thread-safe...
                            String line = reader.readLine();
                            if (line == null) {
                                break;
                            }
                            String[] values = line.split("\t");

                            geoNameID.setIntValue(Integer.parseInt(values[0]));
                            nameField.setStringValue(values[1]);
                            asciiNameField.setStringValue(values[2]);
                            alternateNameField.setStringValue(values[3]);

                            /*
                            if (values[4].isEmpty() == false) {
                              double v = Double.parseDouble(values[4]);
                              doc.add(new DoubleField("latitude", v, doubleFieldType));
                              doc.add(new DoubleDocValuesField("latitude", v));
                            }
                            if (values[5].isEmpty() == false) {
                              double v = Double.parseDouble(values[5]);
                              doc.add(new DoubleField("longitude", v, doubleFieldType));
                              doc.add(new DoubleDocValuesField("longitude", v));
                            }
                            */

                            featureClassField.setStringValue(values[6]);
                            featureCodeField.setStringValue(values[7]);
                            countryCodeField.setStringValue(values[8]);
                            cc2Field.setStringValue(values[9]);
                            admin1Field.setStringValue(values[10]);
                            admin2Field.setStringValue(values[11]);
                            admin3Field.setStringValue(values[12]);
                            admin4Field.setStringValue(values[13]);

                            /*
                            if (values[14].isEmpty() == false) {
                              long v = Long.parseLong(values[14]);
                              doc.add(new LongField("population", v, longFieldType));
                              doc.add(new NumericDocValuesField("population", v));
                            }
                            if (values[15].isEmpty() == false) {
                              long v = Long.parseLong(values[15]);
                              doc.add(new LongField("elevation", v, longFieldType));
                              doc.add(new NumericDocValuesField("elevation", v));
                            }
                            if (values[16].isEmpty() == false) {
                              doc.add(new IntField("dem", Integer.parseInt(values[16]), intFieldType));
                            }
                            */

                            tzField.setStringValue(values[17]);
                            /*
                            if (values[18].isEmpty() == false) {
                              datePos.setIndex(0);
                              Date date = dateParser.parse(values[18], datePos);
                              doc.add(new LongField("modified", date.getTime(), longFieldType));
                            }
                            */
                            w.addDocument(doc);
                            int count = docsIndexed.incrementAndGet();
                            if (count % 200000 == 0) {
                                long ms = System.currentTimeMillis();
                                System.out.println(count + ": " + ((ms - startMS) / 1000.0) + " sec");
                            }
                        } catch (Exception e) {
                            throw new RuntimeException(e);
                        }
                    }
                } else {
                    while (true) {
                        try {

                            // Curiously BufferedReader.readLine seems to be thread-safe...
                            String line = reader.readLine();
                            if (line == null) {
                                break;
                            }
                            String[] values = line.split("\t");

                            Document doc = new Document();

                            doc.add(new IntField("geoNameID", Integer.parseInt(values[0]), intFieldType));
                            doc.add(new TextField("name", values[1], store));
                            doc.add(new TextField("asciiName", values[2], store));
                            doc.add(new TextField("alternateNames", values[3], store));

                            if (values[4].isEmpty() == false) {
                                double v = Double.parseDouble(values[4]);
                                doc.add(new DoubleField("latitude", v, doubleFieldType));
                                doc.add(new DoubleDocValuesField("latitude", v));
                            }
                            if (values[5].isEmpty() == false) {
                                double v = Double.parseDouble(values[5]);
                                doc.add(new DoubleField("longitude", v, doubleFieldType));
                                doc.add(new DoubleDocValuesField("longitude", v));
                            }

                            doc.add(new StringField("featureClass", values[6], store));
                            doc.add(new StringField("featureCode", values[7], store));
                            doc.add(new StringField("countryCode", values[8], store));
                            doc.add(new StringField("cc2", values[9], store));
                            doc.add(new StringField("admin1Code", values[10], store));
                            doc.add(new StringField("admin2Code", values[11], store));
                            doc.add(new StringField("admin3Code", values[12], store));
                            doc.add(new StringField("admin4Code", values[13], store));

                            if (values[14].isEmpty() == false) {
                                long v = Long.parseLong(values[14]);
                                doc.add(new LongField("population", v, longFieldType));
                                doc.add(new NumericDocValuesField("population", v));
                            }
                            if (values[15].isEmpty() == false) {
                                long v = Long.parseLong(values[15]);
                                doc.add(new LongField("elevation", v, longFieldType));
                                doc.add(new NumericDocValuesField("elevation", v));
                            }
                            if (values[16].isEmpty() == false) {
                                doc.add(new IntField("dem", Integer.parseInt(values[16]), intFieldType));
                            }

                            doc.add(new StringField("timezone", values[17], store));

                            if (values[18].isEmpty() == false) {
                                datePos.setIndex(0);
                                Date date = dateParser.parse(values[18], datePos);
                                doc.add(new LongField("modified", date.getTime(), longFieldType));
                            }
                            w.addDocument(doc);
                            int count = docsIndexed.incrementAndGet();
                            if (count % 200000 == 0) {
                                long ms = System.currentTimeMillis();
                                System.out.println(count + ": " + ((ms - startMS) / 1000.0) + " sec");
                            }
                        } catch (Exception e) {
                            throw new RuntimeException(e);
                        }
                    }
                }
            }
        };
        threads[i].start();
    }
    DirectoryReader r = DirectoryReader.open(w, true);
    for (int i = 0; i < 100; i++) {
        DirectoryReader r2 = DirectoryReader.openIfChanged(r);
        if (r2 != null) {
            r.close();
            r = r2;
        }
        Thread.sleep(500);
    }
    if (r != null) {
        r.close();
        r = null;
    }
    for (int i = 0; i < numThreads; i++) {
        threads[i].join();
    }
    long ms = System.currentTimeMillis();
    System.out.println(docsIndexed + ": " + ((ms - startMS) / 1000.0) + " sec");
    //System.out.println("tot conflicts: " + BytesRefHash.totConflict);
    //w.shutdown(normal);
    w.close();
    dir.close();
}

From source file:perf.IndexNumbers.java

License:Apache License

public static void main(String[] args) throws Exception {
    String numbersFile = args[0];
    File indexPath = new File(args[1]);
    int numThreads = Integer.parseInt(args[2]);
    int precStep = Integer.parseInt(args[3]);
    if (indexPath.exists()) {
        throw new IllegalArgumentException("please remove indexPath \"" + indexPath + "\" before running");
    }/*from   w w w.  j  a  v a 2s .c o m*/

    Directory dir = FSDirectory.open(indexPath);
    IndexWriterConfig iwc = new IndexWriterConfig(new StandardAnalyzer());
    //iwc.setRAMBufferSizeMB(350);
    //iwc.setInfoStream(new PrintStreamInfoStream(System.out));
    if (normal == false) {
        iwc.setRAMBufferSizeMB(64);
        iwc.setMergePolicy(NoMergePolicy.INSTANCE);
    } else {
        // 5/5 segments:
        // 3273681 docs in twitter timestamps
        iwc.setMaxBufferedDocs(59522);
        iwc.setRAMBufferSizeMB(-1);
    }
    //((ConcurrentMergeScheduler) iwc.getMergeScheduler()).setMaxMergesAndThreads(3, 1);
    final IndexWriter w = new IndexWriter(dir, iwc);

    final Field.Store store = Field.Store.NO;

    final FieldType doubleFieldType = new FieldType(
            store == Field.Store.NO ? DoubleField.TYPE_NOT_STORED : DoubleField.TYPE_STORED);
    doubleFieldType.setNumericPrecisionStep(precStep);
    doubleFieldType.freeze();

    final FieldType longFieldType = new FieldType(
            store == Field.Store.NO ? LongField.TYPE_NOT_STORED : LongField.TYPE_STORED);
    longFieldType.setNumericPrecisionStep(precStep);
    longFieldType.freeze();

    final FieldType intFieldType = new FieldType(
            store == Field.Store.NO ? IntField.TYPE_NOT_STORED : IntField.TYPE_STORED);
    intFieldType.setNumericPrecisionStep(precStep);
    intFieldType.freeze();

    // 64K buffer:
    InputStream is = new FileInputStream(numbersFile);
    final BufferedReader reader = new BufferedReader(new InputStreamReader(is, "UTF-8"), 1 << 16);
    final AtomicInteger docsIndexed = new AtomicInteger();

    final long startMS = System.currentTimeMillis();
    Thread[] threads = new Thread[numThreads];
    for (int i = 0; i < numThreads; i++) {
        threads[i] = new Thread() {
            @Override
            public void run() {
                ParsePosition datePos = new ParsePosition(0);
                SimpleDateFormat dateParser = new SimpleDateFormat("yyyy-MM-dd", Locale.US);

                Document doc = new Document();
                Field field = new LongField("number", 0L, longFieldType);
                doc.add(field);

                while (true) {
                    try {
                        // Curiously BufferedReader.readLine seems to be thread-safe...
                        String line = reader.readLine();
                        if (line == null) {
                            break;
                        }
                        field.setLongValue(Long.parseLong(line.trim()));
                        w.addDocument(doc);
                        int count = docsIndexed.incrementAndGet();
                        if (count % 200000 == 0) {
                            long ms = System.currentTimeMillis();
                            System.out.println(count + ": " + ((ms - startMS) / 1000.0) + " sec");
                        }
                    } catch (Exception e) {
                        throw new RuntimeException(e);
                    }
                }
            }
        };
        threads[i].start();
    }
    for (int i = 0; i < numThreads; i++) {
        threads[i].join();
    }
    long ms = System.currentTimeMillis();
    System.out.println(docsIndexed + ": " + ((ms - startMS) / 1000.0) + " sec");
    //System.out.println("tot conflicts: " + BytesRefHash.totConflict);
    if (normal == false) {
        w.abortMerges();
    }
    w.close();
    dir.close();
}

From source file:perf.PKLookupPerfTest3X.java

License:Apache License

private static void createIndex(final Directory dir, final int docCount) throws IOException {
    System.out.println("Create index... " + docCount + " docs");

    final IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_35,
            new WhitespaceAnalyzer(Version.LUCENE_35));
    iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
    // 5 segs per level in 3 levels:
    int mbd = docCount / (5 * 111);
    iwc.setMaxBufferedDocs(mbd);
    iwc.setRAMBufferSizeMB(-1.0);/*from   w  w w.  j a v  a  2 s  .  com*/
    ((TieredMergePolicy) iwc.getMergePolicy()).setUseCompoundFile(false);
    final IndexWriter w = new IndexWriter(dir, iwc);
    //w.setInfoStream(System.out);

    final Document doc = new Document();
    final Field field = new Field("id", "", Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS);
    field.setIndexOptions(FieldInfo.IndexOptions.DOCS_ONLY);
    doc.add(field);

    for (int i = 0; i < docCount; i++) {
        field.setValue(String.format("%09d", i));
        w.addDocument(doc);
        if ((i + 1) % 1000000 == 0) {
            System.out.println((i + 1) + "...");
        }
    }
    w.waitForMerges();
    w.close();
}

From source file:perf.TermsQueryPerf.java

License:Apache License

public static void main(String[] args) throws Exception {

    List<BytesRef> lookupIDs = new ArrayList<>();
    Random random = new Random(17);
    double rate = 1.01 * ((double) NUM_QUERIES * ID_SEARCH_COUNT) / ID_INDEX_COUNT;

    Path indexPath = Paths.get(args[0]);

    boolean doIndex = Files.exists(indexPath) == false;

    Directory dir = FSDirectory.open(indexPath);

    if (doIndex) {
        IndexWriterConfig iwc = new IndexWriterConfig(new WhitespaceAnalyzer());
        iwc.setMergeScheduler(new SerialMergeScheduler());
        iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE);

        // So I can walk the files and get the *.tip sizes:
        iwc.setUseCompoundFile(false);/*from   www  . ja  v  a2 s . co m*/

        /// 7/7/7 segment structure:
        iwc.setMaxBufferedDocs(ID_INDEX_COUNT / 777);
        iwc.setRAMBufferSizeMB(-1);

        ((TieredMergePolicy) iwc.getMergePolicy()).setFloorSegmentMB(.001);
        ((TieredMergePolicy) iwc.getMergePolicy()).setNoCFSRatio(0.0);

        IndexWriter w = new IndexWriter(dir, iwc);
        // IDIterator ids = zeroPadSequentialIDs(10);
        IDIterator ids = randomIDs(10, random);

        BytesRef idValue = new BytesRef(64);
        for (int i = 0; i < ID_INDEX_COUNT; i++) {
            ids.next(idValue);
            Document doc = new Document();
            doc.add(new StringField("id", idValue, Field.Store.NO));
            w.addDocument(doc);
            if (random.nextDouble() <= rate && lookupIDs.size() < NUM_QUERIES * ID_SEARCH_COUNT) {
                lookupIDs.add(BytesRef.deepCopyOf(idValue));
            }
            if (i % 100000 == 0) {
                System.out.println(i + " docs...");
            }
        }
        w.close();
    }

    IndexReader r = DirectoryReader.open(dir);

    if (doIndex == false) {
        System.out.println("Build lookup ids");
        TermsEnum termsEnum = MultiFields.getTerms(r, "id").iterator();
        BytesRef idValue;
        while ((idValue = termsEnum.next()) != null) {
            if (random.nextDouble() <= rate && lookupIDs.size() < NUM_QUERIES * ID_SEARCH_COUNT) {
                lookupIDs.add(BytesRef.deepCopyOf(idValue));
                //System.out.println("add: " + idValue);
            }
        }
        shuffle(random, lookupIDs);
        System.out.println("Done build lookup ids");
    }

    IndexSearcher s = new IndexSearcher(r);

    if (lookupIDs.size() < NUM_QUERIES * ID_SEARCH_COUNT) {
        throw new RuntimeException(
                "didn't get enough lookup ids: " + (NUM_QUERIES * ID_SEARCH_COUNT) + " vs " + lookupIDs.size());
    }

    List<Query> queries = new ArrayList<Query>();
    for (int i = 0; i < NUM_QUERIES; i++) {

        List<BytesRef> sortedTermBytes = new ArrayList<>();
        for (BytesRef term : lookupIDs.subList(i * ID_SEARCH_COUNT, (i + 1) * ID_SEARCH_COUNT)) {
            sortedTermBytes.add(term);
        }
        Collections.sort(sortedTermBytes);

        // nocommit only do this if term count is high enough?
        // nocommit: we can be more efficient here, go straight to binary:
        Query query = new AutomatonQuery(new Term("id", "manyterms"),
                Automata.makeStringUnion(sortedTermBytes));
        //((MultiTermQuery) query).setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_REWRITE);
        //Query query = new TermsQuery("id", lookupIDs.subList(i*ID_SEARCH_COUNT, (i+1)*ID_SEARCH_COUNT));
        queries.add(query);
    }

    // TODO: also include construction time of queries
    long best = Long.MAX_VALUE;
    for (int iter = 0; iter < 100; iter++) {
        long t0 = System.nanoTime();
        long totCount = 0;
        for (int i = 0; i < NUM_QUERIES; i++) {
            //Query query = new TermsQuery("id", lookupIDs.subList(i*ID_SEARCH_COUNT, (i+1)*ID_SEARCH_COUNT));
            Query query = queries.get(i);
            totCount += s.search(query, 10).totalHits;
        }
        if (totCount != NUM_QUERIES * ID_SEARCH_COUNT) {
            throw new RuntimeException(
                    "totCount=" + totCount + " but expected " + (NUM_QUERIES * ID_SEARCH_COUNT));
        }
        long t = System.nanoTime() - t0;
        System.out.println("ITER: " + iter + ": " + (t / 1000000.) + " msec");
        if (t < best) {
            System.out.println("  **");
            best = t;
        }
    }

    IOUtils.close(r, dir);
}