List of usage examples for org.apache.lucene.index LeafReader maxDoc
public abstract int maxDoc();
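Most of the examples below share one pattern: maxDoc() is one greater than the largest document number in a segment, and it still counts deleted documents, so code loops from 0 to maxDoc() and filters with getLiveDocs(). A minimal sketch of that pattern (the helper name countLiveDocs is illustrative, not taken from any of the sources below):

import org.apache.lucene.index.LeafReader;
import org.apache.lucene.util.Bits;

// Visit every live document in one segment. getLiveDocs() is null when the
// segment has no deletions; otherwise it has exactly maxDoc() bits.
static int countLiveDocs(LeafReader reader) {
    Bits liveDocs = reader.getLiveDocs();
    int live = 0;
    for (int docId = 0; docId < reader.maxDoc(); docId++) {
        if (liveDocs == null || liveDocs.get(docId)) {
            live++;
        }
    }
    return live; // should equal reader.numDocs()
}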
From source file:org.elasticsearch.index.fielddata.plain.GeoPointArrayIndexFieldData.java
License:Apache License
/**
 * Backward compatibility support for legacy lat/lon double arrays
 */
private AtomicGeoPointFieldData loadLegacyFieldData(LeafReader reader, NonEstimatingEstimator estimator,
        Terms terms, AtomicGeoPointFieldData data) throws Exception {
    DoubleArray lat = BigArrays.NON_RECYCLING_INSTANCE.newDoubleArray(128);
    DoubleArray lon = BigArrays.NON_RECYCLING_INSTANCE.newDoubleArray(128);
    final float acceptableTransientOverheadRatio = fieldDataType.getSettings().getAsFloat(
            "acceptable_transient_overhead_ratio", OrdinalsBuilder.DEFAULT_ACCEPTABLE_OVERHEAD_RATIO);
    boolean success = false;
    try (OrdinalsBuilder builder = new OrdinalsBuilder(terms.size(), reader.maxDoc(),
            acceptableTransientOverheadRatio)) {
        final GeoPointTermsEnumLegacy iter = new GeoPointTermsEnumLegacy(builder.buildFromTerms(terms.iterator()));
        GeoPoint point;
        long numTerms = 0;
        while ((point = iter.next()) != null) {
            lat = BigArrays.NON_RECYCLING_INSTANCE.resize(lat, numTerms + 1);
            lon = BigArrays.NON_RECYCLING_INSTANCE.resize(lon, numTerms + 1);
            lat.set(numTerms, point.getLat());
            lon.set(numTerms, point.getLon());
            ++numTerms;
        }
        lat = BigArrays.NON_RECYCLING_INSTANCE.resize(lat, numTerms);
        lon = BigArrays.NON_RECYCLING_INSTANCE.resize(lon, numTerms);
        Ordinals build = builder.build(fieldDataType.getSettings());
        RandomAccessOrds ordinals = build.ordinals();
        if (!(FieldData.isMultiValued(ordinals)
                || CommonSettings.getMemoryStorageHint(fieldDataType) == CommonSettings.MemoryStorageFormat.ORDINALS)) {
            int maxDoc = reader.maxDoc();
            DoubleArray sLat = BigArrays.NON_RECYCLING_INSTANCE.newDoubleArray(reader.maxDoc());
            DoubleArray sLon = BigArrays.NON_RECYCLING_INSTANCE.newDoubleArray(reader.maxDoc());
            for (int i = 0; i < maxDoc; i++) {
                ordinals.setDocument(i);
                long nativeOrdinal = ordinals.nextOrd();
                if (nativeOrdinal != RandomAccessOrds.NO_MORE_ORDS) {
                    sLat.set(i, lat.get(nativeOrdinal));
                    sLon.set(i, lon.get(nativeOrdinal));
                }
            }
            BitSet set = builder.buildDocsWithValuesSet();
            data = new GeoPointArrayLegacyAtomicFieldData.Single(sLon, sLat, set);
        } else {
            data = new GeoPointArrayLegacyAtomicFieldData.WithOrdinals(lon, lat, build, reader.maxDoc());
        }
        success = true;
        return data;
    } finally {
        if (success) {
            estimator.afterLoad(null, data.ramBytesUsed());
        }
    }
}
From source file:org.elasticsearch.index.shard.IndexShardTestCase.java
License:Apache License
protected Set<Uid> getShardDocUIDs(final IndexShard shard) throws IOException {
    shard.refresh("get_uids");
    try (Engine.Searcher searcher = shard.acquireSearcher("test")) {
        Set<Uid> ids = new HashSet<>();
        for (LeafReaderContext leafContext : searcher.reader().leaves()) {
            LeafReader reader = leafContext.reader();
            Bits liveDocs = reader.getLiveDocs();
            for (int i = 0; i < reader.maxDoc(); i++) {
                if (liveDocs == null || liveDocs.get(i)) {
                    Document uuid = reader.document(i, Collections.singleton(UidFieldMapper.NAME));
                    ids.add(Uid.createUid(uuid.get(UidFieldMapper.NAME)));
                }
            }
        }
        return ids;
    }
}
From source file:org.elasticsearch.index.shard.ShardSplittingQuery.java
License:Apache License
@Override
public Weight createWeight(IndexSearcher searcher, boolean needsScores, float boost) {
    return new ConstantScoreWeight(this, boost) {
        @Override
        public String toString() {
            return "weight(delete docs query)";
        }

        @Override
        public Scorer scorer(LeafReaderContext context) throws IOException {
            LeafReader leafReader = context.reader();
            FixedBitSet bitSet = new FixedBitSet(leafReader.maxDoc());
            Terms terms = leafReader.terms(RoutingFieldMapper.NAME);
            Predicate<BytesRef> includeInShard = ref -> {
                int targetShardId = OperationRouting.generateShardId(indexMetaData,
                        Uid.decodeId(ref.bytes, ref.offset, ref.length), null);
                return shardId == targetShardId;
            };
            if (terms == null) {
                // this is the common case - no partitioning and no _routing values
                // in this case we also don't do anything special with regards to nested docs since we basically delete
                // by ID and parent and nested all have the same id.
                assert indexMetaData.isRoutingPartitionedIndex() == false;
                findSplitDocs(IdFieldMapper.NAME, includeInShard, leafReader, bitSet::set);
            } else {
                final BitSet parentBitSet;
                if (nestedParentBitSetProducer == null) {
                    parentBitSet = null;
                } else {
                    parentBitSet = nestedParentBitSetProducer.getBitSet(context);
                    if (parentBitSet == null) {
                        return null; // no matches
                    }
                }
                if (indexMetaData.isRoutingPartitionedIndex()) {
                    // this is the heaviest invariant. Here we have to visit all docs' stored fields to extract
                    // _id and _routing because this index is routing partitioned.
                    Visitor visitor = new Visitor(leafReader);
                    TwoPhaseIterator twoPhaseIterator = parentBitSet == null
                            ? new RoutingPartitionedDocIdSetIterator(visitor)
                            : new NestedRoutingPartitionedDocIdSetIterator(visitor, parentBitSet);
                    return new ConstantScoreScorer(this, score(), twoPhaseIterator);
                } else {
                    // here we potentially guard the docID consumers with our parent bitset if we have one.
                    // this ensures that we are only marking root documents in the nested case and if necessary
                    // we do a second pass to mark the corresponding children in markChildDocs
                    Function<IntConsumer, IntConsumer> maybeWrapConsumer = consumer -> {
                        if (parentBitSet != null) {
                            return docId -> {
                                if (parentBitSet.get(docId)) {
                                    consumer.accept(docId);
                                }
                            };
                        }
                        return consumer;
                    };
                    // in the _routing case we first go and find all docs that have a routing value and mark the
                    // ones we have to delete
                    findSplitDocs(RoutingFieldMapper.NAME, ref -> {
                        int targetShardId = OperationRouting.generateShardId(indexMetaData, null, ref.utf8ToString());
                        return shardId == targetShardId;
                    }, leafReader, maybeWrapConsumer.apply(bitSet::set));
                    // now if we have a mixed index where some docs have a _routing value and some don't we have to
                    // exclude the ones with a routing value from the next iteration and delete / select based on the ID.
                    if (terms.getDocCount() != leafReader.maxDoc()) {
                        // this is a special case where some of the docs have no routing values; this sucks but
                        // it's possible today
                        FixedBitSet hasRoutingValue = new FixedBitSet(leafReader.maxDoc());
                        findSplitDocs(RoutingFieldMapper.NAME, ref -> false, leafReader,
                                maybeWrapConsumer.apply(hasRoutingValue::set));
                        IntConsumer bitSetConsumer = maybeWrapConsumer.apply(bitSet::set);
                        findSplitDocs(IdFieldMapper.NAME, includeInShard, leafReader, docId -> {
                            if (hasRoutingValue.get(docId) == false) {
                                bitSetConsumer.accept(docId);
                            }
                        });
                    }
                }
                if (parentBitSet != null) {
                    // if nested docs are involved we also need to mark all child docs that belong to a matching parent doc.
                    markChildDocs(parentBitSet, bitSet);
                }
            }
            return new ConstantScoreScorer(this, score(), new BitSetIterator(bitSet, bitSet.length()));
        }

        @Override
        public boolean isCacheable(LeafReaderContext ctx) {
            // This is not a regular query, let's not cache it. It wouldn't help anyway.
            return false;
        }
    };
}
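A condensed sketch of the bit-set pattern the scorer above uses (and that several later examples repeat): a FixedBitSet sized to maxDoc() holds one bit per document of the segment and can then be handed to a scorer as a DocIdSetIterator. The IntPredicate here is a hypothetical stand-in for the real matching logic:

import java.util.function.IntPredicate;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.util.BitSetIterator;
import org.apache.lucene.util.FixedBitSet;

// Mark matching documents in a per-segment bit set and expose it as an iterator.
static DocIdSetIterator collectMatches(LeafReader reader, IntPredicate matches) {
    FixedBitSet bits = new FixedBitSet(reader.maxDoc());
    for (int docId = 0; docId < reader.maxDoc(); docId++) {
        if (matches.test(docId)) {
            bits.set(docId);
        }
    }
    return new BitSetIterator(bits, bits.cardinality());
}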
From source file:org.geotoolkit.lucene.filter.LuceneOGCFilter.java
License:Open Source License
/**
 * {@inheritDoc }
 */
@Override
public DocIdSet getDocIdSet(final LeafReaderContext ctx, final Bits b) throws IOException {
    boolean treeSearch = false;
    boolean reverse = false;
    boolean distanceFilter = false;
    final Set<String> treeMatching = new HashSet<>();
    if (tree != null) {
        /*
         * For distance buffer filter no envelope only mode
         */
        if (filter instanceof DistanceBufferOperator) {
            distanceFilter = true;
            reverse = filter instanceof Beyond;
            final DistanceBufferOperator sp = (DistanceBufferOperator) filter;
            if (sp.getExpression2() instanceof Literal) {
                try {
                    final Literal lit = (Literal) sp.getExpression2();
                    final GeneralEnvelope bound = getExtendedReprojectedEnvelope(lit.getValue(), tree.getCrs(),
                            sp.getDistanceUnits(), sp.getDistance());
                    final int[] resultID = tree.searchID(bound);
                    Arrays.sort(resultID);
                    treeMatching.clear();
                    TreeElementMapper<NamedEnvelope> tem = tree.getTreeElementMapper();
                    for (int id : resultID) {
                        final NamedEnvelope env = tem.getObjectFromTreeIdentifier(id);
                        if (env != null) {
                            treeMatching.add(env.getId());
                        }
                    }
                    treeSearch = true;
                } catch (FactoryException ex) {
                    throw new IOException(ex);
                } catch (StoreIndexException ex) {
                    Throwable cause = ex.getCause();
                    if (cause instanceof IOException) {
                        throw (IOException) cause;
                    } else {
                        throw new IOException(ex);
                    }
                }
            } else {
                LOGGER.log(Level.WARNING, "Not a literal for spatial filter:{0}", sp.getExpression2());
            }
        } else if (filter instanceof BinarySpatialOperator) {
            final BinarySpatialOperator sp = (BinarySpatialOperator) filter;
            if (sp.getExpression2() instanceof Literal) {
                final Literal lit = (Literal) sp.getExpression2();
                final Envelope boundFilter = getReprojectedEnvelope(lit.getValue(), tree.getCrs());
                try {
                    if (filterType == SpatialFilterType.CROSSES || !envelopeOnly) {
                        if (filterType == SpatialFilterType.DISJOINT) {
                            reverse = true;
                        }
                        final int[] resultID = tree.searchID(boundFilter);
                        Arrays.sort(resultID);
                        final TreeElementMapper<NamedEnvelope> tem = tree.getTreeElementMapper();
                        treeMatching.clear();
                        for (int id : resultID) {
                            final NamedEnvelope env = tem.getObjectFromTreeIdentifier(id);
                            if (env != null) {
                                treeMatching.add(env.getId());
                            }
                        }
                        treeSearch = true;
                        envelopeOnly = false;
                    } else {
                        final int[] resultID = TreeX.search(tree, boundFilter, filterType);
                        Arrays.sort(resultID);
                        final TreeElementMapper<NamedEnvelope> tem = tree.getTreeElementMapper();
                        treeMatching.clear();
                        for (int id : resultID) {
                            final NamedEnvelope env = tem.getObjectFromTreeIdentifier(id);
                            if (env != null) {
                                treeMatching.add(env.getId());
                            }
                        }
                        treeSearch = true;
                    }
                } catch (StoreIndexException ex) {
                    Throwable cause = ex.getCause();
                    if (cause instanceof IOException) {
                        throw (IOException) cause;
                    } else {
                        throw new IOException(ex);
                    }
                }
            } else {
                LOGGER.log(Level.WARNING, "Not a literal for spatial filter:{0}", sp.getExpression2());
            }
        } else {
            LOGGER.log(Level.WARNING, "not a spatial operator:{0}", filter.getClass().getName());
        }
    } else {
        LOGGER.finer("Null R-tree in spatial search");
    }
    final LeafReader reader = ctx.reader();
    final BitDocIdSet set = new BitDocIdSet(new FixedBitSet(reader.maxDoc()));
    final DocsEnum termDocs = reader.termDocsEnum(META_FIELD);
    int n = termDocs.nextDoc();
    while (n != DocsEnum.NO_MORE_DOCS) {
        final int docId = termDocs.docID();
        final Document doc = reader.document(docId, ID_FIELDS);
        final String id = doc.get(IDENTIFIER_FIELD_NAME);
        final boolean match = treeMatching.contains(id);
        if (treeSearch && reverse && !match) {
            set.bits().set(docId);
        } else if (!treeSearch || match) {
            if (envelopeOnly && !distanceFilter) {
                set.bits().set(docId);
            } else {
                final Document geoDoc = reader.document(docId, GEOMETRY_FIELDS);
                if (filter.evaluate(geoDoc)) {
                    set.bits().set(docId);
                }
            }
        }
        n = termDocs.nextDoc();
    }
    return set;
}
From source file:org.hibernate.search.filter.impl.CachingWrapperQuery.java
License:LGPL
/**
 * Default cache implementation: uses {@link RoaringDocIdSet}.
 */
protected DocIdSet cacheImpl(DocIdSetIterator iterator, LeafReader reader) throws IOException {
    return new RoaringDocIdSet.Builder(reader.maxDoc()).add(iterator).build();
}
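A possible consumer of such a cached set, shown only as a usage sketch (the helper name is illustrative, not part of the Hibernate Search source): DocIdSet.iterator() may return null when the set is empty, so callers should guard for that.

import java.io.IOException;
import java.util.function.IntConsumer;
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.DocIdSetIterator;

// Re-iterate a DocIdSet that was built once per segment (e.g. by cacheImpl above).
static void forEachCachedDoc(DocIdSet cached, IntConsumer consumer) throws IOException {
    DocIdSetIterator it = cached.iterator();
    if (it == null) {
        return; // empty set
    }
    for (int doc = it.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = it.nextDoc()) {
        consumer.accept(doc);
    }
}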
From source file:org.hibernate.search.spatial.impl.SpatialHashFilter.java
License:LGPL
/**
 * Search the index for documents having the correct spatial hash cell id at given grid level.
 *
 * @param context the {@link LeafReaderContext} for which to return the {@link DocIdSet}.
 * @param acceptDocs Bits that represent the allowable docs to match (typically deleted docs but possibly filtering
 *        other documents)
 * @return a {@link DocIdSet} with the document ids matching
 */
@Override
public DocIdSet getDocIdSet(LeafReaderContext context, Bits acceptDocs) throws IOException {
    if (spatialHashCellsIds.size() == 0) {
        return null;
    }
    final LeafReader atomicReader = context.reader();
    BitDocIdSet matchedDocumentsIds = new BitDocIdSet(new FixedBitSet(atomicReader.maxDoc()));
    Boolean found = false;
    for (int i = 0; i < spatialHashCellsIds.size(); i++) {
        Term spatialHashCellTerm = new Term(fieldName, spatialHashCellsIds.get(i));
        DocsEnum spatialHashCellsDocs = atomicReader.termDocsEnum(spatialHashCellTerm);
        if (spatialHashCellsDocs != null) {
            while (true) {
                final int docId = spatialHashCellsDocs.nextDoc();
                if (docId == DocIdSetIterator.NO_MORE_DOCS) {
                    break;
                } else {
                    if (acceptDocs == null || acceptDocs.get(docId)) {
                        matchedDocumentsIds.bits().set(docId);
                        found = true;
                    }
                }
            }
        }
    }
    if (found) {
        return matchedDocumentsIds;
    } else {
        return null;
    }
}
From source file:org.hibernate.search.spatial.impl.SpatialHashQuery.java
License:LGPL
/**
 * Search the index for documents having the correct spatial hash cell id at given grid level.
 *
 * @param context the {@link LeafReaderContext} for which to return the {@link DocIdSet}.
 * @return a {@link DocIdSetIterator} with the matching document ids
 */
private DocIdSetIterator createDocIdSetIterator(LeafReaderContext context) throws IOException {
    if (spatialHashCellsIds.size() == 0) {
        return null;
    }
    final LeafReader atomicReader = context.reader();
    BitDocIdSet matchedDocumentsIds = new BitDocIdSet(new FixedBitSet(atomicReader.maxDoc()));
    boolean found = false;
    for (int i = 0; i < spatialHashCellsIds.size(); i++) {
        Term spatialHashCellTerm = new Term(fieldName, spatialHashCellsIds.get(i));
        PostingsEnum spatialHashCellsDocs = atomicReader.postings(spatialHashCellTerm);
        if (spatialHashCellsDocs != null) {
            while (true) {
                final int docId = spatialHashCellsDocs.nextDoc();
                if (docId == DocIdSetIterator.NO_MORE_DOCS) {
                    break;
                } else {
                    matchedDocumentsIds.bits().set(docId);
                    found = true;
                }
            }
        }
    }
    if (found) {
        return matchedDocumentsIds.iterator();
    } else {
        return DocIdSetIterator.empty();
    }
}
From source file:org.modeshape.jcr.index.lucene.query.ConstantScoreWeightQuery.java
License:Apache License
@Override
public Weight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException {
    Set<String> fieldSet = Collections.singleton(field);
    // return a weight which uses a constant (1.0f) scorer...
    return new RandomAccessWeight(this) {
        @Override
        protected Bits getMatchingDocs(LeafReaderContext context) throws IOException {
            LeafReader leafReader = context.reader();
            Bits liveDocs = leafReader.getLiveDocs();
            // if liveDocs is null it means there are no deleted documents...
            int docsCount = liveDocs != null ? liveDocs.length() : leafReader.numDocs();
            FixedBitSet result = new FixedBitSet(leafReader.maxDoc());
            for (int i = 0; i < docsCount; i++) {
                if (liveDocs != null && !liveDocs.get(i)) {
                    continue;
                }
                Document document = leafReader.document(i, fieldSet);
                IndexableField[] fields = document.getFields(field);
                if (fields.length == 0) {
                    // the document doesn't have the field...
                    continue;
                }
                if (areValid(fields)) {
                    result.set(i);
                }
            }
            return result.cardinality() > 0 ? result : null;
        }
    };
}
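The example above leans on two invariants that are easy to misremember, so here they are spelled out as a small illustrative check (not part of the ModeShape source): a non-null live-docs bit set always has maxDoc() bits, and numDocs() equals maxDoc() minus numDeletedDocs().

import org.apache.lucene.index.LeafReader;
import org.apache.lucene.util.Bits;

// Sanity checks for the relationship between maxDoc(), numDocs() and live docs.
static void checkDocCounts(LeafReader reader) {
    Bits liveDocs = reader.getLiveDocs();
    assert liveDocs == null || liveDocs.length() == reader.maxDoc();
    assert reader.numDocs() == reader.maxDoc() - reader.numDeletedDocs();
}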
From source file:org.uberfire.ext.metadata.backend.lucene.index.BaseLuceneIndex.java
License:Apache License
protected int[] lookupDocIdByPK(final IndexSearcher searcher, final String... ids) throws IOException {
    final List<LeafReaderContext> subReaders = searcher.getIndexReader().leaves();
    final TermsEnum[] termsEnums = new TermsEnum[subReaders.size()];
    final PostingsEnum[] docsEnums = new PostingsEnum[subReaders.size()];
    for (int subIDX = 0; subIDX < subReaders.size(); subIDX++) {
        termsEnums[subIDX] = subReaders.get(subIDX).reader().fields().terms("id").iterator();
    }
    int[] results = new int[ids.length];
    for (int i = 0; i < results.length; i++) {
        results[i] = -1;
    }
    // for each id given
    for (int idx = 0; idx < ids.length; idx++) {
        int base = 0;
        final BytesRef id = new BytesRef(ids[idx]);
        // for each leaf reader..
        for (int subIDX = 0; subIDX < subReaders.size(); subIDX++) {
            final LeafReader subReader = subReaders.get(subIDX).reader();
            final TermsEnum termsEnum = termsEnums[subIDX];
            // does the enumeration of ("id") terms from our reader contain the "id" field we're looking for?
            if (termsEnum.seekExact(id)) {
                final PostingsEnum docs = docsEnums[subIDX] = termsEnum.postings(docsEnums[subIDX], 0);
                // okay, the reader contains it, get the postings ("docs+") for and check that they're there (NP check)
                if (docs != null) {
                    final int docID = docs.nextDoc();
                    Bits liveDocs = subReader.getLiveDocs();
                    // But wait, maybe some of the docs have been deleted! Check that too..
                    if ((liveDocs == null || liveDocs.get(docID)) && docID != DocIdSetIterator.NO_MORE_DOCS) {
                        results[idx] = base + docID;
                        break;
                    }
                }
            }
            base += subReader.maxDoc();
        }
    }
    return results;
}
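The running base that the loop above accumulates from subReader.maxDoc() is exactly what LeafReaderContext.docBase already stores, so a leaf-local doc ID can be turned into a top-level doc ID either way. A small sketch (the method name and leafIndex parameter are illustrative):

import java.util.List;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReaderContext;

// Convert a doc ID that is local to one leaf into a top-level doc ID.
static int toGlobalDocId(IndexReader topReader, int leafIndex, int leafDocId) {
    List<LeafReaderContext> leaves = topReader.leaves();
    LeafReaderContext leaf = leaves.get(leafIndex);
    // docBase is the sum of reader().maxDoc() over all preceding leaves
    return leaf.docBase + leafDocId;
}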
From source file:perf.SearchPerfTest.java
License:Apache License
private static void _main(String[] clArgs) throws Exception {
    // args: dirImpl indexPath numThread numIterPerThread
    // eg java SearchPerfTest /path/to/index 4 100
    final Args args = new Args(clArgs);

    Directory dir0;
    final String dirPath = args.getString("-indexPath") + "/index";
    final String dirImpl = args.getString("-dirImpl");

    OpenDirectory od = OpenDirectory.get(dirImpl);

    /*
    } else if (dirImpl.equals("NativePosixMMapDirectory")) {
      dir0 = new NativePosixMMapDirectory(new File(dirPath));
      ramDir = null;
      if (doFacets) {
        facetsDir = new NativePosixMMapDirectory(new File(facetsDirPath));
      }
    } else if (dirImpl.equals("CachingDirWrapper")) {
      dir0 = new CachingRAMDirectory(new MMapDirectory(new File(dirPath)));
      ramDir = null;
    } else if (dirImpl.equals("RAMExceptDirectPostingsDirectory")) {
      // Load only non-postings files into RAMDir (assumes
      // Lucene40PF is the wrapped PF):
      Set<String> postingsExtensions = new HashSet<String>();
      postingsExtensions.add("frq");
      postingsExtensions.add("prx");
      postingsExtensions.add("tip");
      postingsExtensions.add("tim");
      ramDir = new RAMDirectory();
      Directory fsDir = new MMapDirectory(new File(dirPath));
      for (String file : fsDir.listAll()) {
        int idx = file.indexOf('.');
        if (idx != -1 && postingsExtensions.contains(file.substring(idx+1, file.length()))) {
          continue;
        }
        fsDir.copy(ramDir, file, file, IOContext.READ);
      }
      dir0 = new FileSwitchDirectory(postingsExtensions, fsDir, ramDir, true);
      if (doFacets) {
        facetsDir = new RAMDirectory(new SimpleFSDirectory(new File(facetsDirPath)), IOContext.READ);
      }
    */

    final RAMDirectory ramDir;
    dir0 = od.open(Paths.get(dirPath));
    if (dir0 instanceof RAMDirectory) {
        ramDir = (RAMDirectory) dir0;
    } else {
        ramDir = null;
    }

    // TODO: NativeUnixDir?

    final String analyzer = args.getString("-analyzer");
    final String tasksFile = args.getString("-taskSource");
    final int searchThreadCount = args.getInt("-searchThreadCount");
    final String fieldName = args.getString("-field");
    final boolean printHeap = args.getFlag("-printHeap");
    final boolean doPKLookup = args.getFlag("-pk");
    final int topN = args.getInt("-topN");
    final boolean doStoredLoads = args.getFlag("-loadStoredFields");

    // Used to choose which random subset of tasks we will
    // run, to generate the PKLookup tasks, and to generate
    // any random pct filters:
    final long staticRandomSeed = args.getLong("-staticSeed");

    // Used to shuffle the random subset of tasks:
    final long randomSeed = args.getLong("-seed");

    // TODO: this could be way better.
    final String similarity = args.getString("-similarity");
    // now reflect
    final Class<? extends Similarity> simClazz =
            Class.forName("org.apache.lucene.search.similarities." + similarity).asSubclass(Similarity.class);
    final Similarity sim = simClazz.newInstance();

    System.out.println("Using dir impl " + dir0.getClass().getName());
    System.out.println("Analyzer " + analyzer);
    System.out.println("Similarity " + similarity);
    System.out.println("Search thread count " + searchThreadCount);
    System.out.println("topN " + topN);
    System.out.println("JVM " + (Constants.JRE_IS_64BIT ? "is" : "is not") + " 64bit");
    System.out.println("Pointer is " + RamUsageEstimator.NUM_BYTES_OBJECT_REF + " bytes");

    final Analyzer a;
    if (analyzer.equals("EnglishAnalyzer")) {
        a = new EnglishAnalyzer();
    } else if (analyzer.equals("ClassicAnalyzer")) {
        a = new ClassicAnalyzer();
    } else if (analyzer.equals("StandardAnalyzer")) {
        a = new StandardAnalyzer();
    } else if (analyzer.equals("StandardAnalyzerNoStopWords")) {
        a = new StandardAnalyzer(CharArraySet.EMPTY_SET);
    } else if (analyzer.equals("ShingleStandardAnalyzer")) {
        a = new ShingleAnalyzerWrapper(new StandardAnalyzer(CharArraySet.EMPTY_SET), 2, 2,
                ShingleFilter.DEFAULT_TOKEN_SEPARATOR, true, true, ShingleFilter.DEFAULT_FILLER_TOKEN);
    } else {
        throw new RuntimeException("unknown analyzer " + analyzer);
    }

    final ReferenceManager<IndexSearcher> mgr;
    final IndexWriter writer;
    final Directory dir;
    final String commit = args.getString("-commit");
    final String hiliteImpl = args.getString("-hiliteImpl");
    final String logFile = args.getString("-log");

    final long tSearcherStart = System.currentTimeMillis();

    final boolean verifyCheckSum = !args.getFlag("-skipVerifyChecksum");
    final boolean recacheFilterDeletes = args.getFlag("-recacheFilterDeletes");

    if (recacheFilterDeletes) {
        throw new UnsupportedOperationException("recacheFilterDeletes was deprecated");
    }

    if (args.getFlag("-nrt")) {
        // TODO: get taxoReader working here too
        // TODO: factor out & share this CL processing w/ Indexer
        final int indexThreadCount = args.getInt("-indexThreadCount");
        final String lineDocsFile = args.getString("-lineDocsFile");
        final float docsPerSecPerThread = args.getFloat("-docsPerSecPerThread");
        final float reopenEverySec = args.getFloat("-reopenEverySec");
        final boolean storeBody = args.getFlag("-store");
        final boolean tvsBody = args.getFlag("-tvs");
        final boolean useCFS = args.getFlag("-cfs");
        final String defaultPostingsFormat = args.getString("-postingsFormat");
        final String idFieldPostingsFormat = args.getString("-idFieldPostingsFormat");
        final boolean verbose = args.getFlag("-verbose");
        final boolean cloneDocs = args.getFlag("-cloneDocs");
        final Mode mode = Mode.valueOf(args.getString("-mode", "update").toUpperCase(Locale.ROOT));

        final long reopenEveryMS = (long) (1000 * reopenEverySec);

        if (verbose) {
            InfoStream.setDefault(new PrintStreamInfoStream(System.out));
        }

        if (!dirImpl.equals("RAMDirectory") && !dirImpl.equals("RAMExceptDirectPostingsDirectory")) {
            System.out.println("Wrap NRTCachingDirectory");
            dir0 = new NRTCachingDirectory(dir0, 20, 400.0);
        }

        dir = dir0;

        final IndexWriterConfig iwc = new IndexWriterConfig(a);
        iwc.setOpenMode(IndexWriterConfig.OpenMode.APPEND);
        iwc.setRAMBufferSizeMB(256.0);
        iwc.setIndexDeletionPolicy(NoDeletionPolicy.INSTANCE);

        // TODO: also RAMDirExceptDirect...? need to
        // ... block deletes against wrapped FSDir?
        if (dirImpl.equals("RAMDirectory")) {
            // Let IW remove files only referenced by starting commit:
            iwc.setIndexDeletionPolicy(new KeepNoCommitsDeletionPolicy());
        }

        if (commit != null && commit.length() > 0) {
            System.out.println("Opening writer on commit=" + commit);
            iwc.setIndexCommit(PerfUtils.findCommitPoint(commit, dir));
        }

        ((TieredMergePolicy) iwc.getMergePolicy()).setNoCFSRatio(useCFS ? 1.0 : 0.0);
        //((TieredMergePolicy) iwc.getMergePolicy()).setMaxMergedSegmentMB(1024);
        //((TieredMergePolicy) iwc.getMergePolicy()).setReclaimDeletesWeight(3.0);
        //((TieredMergePolicy) iwc.getMergePolicy()).setMaxMergeAtOnce(4);

        final Codec codec = new Lucene62Codec() {
            @Override
            public PostingsFormat getPostingsFormatForField(String field) {
                return PostingsFormat.forName(field.equals("id") ? idFieldPostingsFormat : defaultPostingsFormat);
            }
        };
        iwc.setCodec(codec);

        final ConcurrentMergeScheduler cms = (ConcurrentMergeScheduler) iwc.getMergeScheduler();
        // Only let one merge run at a time...
        // ... but queue up to 4, before index thread is stalled:
        cms.setMaxMergesAndThreads(4, 1);

        iwc.setMergedSegmentWarmer(new IndexWriter.IndexReaderWarmer() {
            @Override
            public void warm(LeafReader reader) throws IOException {
                final long t0 = System.currentTimeMillis();
                //System.out.println("DO WARM: " + reader);
                IndexSearcher s = new IndexSearcher(reader);
                s.setQueryCache(null); // don't bench the cache
                s.search(new TermQuery(new Term(fieldName, "united")), 10);
                final long t1 = System.currentTimeMillis();
                System.out.println("warm segment=" + reader + " numDocs=" + reader.numDocs() + ": took "
                        + (t1 - t0) + " msec");
            }
        });

        writer = new IndexWriter(dir, iwc);
        System.out.println("Initial writer.maxDoc()=" + writer.maxDoc());

        // TODO: add -nrtBodyPostingsOffsets instead of
        // hardwired false:
        boolean addDVFields = mode == Mode.BDV_UPDATE || mode == Mode.NDV_UPDATE;
        LineFileDocs lineFileDocs = new LineFileDocs(lineDocsFile, false, storeBody, tvsBody, false, cloneDocs,
                null, null, null, addDVFields);
        IndexThreads threads = new IndexThreads(new Random(17), writer, new AtomicBoolean(false), lineFileDocs,
                indexThreadCount, -1, false, false, mode, docsPerSecPerThread, null, -1.0, -1);
        threads.start();

        mgr = new SearcherManager(writer, new SearcherFactory() {
            @Override
            public IndexSearcher newSearcher(IndexReader reader, IndexReader previous) {
                IndexSearcher s = new IndexSearcher(reader);
                s.setQueryCache(null); // don't bench the cache
                s.setSimilarity(sim);
                return s;
            }
        });

        System.out.println("reopen every " + reopenEverySec);

        Thread reopenThread = new Thread() {
            @Override
            public void run() {
                try {
                    final long startMS = System.currentTimeMillis();

                    int reopenCount = 1;
                    while (true) {
                        final long sleepMS = startMS + (reopenCount * reopenEveryMS) - System.currentTimeMillis();
                        if (sleepMS < 0) {
                            System.out.println("WARNING: reopen fell behind by " + Math.abs(sleepMS) + " ms");
                        } else {
                            Thread.sleep(sleepMS);
                        }

                        mgr.maybeRefresh();
                        reopenCount++;
                        IndexSearcher s = mgr.acquire();
                        try {
                            if (ramDir != null) {
                                System.out.println(String.format(Locale.ENGLISH,
                                        "%.1fs: index: %d bytes in RAMDir; writer.maxDoc()=%d; searcher.maxDoc()=%d; searcher.numDocs()=%d",
                                        (System.currentTimeMillis() - startMS) / 1000.0, ramDir.ramBytesUsed(),
                                        writer.maxDoc(), s.getIndexReader().maxDoc(), s.getIndexReader().numDocs()));
                                //String[] l = ramDir.listAll();
                                //Arrays.sort(l);
                                //for(String f : l) {
                                //  System.out.println("  " + f + ": " + ramDir.fileLength(f));
                                //}
                            } else {
                                System.out.println(String.format(Locale.ENGLISH,
                                        "%.1fs: done reopen; writer.maxDoc()=%d; searcher.maxDoc()=%d; searcher.numDocs()=%d",
                                        (System.currentTimeMillis() - startMS) / 1000.0, writer.maxDoc(),
                                        s.getIndexReader().maxDoc(), s.getIndexReader().numDocs()));
                            }
                        } finally {
                            mgr.release(s);
                        }
                    }
                } catch (Exception e) {
                    throw new RuntimeException(e);
                }
            }
        };
        reopenThread.setName("ReopenThread");
        reopenThread.setPriority(4 + Thread.currentThread().getPriority());
        reopenThread.start();

    } else {
        dir = dir0;
        writer = null;
        final DirectoryReader reader;
        if (commit != null && commit.length() > 0) {
            System.out.println("Opening searcher on commit=" + commit);
            reader = DirectoryReader.open(PerfUtils.findCommitPoint(commit, dir));
        } else {
            // open last commit
            reader = DirectoryReader.open(dir);
        }
        IndexSearcher s = new IndexSearcher(reader);
        s.setQueryCache(null); // don't bench the cache
        s.setSimilarity(sim);
        System.out.println("maxDoc=" + reader.maxDoc() + " numDocs=" + reader.numDocs() + " %tg deletes="
                + (100. * reader.maxDoc() / reader.numDocs()));

        mgr = new SingleIndexSearcher(s);
    }

    System.out.println((System.currentTimeMillis() - tSearcherStart) + " msec to init searcher/NRT");

    {
        IndexSearcher s = mgr.acquire();
        try {
            System.out.println("Searcher: numDocs=" + s.getIndexReader().numDocs() + " maxDoc="
                    + s.getIndexReader().maxDoc() + ": " + s);
        } finally {
            mgr.release(s);
        }
    }

    //System.out.println("searcher=" + searcher);

    FacetsConfig facetsConfig = new FacetsConfig();
    facetsConfig.setHierarchical("Date", true);

    TaxonomyReader taxoReader;
    Path taxoPath = Paths.get(args.getString("-indexPath"), "facets");
    Directory taxoDir = od.open(taxoPath);
    if (DirectoryReader.indexExists(taxoDir)) {
        taxoReader = new DirectoryTaxonomyReader(taxoDir);
        System.out.println("Taxonomy has " + taxoReader.getSize() + " ords");
    } else {
        taxoReader = null;
    }

    final Random staticRandom = new Random(staticRandomSeed);
    final Random random = new Random(randomSeed);

    final DirectSpellChecker spellChecker = new DirectSpellChecker();
    final IndexState indexState = new IndexState(mgr, taxoReader, fieldName, spellChecker, hiliteImpl, facetsConfig);

    final QueryParser queryParser = new QueryParser("body", a);
    TaskParser taskParser = new TaskParser(indexState, queryParser, fieldName, topN, staticRandom, doStoredLoads);

    final TaskSource tasks;

    if (tasksFile.startsWith("server:")) {
        int idx = tasksFile.indexOf(':', 8);
        if (idx == -1) {
            throw new RuntimeException(
                    "server is missing the port; should be server:interface:port (got: " + tasksFile + ")");
        }
        String iface = tasksFile.substring(7, idx);
        int port = Integer.valueOf(tasksFile.substring(1 + idx));
        RemoteTaskSource remoteTasks = new RemoteTaskSource(iface, port, searchThreadCount, taskParser);

        // nocommit must stop thread?
        tasks = remoteTasks;
    } else {
        // Load the tasks from a file:
        final int taskRepeatCount = args.getInt("-taskRepeatCount");
        final int numTaskPerCat = args.getInt("-tasksPerCat");
        tasks = new LocalTaskSource(indexState, taskParser, tasksFile, staticRandom, random, numTaskPerCat,
                taskRepeatCount, doPKLookup);
        System.out.println("Task repeat count " + taskRepeatCount);
        System.out.println("Tasks file " + tasksFile);
        System.out.println("Num task per cat " + numTaskPerCat);
    }

    args.check();

    // Evil respeller:
    //spellChecker.setMinPrefix(0);
    //spellChecker.setMaxInspections(1024);
    final TaskThreads taskThreads = new TaskThreads(tasks, indexState, searchThreadCount);
    Thread.sleep(10);

    final long startNanos = System.nanoTime();
    taskThreads.start();
    taskThreads.finish();
    final long endNanos = System.nanoTime();

    System.out.println("\n" + ((endNanos - startNanos) / 1000000.0) + " msec total");

    final List<Task> allTasks = tasks.getAllTasks();

    PrintStream out = new PrintStream(logFile);

    if (allTasks != null) {
        // Tasks were local: verify checksums:

        // indexState.setDocIDToID();

        final Map<Task, Task> tasksSeen = new HashMap<Task, Task>();

        out.println("\nResults for " + allTasks.size() + " tasks:");

        boolean fail = false;
        for (final Task task : allTasks) {
            if (verifyCheckSum) {
                final Task other = tasksSeen.get(task);
                if (other != null) {
                    if (task.checksum() != other.checksum()) {
                        System.out.println("\nTASK:");
                        task.printResults(System.out, indexState);
                        System.out.println("\nOTHER TASK:");
                        other.printResults(System.out, indexState);
                        fail = true;
                        //throw new RuntimeException("task " + task + " hit different checksums: " + task.checksum() + " vs " + other.checksum() + " other=" + other);
                    }
                } else {
                    tasksSeen.put(task, task);
                }
            }
            out.println("\nTASK: " + task);
            out.println("  " + (task.runTimeNanos / 1000000.0) + " msec");
            out.println("  thread " + task.threadID);
            task.printResults(out, indexState);
        }
        if (fail) {
            throw new RuntimeException("some tasks got different results across different threads");
        }

        allTasks.clear();
    }

    mgr.close();

    if (taxoReader != null) {
        taxoReader.close();
    }

    if (writer != null) {
        // Don't actually commit any index changes:
        writer.rollback();
    }

    dir.close();

    if (printHeap) {
        // Try to get RAM usage -- some ideas poached from http://www.javaworld.com/javaworld/javatips/jw-javatip130.html
        final Runtime runtime = Runtime.getRuntime();
        long usedMem1 = PerfUtils.usedMemory(runtime);
        long usedMem2 = Long.MAX_VALUE;
        for (int iter = 0; iter < 10; iter++) {
            runtime.runFinalization();
            runtime.gc();
            Thread.yield();
            Thread.sleep(100);
            usedMem2 = usedMem1;
            usedMem1 = PerfUtils.usedMemory(runtime);
        }
        out.println("\nHEAP: " + PerfUtils.usedMemory(runtime));
    }
    out.close();
}