Example usage for org.apache.lucene.index LeafReader maxDoc

Introduction

On this page you can find usage examples for org.apache.lucene.index LeafReader maxDoc.

Prototype

public abstract int maxDoc();

Document

Returns one greater than the largest possible document number.
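
Every example below follows the same basic pattern: size a per-document structure (an array, a FixedBitSet, an OrdinalsBuilder) with maxDoc(), then iterate document numbers from 0 to maxDoc() - 1, skipping deleted documents via the live-docs bitset where necessary. The following minimal sketch illustrates that pattern in isolation; the class and method names are illustrative and do not come from any of the source files listed under Usage.

import org.apache.lucene.index.LeafReader;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.FixedBitSet;

public class MaxDocSketch {

    /** Marks every live document of the given leaf in a bitset sized by maxDoc(). */
    static FixedBitSet markLiveDocs(LeafReader reader) {
        // maxDoc() is one greater than the largest document number, so it is the
        // correct capacity for any per-document array or bitset on this leaf.
        final FixedBitSet live = new FixedBitSet(reader.maxDoc());
        final Bits liveDocs = reader.getLiveDocs(); // null means the leaf has no deletions
        for (int docId = 0; docId < reader.maxDoc(); docId++) {
            if (liveDocs == null || liveDocs.get(docId)) {
                live.set(docId);
            }
        }
        return live;
    }
}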

Usage

From source file:org.elasticsearch.index.fielddata.plain.GeoPointArrayIndexFieldData.java

License:Apache License

/**
 * Backward compatibility support for legacy lat/lon double arrays
 */
private AtomicGeoPointFieldData loadLegacyFieldData(LeafReader reader, NonEstimatingEstimator estimator,
        Terms terms, AtomicGeoPointFieldData data) throws Exception {
    DoubleArray lat = BigArrays.NON_RECYCLING_INSTANCE.newDoubleArray(128);
    DoubleArray lon = BigArrays.NON_RECYCLING_INSTANCE.newDoubleArray(128);
    final float acceptableTransientOverheadRatio = fieldDataType.getSettings().getAsFloat(
            "acceptable_transient_overhead_ratio", OrdinalsBuilder.DEFAULT_ACCEPTABLE_OVERHEAD_RATIO);
    boolean success = false;
    try (OrdinalsBuilder builder = new OrdinalsBuilder(terms.size(), reader.maxDoc(),
            acceptableTransientOverheadRatio)) {
        final GeoPointTermsEnumLegacy iter = new GeoPointTermsEnumLegacy(
                builder.buildFromTerms(terms.iterator()));
        GeoPoint point;
        long numTerms = 0;
        while ((point = iter.next()) != null) {
            lat = BigArrays.NON_RECYCLING_INSTANCE.resize(lat, numTerms + 1);
            lon = BigArrays.NON_RECYCLING_INSTANCE.resize(lon, numTerms + 1);
            lat.set(numTerms, point.getLat());
            lon.set(numTerms, point.getLon());
            ++numTerms;
        }
        lat = BigArrays.NON_RECYCLING_INSTANCE.resize(lat, numTerms);
        lon = BigArrays.NON_RECYCLING_INSTANCE.resize(lon, numTerms);

        Ordinals build = builder.build(fieldDataType.getSettings());
        RandomAccessOrds ordinals = build.ordinals();
        if (!(FieldData.isMultiValued(ordinals) || CommonSettings
                .getMemoryStorageHint(fieldDataType) == CommonSettings.MemoryStorageFormat.ORDINALS)) {
            int maxDoc = reader.maxDoc();
            DoubleArray sLat = BigArrays.NON_RECYCLING_INSTANCE.newDoubleArray(reader.maxDoc());
            DoubleArray sLon = BigArrays.NON_RECYCLING_INSTANCE.newDoubleArray(reader.maxDoc());
            for (int i = 0; i < maxDoc; i++) {
                ordinals.setDocument(i);
                long nativeOrdinal = ordinals.nextOrd();
                if (nativeOrdinal != RandomAccessOrds.NO_MORE_ORDS) {
                    sLat.set(i, lat.get(nativeOrdinal));
                    sLon.set(i, lon.get(nativeOrdinal));
                }
            }
            BitSet set = builder.buildDocsWithValuesSet();
            data = new GeoPointArrayLegacyAtomicFieldData.Single(sLon, sLat, set);
        } else {
            data = new GeoPointArrayLegacyAtomicFieldData.WithOrdinals(lon, lat, build, reader.maxDoc());
        }
        success = true;
        return data;
    } finally {
        if (success) {
            estimator.afterLoad(null, data.ramBytesUsed());
        }
    }
}

From source file:org.elasticsearch.index.shard.IndexShardTestCase.java

License:Apache License

protected Set<Uid> getShardDocUIDs(final IndexShard shard) throws IOException {
    shard.refresh("get_uids");
    try (Engine.Searcher searcher = shard.acquireSearcher("test")) {
        Set<Uid> ids = new HashSet<>();
        for (LeafReaderContext leafContext : searcher.reader().leaves()) {
            LeafReader reader = leafContext.reader();
            Bits liveDocs = reader.getLiveDocs();
            for (int i = 0; i < reader.maxDoc(); i++) {
                if (liveDocs == null || liveDocs.get(i)) {
                    Document uuid = reader.document(i, Collections.singleton(UidFieldMapper.NAME));
                    ids.add(Uid.createUid(uuid.get(UidFieldMapper.NAME)));
                }
            }
        }
        return ids;
    }
}

From source file:org.elasticsearch.index.shard.ShardSplittingQuery.java

License:Apache License

@Override
public Weight createWeight(IndexSearcher searcher, boolean needsScores, float boost) {
    return new ConstantScoreWeight(this, boost) {
        @Override
        public String toString() {
            return "weight(delete docs query)";
        }

        @Override
        public Scorer scorer(LeafReaderContext context) throws IOException {
            LeafReader leafReader = context.reader();
            FixedBitSet bitSet = new FixedBitSet(leafReader.maxDoc());
            Terms terms = leafReader.terms(RoutingFieldMapper.NAME);
            Predicate<BytesRef> includeInShard = ref -> {
                int targetShardId = OperationRouting.generateShardId(indexMetaData,
                        Uid.decodeId(ref.bytes, ref.offset, ref.length), null);
                return shardId == targetShardId;
            };
            if (terms == null) {
                // this is the common case - no partitioning and no _routing values
                // in this case we also don't do anything special with regards to nested docs since we basically delete
                // by ID and parent and nested all have the same id.
                assert indexMetaData.isRoutingPartitionedIndex() == false;
                findSplitDocs(IdFieldMapper.NAME, includeInShard, leafReader, bitSet::set);
            } else {
                final BitSet parentBitSet;
                if (nestedParentBitSetProducer == null) {
                    parentBitSet = null;
                } else {
                    parentBitSet = nestedParentBitSetProducer.getBitSet(context);
                    if (parentBitSet == null) {
                        return null; // no matches
                    }
                }
                if (indexMetaData.isRoutingPartitionedIndex()) {
                    // this is the heaviest case: here we have to visit all docs' stored fields to extract _id and _routing
                    // since this index is routing partitioned.
                    Visitor visitor = new Visitor(leafReader);
                    TwoPhaseIterator twoPhaseIterator = parentBitSet == null
                            ? new RoutingPartitionedDocIdSetIterator(visitor)
                            : new NestedRoutingPartitionedDocIdSetIterator(visitor, parentBitSet);
                    return new ConstantScoreScorer(this, score(), twoPhaseIterator);
                } else {
                    // here we potentially guard the docID consumers with our parent bitset if we have one.
                    // this ensures that we are only marking root documents in the nested case and if necessary
                    // we do a second pass to mark the corresponding children in markChildDocs
                    Function<IntConsumer, IntConsumer> maybeWrapConsumer = consumer -> {
                        if (parentBitSet != null) {
                            return docId -> {
                                if (parentBitSet.get(docId)) {
                                    consumer.accept(docId);
                                }
                            };
                        }
                        return consumer;
                    };
                    // in the _routing case we first go and find all docs that have a routing value and mark the ones we have to delete
                    findSplitDocs(RoutingFieldMapper.NAME, ref -> {
                        int targetShardId = OperationRouting.generateShardId(indexMetaData, null,
                                ref.utf8ToString());
                        return shardId == targetShardId;
                    }, leafReader, maybeWrapConsumer.apply(bitSet::set));

                    // now if we have a mixed index where some docs have a _routing value and some don't we have to exclude the ones
                    // with a routing value from the next iteration and delete / select based on the ID.
                    if (terms.getDocCount() != leafReader.maxDoc()) {
                        // this is a special case where some of the docs have no routing values this sucks but it's possible today
                        FixedBitSet hasRoutingValue = new FixedBitSet(leafReader.maxDoc());
                        findSplitDocs(RoutingFieldMapper.NAME, ref -> false, leafReader,
                                maybeWrapConsumer.apply(hasRoutingValue::set));
                        IntConsumer bitSetConsumer = maybeWrapConsumer.apply(bitSet::set);
                        findSplitDocs(IdFieldMapper.NAME, includeInShard, leafReader, docId -> {
                            if (hasRoutingValue.get(docId) == false) {
                                bitSetConsumer.accept(docId);
                            }
                        });
                    }
                }
                if (parentBitSet != null) {
                    // if nested docs are involved we also need to mark all child docs that belong to a matching parent doc.
                    markChildDocs(parentBitSet, bitSet);
                }
            }

            return new ConstantScoreScorer(this, score(), new BitSetIterator(bitSet, bitSet.length()));
        }

        @Override
        public boolean isCacheable(LeafReaderContext ctx) {
            // This is not a regular query, let's not cache it. It wouldn't help
            // anyway.
            return false;
        }
    };
}

From source file:org.geotoolkit.lucene.filter.LuceneOGCFilter.java

License:Open Source License

/**
 * {@inheritDoc}
 */
@Override
public DocIdSet getDocIdSet(final LeafReaderContext ctx, final Bits b) throws IOException {

    boolean treeSearch = false;
    boolean reverse = false;
    boolean distanceFilter = false;
    final Set<String> treeMatching = new HashSet<>();
    if (tree != null) {
        /*
         * For distance buffer filters, envelope-only mode is not used
         */
        if (filter instanceof DistanceBufferOperator) {
            distanceFilter = true;
            reverse = filter instanceof Beyond;
            final DistanceBufferOperator sp = (DistanceBufferOperator) filter;
            if (sp.getExpression2() instanceof Literal) {
                try {
                    final Literal lit = (Literal) sp.getExpression2();
                    final GeneralEnvelope bound = getExtendedReprojectedEnvelope(lit.getValue(), tree.getCrs(),
                            sp.getDistanceUnits(), sp.getDistance());
                    final int[] resultID = tree.searchID(bound);
                    Arrays.sort(resultID);
                    treeMatching.clear();
                    TreeElementMapper<NamedEnvelope> tem = tree.getTreeElementMapper();
                    for (int id : resultID) {
                        final NamedEnvelope env = tem.getObjectFromTreeIdentifier(id);
                        if (env != null) {
                            treeMatching.add(env.getId());
                        }
                    }
                    treeSearch = true;
                } catch (FactoryException ex) {
                    throw new IOException(ex);
                } catch (StoreIndexException ex) {
                    Throwable cause = ex.getCause();
                    if (cause instanceof IOException) {
                        throw (IOException) cause;
                    } else {
                        throw new IOException(ex);
                    }
                }
            } else {
                LOGGER.log(Level.WARNING, "Not a literal for spatial filter:{0}", sp.getExpression2());
            }

        } else if (filter instanceof BinarySpatialOperator) {
            final BinarySpatialOperator sp = (BinarySpatialOperator) filter;
            if (sp.getExpression2() instanceof Literal) {
                final Literal lit = (Literal) sp.getExpression2();
                final Envelope boundFilter = getReprojectedEnvelope(lit.getValue(), tree.getCrs());
                try {
                    if (filterType == SpatialFilterType.CROSSES || !envelopeOnly) {
                        if (filterType == SpatialFilterType.DISJOINT) {
                            reverse = true;
                        }
                        final int[] resultID = tree.searchID(boundFilter);
                        Arrays.sort(resultID);
                        final TreeElementMapper<NamedEnvelope> tem = tree.getTreeElementMapper();
                        treeMatching.clear();
                        for (int id : resultID) {
                            final NamedEnvelope env = tem.getObjectFromTreeIdentifier(id);
                            if (env != null) {
                                treeMatching.add(env.getId());
                            }
                        }
                        treeSearch = true;
                        envelopeOnly = false;
                    } else {
                        final int[] resultID = TreeX.search(tree, boundFilter, filterType);
                        Arrays.sort(resultID);
                        final TreeElementMapper<NamedEnvelope> tem = tree.getTreeElementMapper();
                        treeMatching.clear();
                        for (int id : resultID) {
                            final NamedEnvelope env = tem.getObjectFromTreeIdentifier(id);
                            if (env != null) {
                                treeMatching.add(env.getId());
                            }
                        }
                        treeSearch = true;
                    }
                } catch (StoreIndexException ex) {
                    Throwable cause = ex.getCause();
                    if (cause instanceof IOException) {
                        throw (IOException) cause;
                    } else {
                        throw new IOException(ex);
                    }
                }
            } else {
                LOGGER.log(Level.WARNING, "Not a literal for spatial filter:{0}", sp.getExpression2());
            }
        } else {
            LOGGER.log(Level.WARNING, "not a spatial operator:{0}", filter.getClass().getName());
        }
    } else {
        LOGGER.finer("Null R-tree in spatial search");
    }

    final LeafReader reader = ctx.reader();
    final BitDocIdSet set = new BitDocIdSet(new FixedBitSet(reader.maxDoc()));
    final DocsEnum termDocs = reader.termDocsEnum(META_FIELD);
    int n = termDocs.nextDoc();
    while (n != DocsEnum.NO_MORE_DOCS) {
        final int docId = termDocs.docID();
        final Document doc = reader.document(docId, ID_FIELDS);
        final String id = doc.get(IDENTIFIER_FIELD_NAME);
        final boolean match = treeMatching.contains(id);
        if (treeSearch && reverse && !match) {
            set.bits().set(docId);

        } else if (!treeSearch || match) {
            if (envelopeOnly && !distanceFilter) {
                set.bits().set(docId);
            } else {
                final Document geoDoc = reader.document(docId, GEOMETRY_FIELDS);
                if (filter.evaluate(geoDoc)) {
                    set.bits().set(docId);
                }
            }
        }
        n = termDocs.nextDoc();
    }

    return set;
}

From source file:org.hibernate.search.filter.impl.CachingWrapperQuery.java

License:LGPL

/**
 * Default cache implementation: uses {@link RoaringDocIdSet}.
 */
protected DocIdSet cacheImpl(DocIdSetIterator iterator, LeafReader reader) throws IOException {
    return new RoaringDocIdSet.Builder(reader.maxDoc()).add(iterator).build();
}

From source file:org.hibernate.search.spatial.impl.SpatialHashFilter.java

License:LGPL

/**
 * Search the index for document having the correct spatial hash cell id at given grid level.
 *
 * @param context the {@link LeafReaderContext} for which to return the {@link DocIdSet}.
 * @param acceptDocs Bits that represent the allowable docs to match (typically deleted docs but possibly filtering
 * other documents)
 * @return a {@link DocIdSet} with the document ids matching
 */
@Override
public DocIdSet getDocIdSet(LeafReaderContext context, Bits acceptDocs) throws IOException {
    if (spatialHashCellsIds.size() == 0) {
        return null;
    }

    final LeafReader atomicReader = context.reader();

    BitDocIdSet matchedDocumentsIds = new BitDocIdSet(new FixedBitSet(atomicReader.maxDoc()));
    boolean found = false;
    for (int i = 0; i < spatialHashCellsIds.size(); i++) {
        Term spatialHashCellTerm = new Term(fieldName, spatialHashCellsIds.get(i));
        DocsEnum spatialHashCellsDocs = atomicReader.termDocsEnum(spatialHashCellTerm);
        if (spatialHashCellsDocs != null) {
            while (true) {
                final int docId = spatialHashCellsDocs.nextDoc();
                if (docId == DocIdSetIterator.NO_MORE_DOCS) {
                    break;
                } else {
                    if (acceptDocs == null || acceptDocs.get(docId)) {
                        matchedDocumentsIds.bits().set(docId);
                        found = true;
                    }
                }
            }
        }
    }

    if (found) {
        return matchedDocumentsIds;
    } else {
        return null;
    }
}

From source file:org.hibernate.search.spatial.impl.SpatialHashQuery.java

License:LGPL

/**
 * Search the index for document having the correct spatial hash cell id at given grid level.
 *
 * @param context the {@link LeafReaderContext} for which to return the {@link DocIdSet}.
 * @return a {@link DocIdSetIterator} with the matching document ids
 */
private DocIdSetIterator createDocIdSetIterator(LeafReaderContext context) throws IOException {
    if (spatialHashCellsIds.size() == 0) {
        return null;
    }

    final LeafReader atomicReader = context.reader();

    BitDocIdSet matchedDocumentsIds = new BitDocIdSet(new FixedBitSet(atomicReader.maxDoc()));
    boolean found = false;
    for (int i = 0; i < spatialHashCellsIds.size(); i++) {
        Term spatialHashCellTerm = new Term(fieldName, spatialHashCellsIds.get(i));
        PostingsEnum spatialHashCellsDocs = atomicReader.postings(spatialHashCellTerm);
        if (spatialHashCellsDocs != null) {
            while (true) {
                final int docId = spatialHashCellsDocs.nextDoc();
                if (docId == DocIdSetIterator.NO_MORE_DOCS) {
                    break;
                } else {
                    matchedDocumentsIds.bits().set(docId);
                    found = true;
                }
            }
        }
    }

    if (found) {
        return matchedDocumentsIds.iterator();
    } else {
        return DocIdSetIterator.empty();
    }
}

From source file:org.modeshape.jcr.index.lucene.query.ConstantScoreWeightQuery.java

License:Apache License

@Override
public Weight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException {
    Set<String> fieldSet = Collections.singleton(field);
    // return a weight which uses a constant (1.0f) scorer...
    return new RandomAccessWeight(this) {
        @Override
        protected Bits getMatchingDocs(LeafReaderContext context) throws IOException {
            LeafReader leafReader = context.reader();
            Bits liveDocs = leafReader.getLiveDocs();
            // if liveDocs is null it means there are no deleted documents...
            int docsCount = liveDocs != null ? liveDocs.length() : leafReader.numDocs();
            FixedBitSet result = new FixedBitSet(leafReader.maxDoc());
            for (int i = 0; i < docsCount; i++) {
                if (liveDocs != null && !liveDocs.get(i)) {
                    continue;
                }
                Document document = leafReader.document(i, fieldSet);
                IndexableField[] fields = document.getFields(field);
                if (fields.length == 0) {
                    // the document doesn't have the field...
                    continue;
                }
                if (areValid(fields)) {
                    result.set(i);
                }
            }
            return result.cardinality() > 0 ? result : null;
        }
    };
}

From source file:org.uberfire.ext.metadata.backend.lucene.index.BaseLuceneIndex.java

License:Apache License

protected int[] lookupDocIdByPK(final IndexSearcher searcher, final String... ids) throws IOException {
    final List<LeafReaderContext> subReaders = searcher.getIndexReader().leaves();
    final TermsEnum[] termsEnums = new TermsEnum[subReaders.size()];
    final PostingsEnum[] docsEnums = new PostingsEnum[subReaders.size()];
    for (int subIDX = 0; subIDX < subReaders.size(); subIDX++) {
        termsEnums[subIDX] = subReaders.get(subIDX).reader().fields().terms("id").iterator();
    }

    int[] results = new int[ids.length];

    for (int i = 0; i < results.length; i++) {
        results[i] = -1;
    }

    // for each id given
    for (int idx = 0; idx < ids.length; idx++) {
        int base = 0;
        final BytesRef id = new BytesRef(ids[idx]);
        // for each leaf reader..
        for (int subIDX = 0; subIDX < subReaders.size(); subIDX++) {
            final LeafReader subReader = subReaders.get(subIDX).reader();
            final TermsEnum termsEnum = termsEnums[subIDX];
            // does the enumeration of "id" terms from this reader contain the id value we're looking for?
            if (termsEnum.seekExact(id)) {
                final PostingsEnum docs = docsEnums[subIDX] = termsEnum.postings(docsEnums[subIDX], 0);
                // the reader contains it; get the postings for it and check that they are present (null check)
                if (docs != null) {
                    final int docID = docs.nextDoc();
                    Bits liveDocs = subReader.getLiveDocs();
                    // But wait, maybe some of the docs have been deleted! Check that too..
                    if ((liveDocs == null || liveDocs.get(docID)) && docID != DocIdSetIterator.NO_MORE_DOCS) {
                        results[idx] = base + docID;
                        break;
                    }
                }
            }
            base += subReader.maxDoc();
        }
    }

    return results;
}

From source file:perf.SearchPerfTest.java

License:Apache License

private static void _main(String[] clArgs) throws Exception {

    // args: dirImpl indexPath numThread numIterPerThread
    // eg java SearchPerfTest /path/to/index 4 100
    final Args args = new Args(clArgs);

    Directory dir0;
    final String dirPath = args.getString("-indexPath") + "/index";
    final String dirImpl = args.getString("-dirImpl");

    OpenDirectory od = OpenDirectory.get(dirImpl);

    /*
    } else if (dirImpl.equals("NativePosixMMapDirectory")) {
      dir0 = new NativePosixMMapDirectory(new File(dirPath));
      ramDir = null;
      if (doFacets) {
        facetsDir = new NativePosixMMapDirectory(new File(facetsDirPath));
      }
    } else if (dirImpl.equals("CachingDirWrapper")) {
      dir0 = new CachingRAMDirectory(new MMapDirectory(new File(dirPath)));
      ramDir = null;
    } else if (dirImpl.equals("RAMExceptDirectPostingsDirectory")) {
      // Load only non-postings files into RAMDir (assumes
      // Lucene40PF is the wrapped PF):
      Set<String> postingsExtensions = new HashSet<String>();
      postingsExtensions.add("frq");
      postingsExtensions.add("prx");
      postingsExtensions.add("tip");
      postingsExtensions.add("tim");
              
      ramDir =  new RAMDirectory();
      Directory fsDir = new MMapDirectory(new File(dirPath));
      for (String file : fsDir.listAll()) {
        int idx = file.indexOf('.');
        if (idx != -1 && postingsExtensions.contains(file.substring(idx+1, file.length()))) {
          continue;
        }
            
        fsDir.copy(ramDir, file, file, IOContext.READ);
      }
      dir0 = new FileSwitchDirectory(postingsExtensions,
                             fsDir,
                             ramDir,
                             true);
      if (doFacets) {
        facetsDir = new RAMDirectory(new SimpleFSDirectory(new File(facetsDirPath)), IOContext.READ);
      }
      */

    final RAMDirectory ramDir;
    dir0 = od.open(Paths.get(dirPath));
    if (dir0 instanceof RAMDirectory) {
        ramDir = (RAMDirectory) dir0;
    } else {
        ramDir = null;
    }

    // TODO: NativeUnixDir?

    final String analyzer = args.getString("-analyzer");
    final String tasksFile = args.getString("-taskSource");
    final int searchThreadCount = args.getInt("-searchThreadCount");
    final String fieldName = args.getString("-field");
    final boolean printHeap = args.getFlag("-printHeap");
    final boolean doPKLookup = args.getFlag("-pk");
    final int topN = args.getInt("-topN");
    final boolean doStoredLoads = args.getFlag("-loadStoredFields");

    // Used to choose which random subset of tasks we will
    // run, to generate the PKLookup tasks, and to generate
    // any random pct filters:
    final long staticRandomSeed = args.getLong("-staticSeed");

    // Used to shuffle the random subset of tasks:
    final long randomSeed = args.getLong("-seed");

    // TODO: this could be way better.
    final String similarity = args.getString("-similarity");
    // now reflect
    final Class<? extends Similarity> simClazz = Class
            .forName("org.apache.lucene.search.similarities." + similarity).asSubclass(Similarity.class);
    final Similarity sim = simClazz.newInstance();

    System.out.println("Using dir impl " + dir0.getClass().getName());
    System.out.println("Analyzer " + analyzer);
    System.out.println("Similarity " + similarity);
    System.out.println("Search thread count " + searchThreadCount);
    System.out.println("topN " + topN);
    System.out.println("JVM " + (Constants.JRE_IS_64BIT ? "is" : "is not") + " 64bit");
    System.out.println("Pointer is " + RamUsageEstimator.NUM_BYTES_OBJECT_REF + " bytes");

    final Analyzer a;
    if (analyzer.equals("EnglishAnalyzer")) {
        a = new EnglishAnalyzer();
    } else if (analyzer.equals("ClassicAnalyzer")) {
        a = new ClassicAnalyzer();
    } else if (analyzer.equals("StandardAnalyzer")) {
        a = new StandardAnalyzer();
    } else if (analyzer.equals("StandardAnalyzerNoStopWords")) {
        a = new StandardAnalyzer(CharArraySet.EMPTY_SET);
    } else if (analyzer.equals("ShingleStandardAnalyzer")) {
        a = new ShingleAnalyzerWrapper(new StandardAnalyzer(CharArraySet.EMPTY_SET), 2, 2,
                ShingleFilter.DEFAULT_TOKEN_SEPARATOR, true, true, ShingleFilter.DEFAULT_FILLER_TOKEN);
    } else {
        throw new RuntimeException("unknown analyzer " + analyzer);
    }

    final ReferenceManager<IndexSearcher> mgr;
    final IndexWriter writer;
    final Directory dir;

    final String commit = args.getString("-commit");
    final String hiliteImpl = args.getString("-hiliteImpl");

    final String logFile = args.getString("-log");

    final long tSearcherStart = System.currentTimeMillis();

    final boolean verifyCheckSum = !args.getFlag("-skipVerifyChecksum");
    final boolean recacheFilterDeletes = args.getFlag("-recacheFilterDeletes");

    if (recacheFilterDeletes) {
        throw new UnsupportedOperationException("recacheFilterDeletes was deprecated");
    }

    if (args.getFlag("-nrt")) {
        // TODO: get taxoReader working here too
        // TODO: factor out & share this CL processing w/ Indexer
        final int indexThreadCount = args.getInt("-indexThreadCount");
        final String lineDocsFile = args.getString("-lineDocsFile");
        final float docsPerSecPerThread = args.getFloat("-docsPerSecPerThread");
        final float reopenEverySec = args.getFloat("-reopenEverySec");
        final boolean storeBody = args.getFlag("-store");
        final boolean tvsBody = args.getFlag("-tvs");
        final boolean useCFS = args.getFlag("-cfs");
        final String defaultPostingsFormat = args.getString("-postingsFormat");
        final String idFieldPostingsFormat = args.getString("-idFieldPostingsFormat");
        final boolean verbose = args.getFlag("-verbose");
        final boolean cloneDocs = args.getFlag("-cloneDocs");
        final Mode mode = Mode.valueOf(args.getString("-mode", "update").toUpperCase(Locale.ROOT));

        final long reopenEveryMS = (long) (1000 * reopenEverySec);

        if (verbose) {
            InfoStream.setDefault(new PrintStreamInfoStream(System.out));
        }

        if (!dirImpl.equals("RAMDirectory") && !dirImpl.equals("RAMExceptDirectPostingsDirectory")) {
            System.out.println("Wrap NRTCachingDirectory");
            dir0 = new NRTCachingDirectory(dir0, 20, 400.0);
        }

        dir = dir0;

        final IndexWriterConfig iwc = new IndexWriterConfig(a);
        iwc.setOpenMode(IndexWriterConfig.OpenMode.APPEND);
        iwc.setRAMBufferSizeMB(256.0);
        iwc.setIndexDeletionPolicy(NoDeletionPolicy.INSTANCE);

        // TODO: also RAMDirExceptDirect...?  need to
        // ... block deletes against wrapped FSDir?
        if (dirImpl.equals("RAMDirectory")) {
            // Let IW remove files only referenced by starting commit:
            iwc.setIndexDeletionPolicy(new KeepNoCommitsDeletionPolicy());
        }

        if (commit != null && commit.length() > 0) {
            System.out.println("Opening writer on commit=" + commit);
            iwc.setIndexCommit(PerfUtils.findCommitPoint(commit, dir));
        }

        ((TieredMergePolicy) iwc.getMergePolicy()).setNoCFSRatio(useCFS ? 1.0 : 0.0);
        //((TieredMergePolicy) iwc.getMergePolicy()).setMaxMergedSegmentMB(1024);
        //((TieredMergePolicy) iwc.getMergePolicy()).setReclaimDeletesWeight(3.0);
        //((TieredMergePolicy) iwc.getMergePolicy()).setMaxMergeAtOnce(4);

        final Codec codec = new Lucene62Codec() {
            @Override
            public PostingsFormat getPostingsFormatForField(String field) {
                return PostingsFormat
                        .forName(field.equals("id") ? idFieldPostingsFormat : defaultPostingsFormat);
            }
        };
        iwc.setCodec(codec);

        final ConcurrentMergeScheduler cms = (ConcurrentMergeScheduler) iwc.getMergeScheduler();
        // Only let one merge run at a time...
        // ... but queue up up to 4, before index thread is stalled:
        cms.setMaxMergesAndThreads(4, 1);

        iwc.setMergedSegmentWarmer(new IndexWriter.IndexReaderWarmer() {
            @Override
            public void warm(LeafReader reader) throws IOException {
                final long t0 = System.currentTimeMillis();
                //System.out.println("DO WARM: " + reader);
                IndexSearcher s = new IndexSearcher(reader);
                s.setQueryCache(null); // don't bench the cache
                s.search(new TermQuery(new Term(fieldName, "united")), 10);
                final long t1 = System.currentTimeMillis();
                System.out.println("warm segment=" + reader + " numDocs=" + reader.numDocs() + ": took "
                        + (t1 - t0) + " msec");
            }
        });

        writer = new IndexWriter(dir, iwc);
        System.out.println("Initial writer.maxDoc()=" + writer.maxDoc());

        // TODO: add -nrtBodyPostingsOffsets instead of
        // hardwired false:
        boolean addDVFields = mode == Mode.BDV_UPDATE || mode == Mode.NDV_UPDATE;
        LineFileDocs lineFileDocs = new LineFileDocs(lineDocsFile, false, storeBody, tvsBody, false, cloneDocs,
                null, null, null, addDVFields);
        IndexThreads threads = new IndexThreads(new Random(17), writer, new AtomicBoolean(false), lineFileDocs,
                indexThreadCount, -1, false, false, mode, docsPerSecPerThread, null, -1.0, -1);
        threads.start();

        mgr = new SearcherManager(writer, new SearcherFactory() {
            @Override
            public IndexSearcher newSearcher(IndexReader reader, IndexReader previous) {
                IndexSearcher s = new IndexSearcher(reader);
                s.setQueryCache(null); // don't bench the cache
                s.setSimilarity(sim);
                return s;
            }
        });

        System.out.println("reopen every " + reopenEverySec);

        Thread reopenThread = new Thread() {
            @Override
            public void run() {
                try {
                    final long startMS = System.currentTimeMillis();

                    int reopenCount = 1;
                    while (true) {
                        final long sleepMS = startMS + (reopenCount * reopenEveryMS)
                                - System.currentTimeMillis();
                        if (sleepMS < 0) {
                            System.out.println("WARNING: reopen fell behind by " + Math.abs(sleepMS) + " ms");
                        } else {
                            Thread.sleep(sleepMS);
                        }

                        mgr.maybeRefresh();
                        reopenCount++;
                        IndexSearcher s = mgr.acquire();
                        try {
                            if (ramDir != null) {
                                System.out.println(String.format(Locale.ENGLISH,
                                        "%.1fs: index: %d bytes in RAMDir; writer.maxDoc()=%d; searcher.maxDoc()=%d; searcher.numDocs()=%d",
                                        (System.currentTimeMillis() - startMS) / 1000.0, ramDir.ramBytesUsed(),
                                        writer.maxDoc(), s.getIndexReader().maxDoc(),
                                        s.getIndexReader().numDocs()));
                                //String[] l = ramDir.listAll();
                                //Arrays.sort(l);
                                //for(String f : l) {
                                //System.out.println("  " + f + ": " + ramDir.fileLength(f));
                                //}
                            } else {
                                System.out.println(String.format(Locale.ENGLISH,
                                        "%.1fs: done reopen; writer.maxDoc()=%d; searcher.maxDoc()=%d; searcher.numDocs()=%d",
                                        (System.currentTimeMillis() - startMS) / 1000.0, writer.maxDoc(),
                                        s.getIndexReader().maxDoc(), s.getIndexReader().numDocs()));
                            }
                        } finally {
                            mgr.release(s);
                        }
                    }
                } catch (Exception e) {
                    throw new RuntimeException(e);
                }
            }
        };
        reopenThread.setName("ReopenThread");
        reopenThread.setPriority(4 + Thread.currentThread().getPriority());
        reopenThread.start();

    } else {
        dir = dir0;
        writer = null;
        final DirectoryReader reader;
        if (commit != null && commit.length() > 0) {
            System.out.println("Opening searcher on commit=" + commit);
            reader = DirectoryReader.open(PerfUtils.findCommitPoint(commit, dir));
        } else {
            // open last commit
            reader = DirectoryReader.open(dir);
        }
        IndexSearcher s = new IndexSearcher(reader);
        s.setQueryCache(null); // don't bench the cache
        s.setSimilarity(sim);
        System.out.println("maxDoc=" + reader.maxDoc() + " numDocs=" + reader.numDocs() + " %tg deletes="
                + (100. * reader.maxDoc() / reader.numDocs()));

        mgr = new SingleIndexSearcher(s);
    }

    System.out.println((System.currentTimeMillis() - tSearcherStart) + " msec to init searcher/NRT");

    {
        IndexSearcher s = mgr.acquire();
        try {
            System.out.println("Searcher: numDocs=" + s.getIndexReader().numDocs() + " maxDoc="
                    + s.getIndexReader().maxDoc() + ": " + s);
        } finally {
            mgr.release(s);
        }
    }

    //System.out.println("searcher=" + searcher);

    FacetsConfig facetsConfig = new FacetsConfig();
    facetsConfig.setHierarchical("Date", true);

    TaxonomyReader taxoReader;
    Path taxoPath = Paths.get(args.getString("-indexPath"), "facets");
    Directory taxoDir = od.open(taxoPath);
    if (DirectoryReader.indexExists(taxoDir)) {
        taxoReader = new DirectoryTaxonomyReader(taxoDir);
        System.out.println("Taxonomy has " + taxoReader.getSize() + " ords");
    } else {
        taxoReader = null;
    }

    final Random staticRandom = new Random(staticRandomSeed);
    final Random random = new Random(randomSeed);

    final DirectSpellChecker spellChecker = new DirectSpellChecker();
    final IndexState indexState = new IndexState(mgr, taxoReader, fieldName, spellChecker, hiliteImpl,
            facetsConfig);

    final QueryParser queryParser = new QueryParser("body", a);
    TaskParser taskParser = new TaskParser(indexState, queryParser, fieldName, topN, staticRandom,
            doStoredLoads);

    final TaskSource tasks;

    if (tasksFile.startsWith("server:")) {
        int idx = tasksFile.indexOf(':', 8);
        if (idx == -1) {
            throw new RuntimeException(
                    "server is missing the port; should be server:interface:port (got: " + tasksFile + ")");
        }
        String iface = tasksFile.substring(7, idx);
        int port = Integer.valueOf(tasksFile.substring(1 + idx));
        RemoteTaskSource remoteTasks = new RemoteTaskSource(iface, port, searchThreadCount, taskParser);

        // nocommit must stop thread?
        tasks = remoteTasks;
    } else {
        // Load the tasks from a file:
        final int taskRepeatCount = args.getInt("-taskRepeatCount");
        final int numTaskPerCat = args.getInt("-tasksPerCat");
        tasks = new LocalTaskSource(indexState, taskParser, tasksFile, staticRandom, random, numTaskPerCat,
                taskRepeatCount, doPKLookup);
        System.out.println("Task repeat count " + taskRepeatCount);
        System.out.println("Tasks file " + tasksFile);
        System.out.println("Num task per cat " + numTaskPerCat);
    }

    args.check();

    // Evil respeller:
    //spellChecker.setMinPrefix(0);
    //spellChecker.setMaxInspections(1024);
    final TaskThreads taskThreads = new TaskThreads(tasks, indexState, searchThreadCount);
    Thread.sleep(10);

    final long startNanos = System.nanoTime();
    taskThreads.start();
    taskThreads.finish();
    final long endNanos = System.nanoTime();

    System.out.println("\n" + ((endNanos - startNanos) / 1000000.0) + " msec total");

    final List<Task> allTasks = tasks.getAllTasks();

    PrintStream out = new PrintStream(logFile);

    if (allTasks != null) {
        // Tasks were local: verify checksums:

        // indexState.setDocIDToID();

        final Map<Task, Task> tasksSeen = new HashMap<Task, Task>();

        out.println("\nResults for " + allTasks.size() + " tasks:");

        boolean fail = false;
        for (final Task task : allTasks) {
            if (verifyCheckSum) {
                final Task other = tasksSeen.get(task);
                if (other != null) {
                    if (task.checksum() != other.checksum()) {
                        System.out.println("\nTASK:");
                        task.printResults(System.out, indexState);
                        System.out.println("\nOTHER TASK:");
                        other.printResults(System.out, indexState);
                        fail = true;
                        //throw new RuntimeException("task " + task + " hit different checksums: " + task.checksum() + " vs " + other.checksum() + " other=" + other);
                    }
                } else {
                    tasksSeen.put(task, task);
                }
            }
            out.println("\nTASK: " + task);
            out.println("  " + (task.runTimeNanos / 1000000.0) + " msec");
            out.println("  thread " + task.threadID);
            task.printResults(out, indexState);
        }
        if (fail) {
            throw new RuntimeException("some tasks got different results across different threads");
        }

        allTasks.clear();
    }

    mgr.close();

    if (taxoReader != null) {
        taxoReader.close();
    }

    if (writer != null) {
        // Don't actually commit any index changes:
        writer.rollback();
    }

    dir.close();

    if (printHeap) {

        // Try to get RAM usage -- some ideas poached from http://www.javaworld.com/javaworld/javatips/jw-javatip130.html
        final Runtime runtime = Runtime.getRuntime();
        long usedMem1 = PerfUtils.usedMemory(runtime);
        long usedMem2 = Long.MAX_VALUE;
        for (int iter = 0; iter < 10; iter++) {
            runtime.runFinalization();
            runtime.gc();
            Thread.yield();
            Thread.sleep(100);
            usedMem2 = usedMem1;
            usedMem1 = PerfUtils.usedMemory(runtime);
        }
        out.println("\nHEAP: " + PerfUtils.usedMemory(runtime));
    }
    out.close();
}