List of usage examples for org.apache.lucene.index LeafReader maxDoc
public abstract int maxDoc();
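Most of the examples below share one pattern: maxDoc() is one greater than the largest document number in a segment, and it still counts deleted documents, so code loops from 0 to maxDoc() and filters with getLiveDocs(). A minimal sketch of that pattern (the helper name countLiveDocs is illustrative, not taken from any of the sources below):

import org.apache.lucene.index.LeafReader;
import org.apache.lucene.util.Bits;

// Visit every live document in one segment. getLiveDocs() is null when the
// segment has no deletions; otherwise it has exactly maxDoc() bits.
static int countLiveDocs(LeafReader reader) {
    Bits liveDocs = reader.getLiveDocs();
    int live = 0;
    for (int docId = 0; docId < reader.maxDoc(); docId++) {
        if (liveDocs == null || liveDocs.get(docId)) {
            live++;
        }
    }
    return live; // should equal reader.numDocs()
}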
From source file:org.elasticsearch.index.fielddata.plain.GeoPointArrayIndexFieldData.java
License:Apache License
/**
 * Backward compatibility support for legacy lat/lon double arrays
 */
private AtomicGeoPointFieldData loadLegacyFieldData(LeafReader reader, NonEstimatingEstimator estimator,
        Terms terms, AtomicGeoPointFieldData data) throws Exception {
    DoubleArray lat = BigArrays.NON_RECYCLING_INSTANCE.newDoubleArray(128);
    DoubleArray lon = BigArrays.NON_RECYCLING_INSTANCE.newDoubleArray(128);
    final float acceptableTransientOverheadRatio = fieldDataType.getSettings().getAsFloat(
            "acceptable_transient_overhead_ratio", OrdinalsBuilder.DEFAULT_ACCEPTABLE_OVERHEAD_RATIO);
    boolean success = false;
    try (OrdinalsBuilder builder = new OrdinalsBuilder(terms.size(), reader.maxDoc(),
            acceptableTransientOverheadRatio)) {
        final GeoPointTermsEnumLegacy iter = new GeoPointTermsEnumLegacy(builder.buildFromTerms(terms.iterator()));
        GeoPoint point;
        long numTerms = 0;
        while ((point = iter.next()) != null) {
            lat = BigArrays.NON_RECYCLING_INSTANCE.resize(lat, numTerms + 1);
            lon = BigArrays.NON_RECYCLING_INSTANCE.resize(lon, numTerms + 1);
            lat.set(numTerms, point.getLat());
            lon.set(numTerms, point.getLon());
            ++numTerms;
        }
        lat = BigArrays.NON_RECYCLING_INSTANCE.resize(lat, numTerms);
        lon = BigArrays.NON_RECYCLING_INSTANCE.resize(lon, numTerms);
        Ordinals build = builder.build(fieldDataType.getSettings());
        RandomAccessOrds ordinals = build.ordinals();
        if (!(FieldData.isMultiValued(ordinals)
                || CommonSettings.getMemoryStorageHint(fieldDataType) == CommonSettings.MemoryStorageFormat.ORDINALS)) {
            int maxDoc = reader.maxDoc();
            DoubleArray sLat = BigArrays.NON_RECYCLING_INSTANCE.newDoubleArray(reader.maxDoc());
            DoubleArray sLon = BigArrays.NON_RECYCLING_INSTANCE.newDoubleArray(reader.maxDoc());
            for (int i = 0; i < maxDoc; i++) {
                ordinals.setDocument(i);
                long nativeOrdinal = ordinals.nextOrd();
                if (nativeOrdinal != RandomAccessOrds.NO_MORE_ORDS) {
                    sLat.set(i, lat.get(nativeOrdinal));
                    sLon.set(i, lon.get(nativeOrdinal));
                }
            }
            BitSet set = builder.buildDocsWithValuesSet();
            data = new GeoPointArrayLegacyAtomicFieldData.Single(sLon, sLat, set);
        } else {
            data = new GeoPointArrayLegacyAtomicFieldData.WithOrdinals(lon, lat, build, reader.maxDoc());
        }
        success = true;
        return data;
    } finally {
        if (success) {
            estimator.afterLoad(null, data.ramBytesUsed());
        }
    }
}
From source file:org.elasticsearch.index.shard.IndexShardTestCase.java
License:Apache License
protected Set<Uid> getShardDocUIDs(final IndexShard shard) throws IOException {
    shard.refresh("get_uids");
    try (Engine.Searcher searcher = shard.acquireSearcher("test")) {
        Set<Uid> ids = new HashSet<>();
        for (LeafReaderContext leafContext : searcher.reader().leaves()) {
            LeafReader reader = leafContext.reader();
            Bits liveDocs = reader.getLiveDocs();
            for (int i = 0; i < reader.maxDoc(); i++) {
                if (liveDocs == null || liveDocs.get(i)) {
                    Document uuid = reader.document(i, Collections.singleton(UidFieldMapper.NAME));
                    ids.add(Uid.createUid(uuid.get(UidFieldMapper.NAME)));
                }
            }
        }
        return ids;
    }
}
From source file:org.elasticsearch.index.shard.ShardSplittingQuery.java
License:Apache License
@Override
public Weight createWeight(IndexSearcher searcher, boolean needsScores, float boost) {
    return new ConstantScoreWeight(this, boost) {
        @Override
        public String toString() {
            return "weight(delete docs query)";
        }

        @Override
        public Scorer scorer(LeafReaderContext context) throws IOException {
            LeafReader leafReader = context.reader();
            FixedBitSet bitSet = new FixedBitSet(leafReader.maxDoc());
            Terms terms = leafReader.terms(RoutingFieldMapper.NAME);
            Predicate<BytesRef> includeInShard = ref -> {
                int targetShardId = OperationRouting.generateShardId(indexMetaData,
                        Uid.decodeId(ref.bytes, ref.offset, ref.length), null);
                return shardId == targetShardId;
            };
            if (terms == null) {
                // this is the common case - no partitioning and no _routing values
                // in this case we also don't do anything special with regards to nested docs since we basically delete
                // by ID and parent and nested all have the same id.
                assert indexMetaData.isRoutingPartitionedIndex() == false;
                findSplitDocs(IdFieldMapper.NAME, includeInShard, leafReader, bitSet::set);
            } else {
                final BitSet parentBitSet;
                if (nestedParentBitSetProducer == null) {
                    parentBitSet = null;
                } else {
                    parentBitSet = nestedParentBitSetProducer.getBitSet(context);
                    if (parentBitSet == null) {
                        return null; // no matches
                    }
                }
                if (indexMetaData.isRoutingPartitionedIndex()) {
                    // this is the heaviest invariant. Here we have to visit all docs' stored fields to extract
                    // _id and _routing because this index is routing partitioned.
                    Visitor visitor = new Visitor(leafReader);
                    TwoPhaseIterator twoPhaseIterator = parentBitSet == null
                            ? new RoutingPartitionedDocIdSetIterator(visitor)
                            : new NestedRoutingPartitionedDocIdSetIterator(visitor, parentBitSet);
                    return new ConstantScoreScorer(this, score(), twoPhaseIterator);
                } else {
                    // here we potentially guard the docID consumers with our parent bitset if we have one.
                    // this ensures that we are only marking root documents in the nested case and if necessary
                    // we do a second pass to mark the corresponding children in markChildDocs
                    Function<IntConsumer, IntConsumer> maybeWrapConsumer = consumer -> {
                        if (parentBitSet != null) {
                            return docId -> {
                                if (parentBitSet.get(docId)) {
                                    consumer.accept(docId);
                                }
                            };
                        }
                        return consumer;
                    };
                    // in the _routing case we first go and find all docs that have a routing value and mark the
                    // ones we have to delete
                    findSplitDocs(RoutingFieldMapper.NAME, ref -> {
                        int targetShardId = OperationRouting.generateShardId(indexMetaData, null, ref.utf8ToString());
                        return shardId == targetShardId;
                    }, leafReader, maybeWrapConsumer.apply(bitSet::set));
                    // now if we have a mixed index where some docs have a _routing value and some don't we have to
                    // exclude the ones with a routing value from the next iteration and delete / select based on the ID.
                    if (terms.getDocCount() != leafReader.maxDoc()) {
                        // this is a special case where some of the docs have no routing values; this sucks but
                        // it's possible today
                        FixedBitSet hasRoutingValue = new FixedBitSet(leafReader.maxDoc());
                        findSplitDocs(RoutingFieldMapper.NAME, ref -> false, leafReader,
                                maybeWrapConsumer.apply(hasRoutingValue::set));
                        IntConsumer bitSetConsumer = maybeWrapConsumer.apply(bitSet::set);
                        findSplitDocs(IdFieldMapper.NAME, includeInShard, leafReader, docId -> {
                            if (hasRoutingValue.get(docId) == false) {
                                bitSetConsumer.accept(docId);
                            }
                        });
                    }
                }
                if (parentBitSet != null) {
                    // if nested docs are involved we also need to mark all child docs that belong to a matching parent doc.
                    markChildDocs(parentBitSet, bitSet);
                }
            }
            return new ConstantScoreScorer(this, score(), new BitSetIterator(bitSet, bitSet.length()));
        }

        @Override
        public boolean isCacheable(LeafReaderContext ctx) {
            // This is not a regular query, let's not cache it. It wouldn't help anyway.
            return false;
        }
    };
}
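A condensed sketch of the bit-set pattern the scorer above uses (and that several later examples repeat): a FixedBitSet sized to maxDoc() holds one bit per document of the segment and can then be handed to a scorer as a DocIdSetIterator. The IntPredicate here is a hypothetical stand-in for the real matching logic:

import java.util.function.IntPredicate;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.util.BitSetIterator;
import org.apache.lucene.util.FixedBitSet;

// Mark matching documents in a per-segment bit set and expose it as an iterator.
static DocIdSetIterator collectMatches(LeafReader reader, IntPredicate matches) {
    FixedBitSet bits = new FixedBitSet(reader.maxDoc());
    for (int docId = 0; docId < reader.maxDoc(); docId++) {
        if (matches.test(docId)) {
            bits.set(docId);
        }
    }
    return new BitSetIterator(bits, bits.cardinality());
}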
From source file:org.geotoolkit.lucene.filter.LuceneOGCFilter.java
License:Open Source License
/**
 * {@inheritDoc }
 */
@Override
public DocIdSet getDocIdSet(final LeafReaderContext ctx, final Bits b) throws IOException {
    boolean treeSearch = false;
    boolean reverse = false;
    boolean distanceFilter = false;
    final Set<String> treeMatching = new HashSet<>();
    if (tree != null) {
        /*
         * For distance buffer filter no envelope only mode
         */
        if (filter instanceof DistanceBufferOperator) {
            distanceFilter = true;
            reverse = filter instanceof Beyond;
            final DistanceBufferOperator sp = (DistanceBufferOperator) filter;
            if (sp.getExpression2() instanceof Literal) {
                try {
                    final Literal lit = (Literal) sp.getExpression2();
                    final GeneralEnvelope bound = getExtendedReprojectedEnvelope(lit.getValue(), tree.getCrs(),
                            sp.getDistanceUnits(), sp.getDistance());
                    final int[] resultID = tree.searchID(bound);
                    Arrays.sort(resultID);
                    treeMatching.clear();
                    TreeElementMapper<NamedEnvelope> tem = tree.getTreeElementMapper();
                    for (int id : resultID) {
                        final NamedEnvelope env = tem.getObjectFromTreeIdentifier(id);
                        if (env != null) {
                            treeMatching.add(env.getId());
                        }
                    }
                    treeSearch = true;
                } catch (FactoryException ex) {
                    throw new IOException(ex);
                } catch (StoreIndexException ex) {
                    Throwable cause = ex.getCause();
                    if (cause instanceof IOException) {
                        throw (IOException) cause;
                    } else {
                        throw new IOException(ex);
                    }
                }
            } else {
                LOGGER.log(Level.WARNING, "Not a literal for spatial filter:{0}", sp.getExpression2());
            }
        } else if (filter instanceof BinarySpatialOperator) {
            final BinarySpatialOperator sp = (BinarySpatialOperator) filter;
            if (sp.getExpression2() instanceof Literal) {
                final Literal lit = (Literal) sp.getExpression2();
                final Envelope boundFilter = getReprojectedEnvelope(lit.getValue(), tree.getCrs());
                try {
                    if (filterType == SpatialFilterType.CROSSES || !envelopeOnly) {
                        if (filterType == SpatialFilterType.DISJOINT) {
                            reverse = true;
                        }
                        final int[] resultID = tree.searchID(boundFilter);
                        Arrays.sort(resultID);
                        final TreeElementMapper<NamedEnvelope> tem = tree.getTreeElementMapper();
                        treeMatching.clear();
                        for (int id : resultID) {
                            final NamedEnvelope env = tem.getObjectFromTreeIdentifier(id);
                            if (env != null) {
                                treeMatching.add(env.getId());
                            }
                        }
                        treeSearch = true;
                        envelopeOnly = false;
                    } else {
                        final int[] resultID = TreeX.search(tree, boundFilter, filterType);
                        Arrays.sort(resultID);
                        final TreeElementMapper<NamedEnvelope> tem = tree.getTreeElementMapper();
                        treeMatching.clear();
                        for (int id : resultID) {
                            final NamedEnvelope env = tem.getObjectFromTreeIdentifier(id);
                            if (env != null) {
                                treeMatching.add(env.getId());
                            }
                        }
                        treeSearch = true;
                    }
                } catch (StoreIndexException ex) {
                    Throwable cause = ex.getCause();
                    if (cause instanceof IOException) {
                        throw (IOException) cause;
                    } else {
                        throw new IOException(ex);
                    }
                }
            } else {
                LOGGER.log(Level.WARNING, "Not a literal for spatial filter:{0}", sp.getExpression2());
            }
        } else {
            LOGGER.log(Level.WARNING, "not a spatial operator:{0}", filter.getClass().getName());
        }
    } else {
        LOGGER.finer("Null R-tree in spatial search");
    }
    final LeafReader reader = ctx.reader();
    final BitDocIdSet set = new BitDocIdSet(new FixedBitSet(reader.maxDoc()));
    final DocsEnum termDocs = reader.termDocsEnum(META_FIELD);
    int n = termDocs.nextDoc();
    while (n != DocsEnum.NO_MORE_DOCS) {
        final int docId = termDocs.docID();
        final Document doc = reader.document(docId, ID_FIELDS);
        final String id = doc.get(IDENTIFIER_FIELD_NAME);
        final boolean match = treeMatching.contains(id);
        if (treeSearch && reverse && !match) {
            set.bits().set(docId);
        } else if (!treeSearch || match) {
            if (envelopeOnly && !distanceFilter) {
                set.bits().set(docId);
            } else {
                final Document geoDoc = reader.document(docId, GEOMETRY_FIELDS);
                if (filter.evaluate(geoDoc)) {
                    set.bits().set(docId);
                }
            }
        }
        n = termDocs.nextDoc();
    }
    return set;
}
From source file:org.hibernate.search.filter.impl.CachingWrapperQuery.java
License:LGPL
/**
 * Default cache implementation: uses {@link RoaringDocIdSet}.
 */
protected DocIdSet cacheImpl(DocIdSetIterator iterator, LeafReader reader) throws IOException {
    return new RoaringDocIdSet.Builder(reader.maxDoc()).add(iterator).build();
}
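A possible consumer of such a cached set, shown only as a usage sketch (the helper name is illustrative, not part of the Hibernate Search source): DocIdSet.iterator() may return null when the set is empty, so callers should guard for that.

import java.io.IOException;
import java.util.function.IntConsumer;
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.DocIdSetIterator;

// Re-iterate a DocIdSet that was built once per segment (e.g. by cacheImpl above).
static void forEachCachedDoc(DocIdSet cached, IntConsumer consumer) throws IOException {
    DocIdSetIterator it = cached.iterator();
    if (it == null) {
        return; // empty set
    }
    for (int doc = it.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = it.nextDoc()) {
        consumer.accept(doc);
    }
}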
From source file:org.hibernate.search.spatial.impl.SpatialHashFilter.java
License:LGPL
/**
 * Search the index for documents having the correct spatial hash cell id at given grid level.
 *
 * @param context the {@link LeafReaderContext} for which to return the {@link DocIdSet}.
 * @param acceptDocs Bits that represent the allowable docs to match (typically deleted docs but possibly filtering
 *        other documents)
 * @return a {@link DocIdSet} with the document ids matching
 */
@Override
public DocIdSet getDocIdSet(LeafReaderContext context, Bits acceptDocs) throws IOException {
    if (spatialHashCellsIds.size() == 0) {
        return null;
    }
    final LeafReader atomicReader = context.reader();
    BitDocIdSet matchedDocumentsIds = new BitDocIdSet(new FixedBitSet(atomicReader.maxDoc()));
    Boolean found = false;
    for (int i = 0; i < spatialHashCellsIds.size(); i++) {
        Term spatialHashCellTerm = new Term(fieldName, spatialHashCellsIds.get(i));
        DocsEnum spatialHashCellsDocs = atomicReader.termDocsEnum(spatialHashCellTerm);
        if (spatialHashCellsDocs != null) {
            while (true) {
                final int docId = spatialHashCellsDocs.nextDoc();
                if (docId == DocIdSetIterator.NO_MORE_DOCS) {
                    break;
                } else {
                    if (acceptDocs == null || acceptDocs.get(docId)) {
                        matchedDocumentsIds.bits().set(docId);
                        found = true;
                    }
                }
            }
        }
    }
    if (found) {
        return matchedDocumentsIds;
    } else {
        return null;
    }
}
From source file:org.hibernate.search.spatial.impl.SpatialHashQuery.java
License:LGPL
/**
 * Search the index for documents having the correct spatial hash cell id at given grid level.
 *
 * @param context the {@link LeafReaderContext} for which to return the {@link DocIdSet}.
 * @return a {@link DocIdSetIterator} with the matching document ids
 */
private DocIdSetIterator createDocIdSetIterator(LeafReaderContext context) throws IOException {
    if (spatialHashCellsIds.size() == 0) {
        return null;
    }
    final LeafReader atomicReader = context.reader();
    BitDocIdSet matchedDocumentsIds = new BitDocIdSet(new FixedBitSet(atomicReader.maxDoc()));
    boolean found = false;
    for (int i = 0; i < spatialHashCellsIds.size(); i++) {
        Term spatialHashCellTerm = new Term(fieldName, spatialHashCellsIds.get(i));
        PostingsEnum spatialHashCellsDocs = atomicReader.postings(spatialHashCellTerm);
        if (spatialHashCellsDocs != null) {
            while (true) {
                final int docId = spatialHashCellsDocs.nextDoc();
                if (docId == DocIdSetIterator.NO_MORE_DOCS) {
                    break;
                } else {
                    matchedDocumentsIds.bits().set(docId);
                    found = true;
                }
            }
        }
    }
    if (found) {
        return matchedDocumentsIds.iterator();
    } else {
        return DocIdSetIterator.empty();
    }
}
From source file:org.modeshape.jcr.index.lucene.query.ConstantScoreWeightQuery.java
License:Apache License
@Override
public Weight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException {
    Set<String> fieldSet = Collections.singleton(field);
    // return a weight which uses a constant (1.0f) scorer...
    return new RandomAccessWeight(this) {
        @Override
        protected Bits getMatchingDocs(LeafReaderContext context) throws IOException {
            LeafReader leafReader = context.reader();
            Bits liveDocs = leafReader.getLiveDocs();
            // if liveDocs is null it means there are no deleted documents...
            int docsCount = liveDocs != null ? liveDocs.length() : leafReader.numDocs();
            FixedBitSet result = new FixedBitSet(leafReader.maxDoc());
            for (int i = 0; i < docsCount; i++) {
                if (liveDocs != null && !liveDocs.get(i)) {
                    continue;
                }
                Document document = leafReader.document(i, fieldSet);
                IndexableField[] fields = document.getFields(field);
                if (fields.length == 0) {
                    // the document doesn't have the field...
                    continue;
                }
                if (areValid(fields)) {
                    result.set(i);
                }
            }
            return result.cardinality() > 0 ? result : null;
        }
    };
}
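The example above leans on two invariants that are easy to misremember, so here they are spelled out as a small illustrative check (not part of the ModeShape source): a non-null live-docs bit set always has maxDoc() bits, and numDocs() equals maxDoc() minus numDeletedDocs().

import org.apache.lucene.index.LeafReader;
import org.apache.lucene.util.Bits;

// Sanity checks for the relationship between maxDoc(), numDocs() and live docs.
static void checkDocCounts(LeafReader reader) {
    Bits liveDocs = reader.getLiveDocs();
    assert liveDocs == null || liveDocs.length() == reader.maxDoc();
    assert reader.numDocs() == reader.maxDoc() - reader.numDeletedDocs();
}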
From source file:org.uberfire.ext.metadata.backend.lucene.index.BaseLuceneIndex.java
License:Apache License
protected int[] lookupDocIdByPK(final IndexSearcher searcher, final String... ids) throws IOException {
    final List<LeafReaderContext> subReaders = searcher.getIndexReader().leaves();
    final TermsEnum[] termsEnums = new TermsEnum[subReaders.size()];
    final PostingsEnum[] docsEnums = new PostingsEnum[subReaders.size()];
    for (int subIDX = 0; subIDX < subReaders.size(); subIDX++) {
        termsEnums[subIDX] = subReaders.get(subIDX).reader().fields().terms("id").iterator();
    }
    int[] results = new int[ids.length];
    for (int i = 0; i < results.length; i++) {
        results[i] = -1;
    }
    // for each id given
    for (int idx = 0; idx < ids.length; idx++) {
        int base = 0;
        final BytesRef id = new BytesRef(ids[idx]);
        // for each leaf reader..
        for (int subIDX = 0; subIDX < subReaders.size(); subIDX++) {
            final LeafReader subReader = subReaders.get(subIDX).reader();
            final TermsEnum termsEnum = termsEnums[subIDX];
            // does the enumeration of ("id") terms from our reader contain the "id" field we're looking for?
            if (termsEnum.seekExact(id)) {
                final PostingsEnum docs = docsEnums[subIDX] = termsEnum.postings(docsEnums[subIDX], 0);
                // okay, the reader contains it, get the postings ("docs+") for and check that they're there (NP check)
                if (docs != null) {
                    final int docID = docs.nextDoc();
                    Bits liveDocs = subReader.getLiveDocs();
                    // But wait, maybe some of the docs have been deleted! Check that too..
                    if ((liveDocs == null || liveDocs.get(docID)) && docID != DocIdSetIterator.NO_MORE_DOCS) {
                        results[idx] = base + docID;
                        break;
                    }
                }
            }
            base += subReader.maxDoc();
        }
    }
    return results;
}
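The running base that the loop above accumulates from subReader.maxDoc() is exactly what LeafReaderContext.docBase already stores, so a leaf-local doc ID can be turned into a top-level doc ID either way. A small sketch (the method name and leafIndex parameter are illustrative):

import java.util.List;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReaderContext;

// Convert a doc ID that is local to one leaf into a top-level doc ID.
static int toGlobalDocId(IndexReader topReader, int leafIndex, int leafDocId) {
    List<LeafReaderContext> leaves = topReader.leaves();
    LeafReaderContext leaf = leaves.get(leafIndex);
    // docBase is the sum of reader().maxDoc() over all preceding leaves
    return leaf.docBase + leafDocId;
}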
From source file:perf.SearchPerfTest.java
License:Apache License
private static void _main(String[] clArgs) throws Exception {
    // args: dirImpl indexPath numThread numIterPerThread
    // eg java SearchPerfTest /path/to/index 4 100
    final Args args = new Args(clArgs);

    Directory dir0;
    final String dirPath = args.getString("-indexPath") + "/index";
    final String dirImpl = args.getString("-dirImpl");

    OpenDirectory od = OpenDirectory.get(dirImpl);

    /*
    } else if (dirImpl.equals("NativePosixMMapDirectory")) {
      dir0 = new NativePosixMMapDirectory(new File(dirPath));
      ramDir = null;
      if (doFacets) {
        facetsDir = new NativePosixMMapDirectory(new File(facetsDirPath));
      }
    } else if (dirImpl.equals("CachingDirWrapper")) {
      dir0 = new CachingRAMDirectory(new MMapDirectory(new File(dirPath)));
      ramDir = null;
    } else if (dirImpl.equals("RAMExceptDirectPostingsDirectory")) {
      // Load only non-postings files into RAMDir (assumes
      // Lucene40PF is the wrapped PF):
      Set<String> postingsExtensions = new HashSet<String>();
      postingsExtensions.add("frq");
      postingsExtensions.add("prx");
      postingsExtensions.add("tip");
      postingsExtensions.add("tim");
      ramDir = new RAMDirectory();
      Directory fsDir = new MMapDirectory(new File(dirPath));
      for (String file : fsDir.listAll()) {
        int idx = file.indexOf('.');
        if (idx != -1 && postingsExtensions.contains(file.substring(idx+1, file.length()))) {
          continue;
        }
        fsDir.copy(ramDir, file, file, IOContext.READ);
      }
      dir0 = new FileSwitchDirectory(postingsExtensions, fsDir, ramDir, true);
      if (doFacets) {
        facetsDir = new RAMDirectory(new SimpleFSDirectory(new File(facetsDirPath)), IOContext.READ);
      }
    */

    final RAMDirectory ramDir;
    dir0 = od.open(Paths.get(dirPath));
    if (dir0 instanceof RAMDirectory) {
        ramDir = (RAMDirectory) dir0;
    } else {
        ramDir = null;
    }

    // TODO: NativeUnixDir?

    final String analyzer = args.getString("-analyzer");
    final String tasksFile = args.getString("-taskSource");
    final int searchThreadCount = args.getInt("-searchThreadCount");
    final String fieldName = args.getString("-field");
    final boolean printHeap = args.getFlag("-printHeap");
    final boolean doPKLookup = args.getFlag("-pk");
    final int topN = args.getInt("-topN");
    final boolean doStoredLoads = args.getFlag("-loadStoredFields");

    // Used to choose which random subset of tasks we will
    // run, to generate the PKLookup tasks, and to generate
    // any random pct filters:
    final long staticRandomSeed = args.getLong("-staticSeed");

    // Used to shuffle the random subset of tasks:
    final long randomSeed = args.getLong("-seed");

    // TODO: this could be way better.
    final String similarity = args.getString("-similarity");
    // now reflect
    final Class<? extends Similarity> simClazz =
            Class.forName("org.apache.lucene.search.similarities." + similarity).asSubclass(Similarity.class);
    final Similarity sim = simClazz.newInstance();

    System.out.println("Using dir impl " + dir0.getClass().getName());
    System.out.println("Analyzer " + analyzer);
    System.out.println("Similarity " + similarity);
    System.out.println("Search thread count " + searchThreadCount);
    System.out.println("topN " + topN);
    System.out.println("JVM " + (Constants.JRE_IS_64BIT ? "is" : "is not") + " 64bit");
    System.out.println("Pointer is " + RamUsageEstimator.NUM_BYTES_OBJECT_REF + " bytes");

    final Analyzer a;
    if (analyzer.equals("EnglishAnalyzer")) {
        a = new EnglishAnalyzer();
    } else if (analyzer.equals("ClassicAnalyzer")) {
        a = new ClassicAnalyzer();
    } else if (analyzer.equals("StandardAnalyzer")) {
        a = new StandardAnalyzer();
    } else if (analyzer.equals("StandardAnalyzerNoStopWords")) {
        a = new StandardAnalyzer(CharArraySet.EMPTY_SET);
    } else if (analyzer.equals("ShingleStandardAnalyzer")) {
        a = new ShingleAnalyzerWrapper(new StandardAnalyzer(CharArraySet.EMPTY_SET), 2, 2,
                ShingleFilter.DEFAULT_TOKEN_SEPARATOR, true, true, ShingleFilter.DEFAULT_FILLER_TOKEN);
    } else {
        throw new RuntimeException("unknown analyzer " + analyzer);
    }

    final ReferenceManager<IndexSearcher> mgr;
    final IndexWriter writer;
    final Directory dir;
    final String commit = args.getString("-commit");
    final String hiliteImpl = args.getString("-hiliteImpl");
    final String logFile = args.getString("-log");

    final long tSearcherStart = System.currentTimeMillis();

    final boolean verifyCheckSum = !args.getFlag("-skipVerifyChecksum");
    final boolean recacheFilterDeletes = args.getFlag("-recacheFilterDeletes");

    if (recacheFilterDeletes) {
        throw new UnsupportedOperationException("recacheFilterDeletes was deprecated");
    }

    if (args.getFlag("-nrt")) {
        // TODO: get taxoReader working here too
        // TODO: factor out & share this CL processing w/ Indexer
        final int indexThreadCount = args.getInt("-indexThreadCount");
        final String lineDocsFile = args.getString("-lineDocsFile");
        final float docsPerSecPerThread = args.getFloat("-docsPerSecPerThread");
        final float reopenEverySec = args.getFloat("-reopenEverySec");
        final boolean storeBody = args.getFlag("-store");
        final boolean tvsBody = args.getFlag("-tvs");
        final boolean useCFS = args.getFlag("-cfs");
        final String defaultPostingsFormat = args.getString("-postingsFormat");
        final String idFieldPostingsFormat = args.getString("-idFieldPostingsFormat");
        final boolean verbose = args.getFlag("-verbose");
        final boolean cloneDocs = args.getFlag("-cloneDocs");
        final Mode mode = Mode.valueOf(args.getString("-mode", "update").toUpperCase(Locale.ROOT));

        final long reopenEveryMS = (long) (1000 * reopenEverySec);

        if (verbose) {
            InfoStream.setDefault(new PrintStreamInfoStream(System.out));
        }

        if (!dirImpl.equals("RAMDirectory") && !dirImpl.equals("RAMExceptDirectPostingsDirectory")) {
            System.out.println("Wrap NRTCachingDirectory");
            dir0 = new NRTCachingDirectory(dir0, 20, 400.0);
        }

        dir = dir0;

        final IndexWriterConfig iwc = new IndexWriterConfig(a);
        iwc.setOpenMode(IndexWriterConfig.OpenMode.APPEND);
        iwc.setRAMBufferSizeMB(256.0);
        iwc.setIndexDeletionPolicy(NoDeletionPolicy.INSTANCE);

        // TODO: also RAMDirExceptDirect...? need to
        // ... block deletes against wrapped FSDir?
        if (dirImpl.equals("RAMDirectory")) {
            // Let IW remove files only referenced by starting commit:
            iwc.setIndexDeletionPolicy(new KeepNoCommitsDeletionPolicy());
        }

        if (commit != null && commit.length() > 0) {
            System.out.println("Opening writer on commit=" + commit);
            iwc.setIndexCommit(PerfUtils.findCommitPoint(commit, dir));
        }

        ((TieredMergePolicy) iwc.getMergePolicy()).setNoCFSRatio(useCFS ? 1.0 : 0.0);
        //((TieredMergePolicy) iwc.getMergePolicy()).setMaxMergedSegmentMB(1024);
        //((TieredMergePolicy) iwc.getMergePolicy()).setReclaimDeletesWeight(3.0);
        //((TieredMergePolicy) iwc.getMergePolicy()).setMaxMergeAtOnce(4);

        final Codec codec = new Lucene62Codec() {
            @Override
            public PostingsFormat getPostingsFormatForField(String field) {
                return PostingsFormat.forName(field.equals("id") ? idFieldPostingsFormat : defaultPostingsFormat);
            }
        };
        iwc.setCodec(codec);

        final ConcurrentMergeScheduler cms = (ConcurrentMergeScheduler) iwc.getMergeScheduler();
        // Only let one merge run at a time...
        // ... but queue up to 4, before index thread is stalled:
        cms.setMaxMergesAndThreads(4, 1);

        iwc.setMergedSegmentWarmer(new IndexWriter.IndexReaderWarmer() {
            @Override
            public void warm(LeafReader reader) throws IOException {
                final long t0 = System.currentTimeMillis();
                //System.out.println("DO WARM: " + reader);
                IndexSearcher s = new IndexSearcher(reader);
                s.setQueryCache(null); // don't bench the cache
                s.search(new TermQuery(new Term(fieldName, "united")), 10);
                final long t1 = System.currentTimeMillis();
                System.out.println("warm segment=" + reader + " numDocs=" + reader.numDocs() + ": took "
                        + (t1 - t0) + " msec");
            }
        });

        writer = new IndexWriter(dir, iwc);
        System.out.println("Initial writer.maxDoc()=" + writer.maxDoc());

        // TODO: add -nrtBodyPostingsOffsets instead of
        // hardwired false:
        boolean addDVFields = mode == Mode.BDV_UPDATE || mode == Mode.NDV_UPDATE;
        LineFileDocs lineFileDocs = new LineFileDocs(lineDocsFile, false, storeBody, tvsBody, false, cloneDocs,
                null, null, null, addDVFields);
        IndexThreads threads = new IndexThreads(new Random(17), writer, new AtomicBoolean(false), lineFileDocs,
                indexThreadCount, -1, false, false, mode, docsPerSecPerThread, null, -1.0, -1);
        threads.start();

        mgr = new SearcherManager(writer, new SearcherFactory() {
            @Override
            public IndexSearcher newSearcher(IndexReader reader, IndexReader previous) {
                IndexSearcher s = new IndexSearcher(reader);
                s.setQueryCache(null); // don't bench the cache
                s.setSimilarity(sim);
                return s;
            }
        });

        System.out.println("reopen every " + reopenEverySec);

        Thread reopenThread = new Thread() {
            @Override
            public void run() {
                try {
                    final long startMS = System.currentTimeMillis();

                    int reopenCount = 1;
                    while (true) {
                        final long sleepMS = startMS + (reopenCount * reopenEveryMS) - System.currentTimeMillis();
                        if (sleepMS < 0) {
                            System.out.println("WARNING: reopen fell behind by " + Math.abs(sleepMS) + " ms");
                        } else {
                            Thread.sleep(sleepMS);
                        }

                        mgr.maybeRefresh();
                        reopenCount++;
                        IndexSearcher s = mgr.acquire();
                        try {
                            if (ramDir != null) {
                                System.out.println(String.format(Locale.ENGLISH,
                                        "%.1fs: index: %d bytes in RAMDir; writer.maxDoc()=%d; searcher.maxDoc()=%d; searcher.numDocs()=%d",
                                        (System.currentTimeMillis() - startMS) / 1000.0, ramDir.ramBytesUsed(),
                                        writer.maxDoc(), s.getIndexReader().maxDoc(), s.getIndexReader().numDocs()));
                                //String[] l = ramDir.listAll();
                                //Arrays.sort(l);
                                //for(String f : l) {
                                //  System.out.println("  " + f + ": " + ramDir.fileLength(f));
                                //}
                            } else {
                                System.out.println(String.format(Locale.ENGLISH,
                                        "%.1fs: done reopen; writer.maxDoc()=%d; searcher.maxDoc()=%d; searcher.numDocs()=%d",
                                        (System.currentTimeMillis() - startMS) / 1000.0, writer.maxDoc(),
                                        s.getIndexReader().maxDoc(), s.getIndexReader().numDocs()));
                            }
                        } finally {
                            mgr.release(s);
                        }
                    }
                } catch (Exception e) {
                    throw new RuntimeException(e);
                }
            }
        };
        reopenThread.setName("ReopenThread");
        reopenThread.setPriority(4 + Thread.currentThread().getPriority());
        reopenThread.start();

    } else {
        dir = dir0;
        writer = null;
        final DirectoryReader reader;
        if (commit != null && commit.length() > 0) {
            System.out.println("Opening searcher on commit=" + commit);
            reader = DirectoryReader.open(PerfUtils.findCommitPoint(commit, dir));
        } else {
            // open last commit
            reader = DirectoryReader.open(dir);
        }
        IndexSearcher s = new IndexSearcher(reader);
        s.setQueryCache(null); // don't bench the cache
        s.setSimilarity(sim);
        System.out.println("maxDoc=" + reader.maxDoc() + " numDocs=" + reader.numDocs() + " %tg deletes="
                + (100. * reader.maxDoc() / reader.numDocs()));

        mgr = new SingleIndexSearcher(s);
    }

    System.out.println((System.currentTimeMillis() - tSearcherStart) + " msec to init searcher/NRT");

    {
        IndexSearcher s = mgr.acquire();
        try {
            System.out.println("Searcher: numDocs=" + s.getIndexReader().numDocs() + " maxDoc="
                    + s.getIndexReader().maxDoc() + ": " + s);
        } finally {
            mgr.release(s);
        }
    }

    //System.out.println("searcher=" + searcher);

    FacetsConfig facetsConfig = new FacetsConfig();
    facetsConfig.setHierarchical("Date", true);

    TaxonomyReader taxoReader;
    Path taxoPath = Paths.get(args.getString("-indexPath"), "facets");
    Directory taxoDir = od.open(taxoPath);
    if (DirectoryReader.indexExists(taxoDir)) {
        taxoReader = new DirectoryTaxonomyReader(taxoDir);
        System.out.println("Taxonomy has " + taxoReader.getSize() + " ords");
    } else {
        taxoReader = null;
    }

    final Random staticRandom = new Random(staticRandomSeed);
    final Random random = new Random(randomSeed);

    final DirectSpellChecker spellChecker = new DirectSpellChecker();
    final IndexState indexState = new IndexState(mgr, taxoReader, fieldName, spellChecker, hiliteImpl, facetsConfig);

    final QueryParser queryParser = new QueryParser("body", a);
    TaskParser taskParser = new TaskParser(indexState, queryParser, fieldName, topN, staticRandom, doStoredLoads);

    final TaskSource tasks;

    if (tasksFile.startsWith("server:")) {
        int idx = tasksFile.indexOf(':', 8);
        if (idx == -1) {
            throw new RuntimeException(
                    "server is missing the port; should be server:interface:port (got: " + tasksFile + ")");
        }
        String iface = tasksFile.substring(7, idx);
        int port = Integer.valueOf(tasksFile.substring(1 + idx));
        RemoteTaskSource remoteTasks = new RemoteTaskSource(iface, port, searchThreadCount, taskParser);

        // nocommit must stop thread?
        tasks = remoteTasks;
    } else {
        // Load the tasks from a file:
        final int taskRepeatCount = args.getInt("-taskRepeatCount");
        final int numTaskPerCat = args.getInt("-tasksPerCat");
        tasks = new LocalTaskSource(indexState, taskParser, tasksFile, staticRandom, random, numTaskPerCat,
                taskRepeatCount, doPKLookup);
        System.out.println("Task repeat count " + taskRepeatCount);
        System.out.println("Tasks file " + tasksFile);
        System.out.println("Num task per cat " + numTaskPerCat);
    }

    args.check();

    // Evil respeller:
    //spellChecker.setMinPrefix(0);
    //spellChecker.setMaxInspections(1024);
    final TaskThreads taskThreads = new TaskThreads(tasks, indexState, searchThreadCount);
    Thread.sleep(10);

    final long startNanos = System.nanoTime();
    taskThreads.start();
    taskThreads.finish();
    final long endNanos = System.nanoTime();

    System.out.println("\n" + ((endNanos - startNanos) / 1000000.0) + " msec total");

    final List<Task> allTasks = tasks.getAllTasks();

    PrintStream out = new PrintStream(logFile);

    if (allTasks != null) {
        // Tasks were local: verify checksums:

        // indexState.setDocIDToID();

        final Map<Task, Task> tasksSeen = new HashMap<Task, Task>();

        out.println("\nResults for " + allTasks.size() + " tasks:");

        boolean fail = false;
        for (final Task task : allTasks) {
            if (verifyCheckSum) {
                final Task other = tasksSeen.get(task);
                if (other != null) {
                    if (task.checksum() != other.checksum()) {
                        System.out.println("\nTASK:");
                        task.printResults(System.out, indexState);
                        System.out.println("\nOTHER TASK:");
                        other.printResults(System.out, indexState);
                        fail = true;
                        //throw new RuntimeException("task " + task + " hit different checksums: " + task.checksum() + " vs " + other.checksum() + " other=" + other);
                    }
                } else {
                    tasksSeen.put(task, task);
                }
            }
            out.println("\nTASK: " + task);
            out.println("  " + (task.runTimeNanos / 1000000.0) + " msec");
            out.println("  thread " + task.threadID);
            task.printResults(out, indexState);
        }
        if (fail) {
            throw new RuntimeException("some tasks got different results across different threads");
        }

        allTasks.clear();
    }

    mgr.close();

    if (taxoReader != null) {
        taxoReader.close();
    }

    if (writer != null) {
        // Don't actually commit any index changes:
        writer.rollback();
    }

    dir.close();

    if (printHeap) {
        // Try to get RAM usage -- some ideas poached from http://www.javaworld.com/javaworld/javatips/jw-javatip130.html
        final Runtime runtime = Runtime.getRuntime();
        long usedMem1 = PerfUtils.usedMemory(runtime);
        long usedMem2 = Long.MAX_VALUE;
        for (int iter = 0; iter < 10; iter++) {
            runtime.runFinalization();
            runtime.gc();
            Thread.yield();
            Thread.sleep(100);
            usedMem2 = usedMem1;
            usedMem1 = PerfUtils.usedMemory(runtime);
        }
        out.println("\nHEAP: " + PerfUtils.usedMemory(runtime));
    }
    out.close();
}