List of usage examples for org.apache.lucene.index IndexReader leaves
public final List<LeafReaderContext> leaves()
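Before the collected examples, here is a minimal, hedged sketch of the pattern they all share: iterate over the per-segment LeafReaderContext instances returned by leaves() and work against each LeafReader, keeping in mind that leaf document IDs are segment-relative and must be offset by context.docBase to obtain top-level IDs. The index path and field name below are placeholder values, not taken from any of the examples.

import java.io.IOException;
import java.nio.file.Paths;
import java.util.List;

import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.Terms;
import org.apache.lucene.store.FSDirectory;

public class LeavesExample {
    public static void main(String[] args) throws IOException {
        // "/path/to/index" and "body" are placeholders for this sketch
        try (IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get("/path/to/index")))) {
            List<LeafReaderContext> leaves = reader.leaves();
            for (LeafReaderContext context : leaves) {
                LeafReader leaf = context.reader();
                // terms() may return null if this segment has no postings for the field
                Terms terms = leaf.terms("body");
                System.out.println("segment ord=" + context.ord
                        + " docBase=" + context.docBase
                        + " maxDoc=" + leaf.maxDoc()
                        + " hasTerms=" + (terms != null));
            }
        }
    }
}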
From source file:org.codelibs.elasticsearch.common.lucene.uid.Versions.java
License:Apache License
/**
 * Load the internal doc ID and version for the uid from the reader, returning
 * <ul>
 * <li>null if the uid wasn't found,
 * <li>a doc ID and a version otherwise
 * </ul>
 */
public static DocIdAndVersion loadDocIdAndVersion(IndexReader reader, Term term) throws IOException {
    assert term.field().equals(UidFieldMapper.NAME);
    List<LeafReaderContext> leaves = reader.leaves();
    if (leaves.isEmpty()) {
        return null;
    }
    // iterate backwards to optimize for the frequently updated documents
    // which are likely to be in the last segments
    for (int i = leaves.size() - 1; i >= 0; i--) {
        LeafReaderContext context = leaves.get(i);
        LeafReader leaf = context.reader();
        PerThreadIDAndVersionLookup lookup = getLookupState(leaf);
        DocIdAndVersion result = lookup.lookup(term.bytes(), leaf.getLiveDocs(), context);
        if (result != null) {
            return result;
        }
    }
    return null;
}
From source file:org.codelibs.elasticsearch.search.suggest.completion2x.Completion090PostingsFormat.java
License:Apache License
/**
 * Returns total in-heap bytes used by all suggesters. This method has CPU cost <code>O(numIndexedFields)</code>.
 *
 * @param fieldNamePatterns if non-null, any completion field name matching any of these patterns will break out
 *                          its in-heap bytes separately in the returned {@link CompletionStats}
 */
public CompletionStats completionStats(IndexReader indexReader, String... fieldNamePatterns) {
    CompletionStats completionStats = new CompletionStats();
    for (LeafReaderContext atomicReaderContext : indexReader.leaves()) {
        LeafReader atomicReader = atomicReaderContext.reader();
        try {
            Fields fields = atomicReader.fields();
            for (String fieldName : fields) {
                Terms terms = fields.terms(fieldName);
                if (terms instanceof CompletionTerms) {
                    CompletionTerms completionTerms = (CompletionTerms) terms;
                    completionStats.add(completionTerms.stats(fieldNamePatterns));
                }
            }
        } catch (IOException ioe) {
            logger.error("Could not get completion stats", ioe);
        }
    }
    return completionStats;
}
From source file:org.deshang.content.indexing.scheduling.ContentIndexingTask.java
License:Apache License
private void calcPersonTermDocFreqInfo(TermDocFreqStatistics statistics, IndexReader reader) throws IOException {
    long docNum = reader.numDocs();
    LOGGER.debug("Total number of documents is " + docNum + ".");
    List<AtomicReaderContext> atomicCtxList = reader.leaves();
    for (AtomicReaderContext ctx : atomicCtxList) {
        FilterAtomicReader far = new FilterAtomicReader(ctx.reader());
        for (String field : far.fields()) {
            Terms terms = far.fields().terms(field);
            LOGGER.debug("Reader [" + far.toString() + "] totally has " + terms.size() + " term(s).");
            TermsEnum termsEnum = terms.iterator(null);
            BytesRef term = null;
            while ((term = termsEnum.next()) != null) {
                String termUtf8String = term.utf8ToString();
                int existPersonDocFreq = statistics.getTermPersonDocFreq(termUtf8String);
                int personDocFreq = far.docFreq(new Term(field, term));
                double personDocFreqPercent = ((double) personDocFreq) / docNum;
                if (existPersonDocFreq < 0) {
                    personDocFreq += statistics.getTermPersonDocFreq(termUtf8String);
                    personDocFreqPercent += statistics.getTermPersonDocFreqPercent(termUtf8String);
                }
                statistics.putTermPersonDocFreqInfo(termUtf8String, personDocFreq, personDocFreqPercent);
            }
        }
        far.close();
    }
}
From source file:org.eclipse.rdf4j.sail.lucene.LuceneIndex.java
License:Open Source License
/**
 * Returns a Document representing the specified document ID (combination of resource and context), or
 * null when no such Document exists yet.
 */
private Document getDocument(Term idTerm) throws IOException {
    IndexReader reader = getIndexReader();
    List<LeafReaderContext> leaves = reader.leaves();
    int size = leaves.size();
    for (int i = 0; i < size; i++) {
        LeafReader lreader = leaves.get(i).reader();
        Document document = getDocument(lreader, idTerm);
        if (document != null) {
            return document;
        }
    }
    // no such Document
    return null;
}
From source file:org.eclipse.rdf4j.sail.lucene.LuceneIndex.java
License:Open Source License
/**
 * Returns a list of Documents representing the specified Resource (empty when no such Document exists
 * yet). Each document represents a set of statements with the specified Resource as a subject, which are
 * stored in a specific context.
 */
private List<Document> getDocuments(Term uriTerm) throws IOException {
    List<Document> result = new ArrayList<Document>();
    IndexReader reader = getIndexReader();
    List<LeafReaderContext> leaves = reader.leaves();
    int size = leaves.size();
    for (int i = 0; i < size; i++) {
        LeafReader lreader = leaves.get(i).reader();
        addDocuments(lreader, uriTerm, result);
    }
    return result;
}
From source file:org.eclipse.rdf4j.sail.lucene.LuceneIndex.java
License:Open Source License
private static boolean isDeleted(IndexReader reader, int docId) {
    if (reader.hasDeletions()) {
        List<LeafReaderContext> leaves = reader.leaves();
        int size = leaves.size();
        for (int i = 0; i < size; i++) {
            Bits liveDocs = leaves.get(i).reader().getLiveDocs();
            if (docId < liveDocs.length()) {
                boolean isDeleted = !liveDocs.get(docId);
                if (isDeleted) {
                    return true;
                }
            }
        }
        return false;
    } else {
        return false;
    }
}
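The example above tests the top-level document ID against each leaf's live docs in turn. A common alternative, shown as a hedged sketch below, is to resolve the owning segment with ReaderUtil.subIndex and check the segment-relative ID (docId - context.docBase) against that leaf's live docs, guarding against segments without deletions whose getLiveDocs() returns null. The class and method names here are illustrative placeholders, not part of the quoted source.

import java.util.List;

import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.ReaderUtil;
import org.apache.lucene.util.Bits;

final class LiveDocsUtil {
    // Hypothetical helper: returns true if the top-level docId refers to a deleted document.
    static boolean isDeletedByDocBase(IndexReader reader, int docId) {
        if (!reader.hasDeletions()) {
            return false;
        }
        List<LeafReaderContext> leaves = reader.leaves();
        // Find the segment that contains this top-level doc ID.
        int subIndex = ReaderUtil.subIndex(docId, leaves);
        LeafReaderContext context = leaves.get(subIndex);
        Bits liveDocs = context.reader().getLiveDocs();
        // liveDocs is null when this particular segment has no deletions.
        if (liveDocs == null) {
            return false;
        }
        return !liveDocs.get(docId - context.docBase);
    }
}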
From source file:org.elasticsearch.action.allterms.TransportAllTermsShardAction.java
License:Apache License
@Override
protected AllTermsSingleShardResponse shardOperation(AllTermsShardRequest request, ShardId shardId) throws ElasticsearchException {
    List<String> terms = new ArrayList<>();
    IndexService indexService = indicesService.indexServiceSafe(request.index());
    IndexShard indexShard = indexService.shardSafe(shardId.id());
    final Engine.Searcher searcher = indexShard.acquireSearcher("all_terms");
    IndexReader topLevelReader = searcher.reader();
    List<AtomicReaderContext> leaves = topLevelReader.leaves();
    try {
        if (leaves.size() == 0) {
            return new AllTermsSingleShardResponse(terms);
        }
        List<TermsEnum> termIters = new ArrayList<>();
        try {
            for (AtomicReaderContext reader : leaves) {
                termIters.add(reader.reader().terms(request.field()).iterator(null));
            }
        } catch (IOException e) {
            // ignored: continue with whatever per-leaf iterators were created
        }
        CharsRefBuilder spare = new CharsRefBuilder();
        BytesRef lastTerm = null;
        int[] exhausted = new int[termIters.size()];
        for (int i = 0; i < exhausted.length; i++) {
            exhausted[i] = 0;
        }
        try {
            // first find the smallest term
            for (int i = 0; i < termIters.size(); i++) {
                BytesRef curTerm = null;
                if (request.from() != null) {
                    TermsEnum.SeekStatus seekStatus = termIters.get(i).seekCeil(new BytesRef(request.from()));
                    if (seekStatus.equals(TermsEnum.SeekStatus.END) == false) {
                        curTerm = termIters.get(i).term();
                    }
                } else {
                    curTerm = termIters.get(i).next();
                }
                if (lastTerm == null) {
                    lastTerm = curTerm;
                    if (lastTerm == null || lastTerm.length == 0) {
                        lastTerm = null;
                        exhausted[i] = 1;
                    }
                } else {
                    if (curTerm.compareTo(lastTerm) < 0) {
                        lastTerm = curTerm;
                    }
                }
            }
            if (lastTerm == null) {
                return new AllTermsSingleShardResponse(terms);
            }
            if (getDocFreq(termIters, lastTerm, request.field(), exhausted) >= request.minDocFreq()) {
                spare.copyUTF8Bytes(lastTerm);
                terms.add(spare.toString());
            }
            BytesRef blah = new BytesRef();
            blah.copyBytes(lastTerm);
            lastTerm = blah;
            while (terms.size() < request.size() && lastTerm != null) {
                moveIterators(exhausted, termIters, lastTerm, shardId);
                lastTerm = findMinimum(exhausted, termIters, shardId);
                if (lastTerm != null) {
                    if (getDocFreq(termIters, lastTerm, request.field(), exhausted) >= request.minDocFreq()) {
                        spare.copyUTF8Bytes(lastTerm);
                        terms.add(spare.toString());
                    }
                }
            }
        } catch (IOException e) {
            // ignored: return the terms collected so far
        }
        logger.trace("[{}], final terms list: {}", shardId, terms);
        return new AllTermsSingleShardResponse(terms);
    } finally {
        searcher.close();
    }
}
From source file:org.elasticsearch.common.lucene.index.FilterableTermsEnum.java
License:Apache License
public FilterableTermsEnum(IndexReader reader, String field, int docsEnumFlag, @Nullable Query filter) throws IOException {
    if ((docsEnumFlag != PostingsEnum.FREQS) && (docsEnumFlag != PostingsEnum.NONE)) {
        throw new IllegalArgumentException("invalid docsEnumFlag of " + docsEnumFlag);
    }
    this.docsEnumFlag = docsEnumFlag;
    if (filter == null) {
        // Important - need to use the doc count that includes deleted docs
        // or we have this issue: https://github.com/elasticsearch/elasticsearch/issues/7951
        numDocs = reader.maxDoc();
    }
    List<LeafReaderContext> leaves = reader.leaves();
    List<Holder> enums = new ArrayList<>(leaves.size());
    final Weight weight;
    if (filter == null) {
        weight = null;
    } else {
        final IndexSearcher searcher = new IndexSearcher(reader);
        searcher.setQueryCache(null);
        weight = searcher.createNormalizedWeight(filter, false);
    }
    for (LeafReaderContext context : leaves) {
        Terms terms = context.reader().terms(field);
        if (terms == null) {
            continue;
        }
        TermsEnum termsEnum = terms.iterator();
        if (termsEnum == null) {
            continue;
        }
        BitSet bits = null;
        if (weight != null) {
            Scorer scorer = weight.scorer(context);
            if (scorer == null) {
                // fully filtered, none matching, no need to iterate on this
                continue;
            }
            DocIdSetIterator docs = scorer.iterator();
            // we want to force apply deleted docs
            final Bits liveDocs = context.reader().getLiveDocs();
            if (liveDocs != null) {
                docs = new FilteredDocIdSetIterator(docs) {
                    @Override
                    protected boolean match(int doc) {
                        return liveDocs.get(doc);
                    }
                };
            }
            BitDocIdSet.Builder builder = new BitDocIdSet.Builder(context.reader().maxDoc());
            builder.or(docs);
            bits = builder.build().bits();
            // Count how many docs are in our filtered set
            // TODO make this lazy-loaded only for those that need it?
            numDocs += bits.cardinality();
        }
        enums.add(new Holder(termsEnum, bits));
    }
    this.enums = enums.toArray(new Holder[enums.size()]);
}
From source file:org.elasticsearch.common.lucene.IndexCacheableQueryTests.java
License:Apache License
public void testCache() throws IOException {
    Directory dir = newDirectory();
    LRUQueryCache cache = new LRUQueryCache(10000, Long.MAX_VALUE);
    QueryCachingPolicy policy = QueryCachingPolicy.ALWAYS_CACHE;
    RandomIndexWriter writer = new RandomIndexWriter(getRandom(), dir);
    for (int i = 0; i < 10; ++i) {
        writer.addDocument(new Document());
    }

    IndexReader reader = writer.getReader();
    IndexSearcher searcher = newSearcher(reader);
    reader = searcher.getIndexReader(); // reader might be wrapped
    searcher.setQueryCache(cache);
    searcher.setQueryCachingPolicy(policy);

    assertEquals(0, cache.getCacheSize());

    DummyIndexCacheableQuery query = new DummyIndexCacheableQuery();
    searcher.count(query);
    int expectedCacheSize = reader.leaves().size();
    assertEquals(expectedCacheSize, cache.getCacheSize());
    searcher.count(query);
    assertEquals(expectedCacheSize, cache.getCacheSize());

    writer.addDocument(new Document());

    IndexReader reader2 = writer.getReader();
    searcher = newSearcher(reader2);
    reader2 = searcher.getIndexReader(); // reader might be wrapped
    searcher.setQueryCache(cache);
    searcher.setQueryCachingPolicy(policy);

    // since the query is only cacheable at the index level, it has to be recomputed on all leaves
    expectedCacheSize += reader2.leaves().size();
    searcher.count(query);
    assertEquals(expectedCacheSize, cache.getCacheSize());
    searcher.count(query);
    assertEquals(expectedCacheSize, cache.getCacheSize());

    reader.close();
    reader2.close();
    writer.close();
    assertEquals(0, cache.getCacheSize());
    dir.close();
}
From source file:org.elasticsearch.common.lucene.search.MultiPhrasePrefixQuery.java
License:Apache License
private void getPrefixTerms(ObjectOpenHashSet<Term> terms, final Term prefix, final IndexReader reader) throws IOException {
    // SlowCompositeReaderWrapper could be used... but this would merge all terms from each segment into one terms
    // instance, which is very expensive. Therefore I think it is better to iterate over each leaf individually.
    TermsEnum termsEnum = null;
    List<AtomicReaderContext> leaves = reader.leaves();
    for (AtomicReaderContext leaf : leaves) {
        Terms _terms = leaf.reader().terms(field);
        if (_terms == null) {
            continue;
        }
        termsEnum = _terms.iterator(termsEnum);
        TermsEnum.SeekStatus seekStatus = termsEnum.seekCeil(prefix.bytes());
        if (TermsEnum.SeekStatus.END == seekStatus) {
            continue;
        }
        for (BytesRef term = termsEnum.term(); term != null; term = termsEnum.next()) {
            if (!StringHelper.startsWith(term, prefix.bytes())) {
                break;
            }
            terms.add(new Term(field, BytesRef.deepCopyOf(term)));
            if (terms.size() >= maxExpansions) {
                return;
            }
        }
    }
}