Example usage for org.apache.lucene.index IndexReader leaves

List of usage examples for org.apache.lucene.index IndexReader leaves

Introduction

This page collects example usages of the org.apache.lucene.index.IndexReader.leaves() method.

Prototype

public final List<LeafReaderContext> leaves() 

Document

Returns the reader's leaves, or itself if this reader is atomic.
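
A minimal sketch of the pattern the examples below share: fetch the leaf (per-segment) contexts from the reader and work on each segment's LeafReader. The index path and field name ("title") are illustrative assumptions, not taken from any example on this page.

import java.nio.file.Paths;

import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.Terms;
import org.apache.lucene.store.FSDirectory;

public class LeavesExample {
    public static void main(String[] args) throws Exception {
        try (IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get("/tmp/index")))) {
            for (LeafReaderContext context : reader.leaves()) {
                LeafReader leaf = context.reader();
                Terms terms = leaf.terms("title"); // per-segment terms for one field, may be null
                int docBase = context.docBase;     // add docBase to map leaf doc IDs to top-level doc IDs
                System.out.println("segment: maxDoc=" + leaf.maxDoc()
                        + ", docBase=" + docBase + ", hasTerms=" + (terms != null));
            }
        }
    }
}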

Usage

From source file:org.codelibs.elasticsearch.common.lucene.uid.Versions.java

License:Apache License

/**
 * Load the internal doc ID and version for the uid from the reader, returning<ul>
 * <li>null if the uid wasn't found,
 * <li>a doc ID and a version otherwise
 * </ul>
 */
public static DocIdAndVersion loadDocIdAndVersion(IndexReader reader, Term term) throws IOException {
    assert term.field().equals(UidFieldMapper.NAME);
    List<LeafReaderContext> leaves = reader.leaves();
    if (leaves.isEmpty()) {
        return null;
    }
    // iterate backwards to optimize for the frequently updated documents
    // which are likely to be in the last segments
    for (int i = leaves.size() - 1; i >= 0; i--) {
        LeafReaderContext context = leaves.get(i);
        LeafReader leaf = context.reader();
        PerThreadIDAndVersionLookup lookup = getLookupState(leaf);
        DocIdAndVersion result = lookup.lookup(term.bytes(), leaf.getLiveDocs(), context);
        if (result != null) {
            return result;
        }
    }
    return null;
}
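
A hypothetical call site for the method above, assuming an open IndexReader named reader; the uid literal is illustrative, and DocIdAndVersion is assumed to expose the resolved version as a public field.

BytesRef uid = new BytesRef("my-doc-id"); // illustrative uid, not from the source above
DocIdAndVersion docIdAndVersion = Versions.loadDocIdAndVersion(reader, new Term(UidFieldMapper.NAME, uid));
if (docIdAndVersion == null) {
    // the uid was not found in any segment
} else {
    long version = docIdAndVersion.version; // version of the latest matching document
}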

From source file:org.codelibs.elasticsearch.search.suggest.completion2x.Completion090PostingsFormat.java

License:Apache License

/**
 * Returns total in-heap bytes used by all suggesters.  This method has CPU cost <code>O(numIndexedFields)</code>.
 *
 * @param fieldNamePatterns if non-null, any completion field name matching any of these patterns will break out its in-heap bytes
 * separately in the returned {@link CompletionStats}
 */
public CompletionStats completionStats(IndexReader indexReader, String... fieldNamePatterns) {
    CompletionStats completionStats = new CompletionStats();
    for (LeafReaderContext atomicReaderContext : indexReader.leaves()) {
        LeafReader atomicReader = atomicReaderContext.reader();
        try {
            Fields fields = atomicReader.fields();
            for (String fieldName : fields) {
                Terms terms = fields.terms(fieldName);
                if (terms instanceof CompletionTerms) {
                    CompletionTerms completionTerms = (CompletionTerms) terms;
                    completionStats.add(completionTerms.stats(fieldNamePatterns));
                }
            }
        } catch (IOException ioe) {
            logger.error("Could not get completion stats", ioe);
        }
    }

    return completionStats;
}

From source file:org.deshang.content.indexing.scheduling.ContentIndexingTask.java

License:Apache License

private void calcPersonTermDocFreqInfo(TermDocFreqStatistics statistics, IndexReader reader)
        throws IOException {
    long docNum = reader.numDocs();
    LOGGER.debug("Total number of documents is " + docNum + ".");
    List<AtomicReaderContext> atomicCtxList = reader.leaves();
    for (AtomicReaderContext ctx : atomicCtxList) {
        FilterAtomicReader far = new FilterAtomicReader(ctx.reader());
        for (String field : far.fields()) {
            Terms terms = far.fields().terms(field);
            LOGGER.debug("Reader [" + far.toString() + "] totally has " + terms.size() + " term(s).");
            TermsEnum termsEnum = terms.iterator(null);
            BytesRef term = null;
            while ((term = termsEnum.next()) != null) {
                String termUtf8String = term.utf8ToString();
                int existPersonDocFreq = statistics.getTermPersonDocFreq(termUtf8String);
                int personDocFreq = far.docFreq(new Term(field, term));
                double personDocFreqPercent = ((double) personDocFreq) / docNum;
                // accumulate with any previously recorded frequency for this term
                if (existPersonDocFreq >= 0) {
                    personDocFreq += existPersonDocFreq;
                    personDocFreqPercent += statistics.getTermPersonDocFreqPercent(termUtf8String);
                }
                statistics.putTermPersonDocFreqInfo(termUtf8String, personDocFreq, personDocFreqPercent);
            }
        }
        far.close();
    }
}

From source file:org.eclipse.rdf4j.sail.lucene.LuceneIndex.java

License:Open Source License

/**
 * Returns a Document representing the specified document ID (combination of resource and context), or
 * null when no such Document exists yet.
 */
private Document getDocument(Term idTerm) throws IOException {
    IndexReader reader = getIndexReader();
    List<LeafReaderContext> leaves = reader.leaves();
    int size = leaves.size();
    for (int i = 0; i < size; i++) {
        LeafReader lreader = leaves.get(i).reader();
        Document document = getDocument(lreader, idTerm);
        if (document != null) {
            return document;
        }
    }
    // no such Document
    return null;
}

From source file:org.eclipse.rdf4j.sail.lucene.LuceneIndex.java

License:Open Source License

/**
 * Returns a list of Documents representing the specified Resource (empty when no such Document exists
 * yet). Each document represent a set of statements with the specified Resource as a subject, which are
 * stored in a specific context.
 */
private List<Document> getDocuments(Term uriTerm) throws IOException {
    List<Document> result = new ArrayList<Document>();

    IndexReader reader = getIndexReader();
    List<LeafReaderContext> leaves = reader.leaves();
    int size = leaves.size();
    for (int i = 0; i < size; i++) {
        LeafReader lreader = leaves.get(i).reader();
        addDocuments(lreader, uriTerm, result);
    }

    return result;
}

From source file:org.eclipse.rdf4j.sail.lucene.LuceneIndex.java

License:Open Source License

private static boolean isDeleted(IndexReader reader, int docId) {
    if (reader.hasDeletions()) {
        List<LeafReaderContext> leaves = reader.leaves();
        int size = leaves.size();
        for (int i = 0; i < size; i++) {
            Bits liveDocs = leaves.get(i).reader().getLiveDocs();
            // getLiveDocs() returns null for a segment that has no deletions
            if (liveDocs != null && docId < liveDocs.length()) {
                boolean isDeleted = !liveDocs.get(docId);
                if (isDeleted) {
                    return true;
                }
            }
        }
        return false;
    } else {
        return false;
    }
}

From source file:org.elasticsearch.action.allterms.TransportAllTermsShardAction.java

License:Apache License

@Override
protected AllTermsSingleShardResponse shardOperation(AllTermsShardRequest request, ShardId shardId)
        throws ElasticsearchException {
    List<String> terms = new ArrayList<>();
    IndexService indexService = indicesService.indexServiceSafe(request.index());
    IndexShard indexShard = indexService.shardSafe(shardId.id());
    final Engine.Searcher searcher = indexShard.acquireSearcher("all_terms");
    IndexReader topLevelReader = searcher.reader();

    List<AtomicReaderContext> leaves = topLevelReader.leaves();

    try {
        if (leaves.size() == 0) {
            return new AllTermsSingleShardResponse(terms);
        }
        List<TermsEnum> termIters = new ArrayList<>();

        try {
            for (AtomicReaderContext reader : leaves) {
                termIters.add(reader.reader().terms(request.field()).iterator(null));
            }
        } catch (IOException e) {
        }
        CharsRefBuilder spare = new CharsRefBuilder();
        BytesRef lastTerm = null;
        int[] exhausted = new int[termIters.size()];
        for (int i = 0; i < exhausted.length; i++) {
            exhausted[i] = 0;
        }
        try {
            //first find smallest term
            for (int i = 0; i < termIters.size(); i++) {
                BytesRef curTerm = null;
                if (request.from() != null) {
                    TermsEnum.SeekStatus seekStatus = termIters.get(i).seekCeil(new BytesRef(request.from()));
                    if (seekStatus.equals(TermsEnum.SeekStatus.END) == false) {
                        curTerm = termIters.get(i).term();
                    }
                } else {
                    curTerm = termIters.get(i).next();
                }

                if (lastTerm == null) {
                    lastTerm = curTerm;
                    if (lastTerm == null || lastTerm.length == 0) {
                        lastTerm = null;
                        exhausted[i] = 1;
                    }
                } else {
                    if (curTerm.compareTo(lastTerm) < 0) {
                        lastTerm = curTerm;
                    }
                }
            }
            if (lastTerm == null) {
                return new AllTermsSingleShardResponse(terms);
            }
            if (getDocFreq(termIters, lastTerm, request.field(), exhausted) >= request.minDocFreq()) {
                spare.copyUTF8Bytes(lastTerm);
                terms.add(spare.toString());
            }
            BytesRef blah = new BytesRef();
            blah.copyBytes(lastTerm);
            lastTerm = blah;

            while (terms.size() < request.size() && lastTerm != null) {
                moveIterators(exhausted, termIters, lastTerm, shardId);
                lastTerm = findMinimum(exhausted, termIters, shardId);

                if (lastTerm != null) {

                    if (getDocFreq(termIters, lastTerm, request.field(), exhausted) >= request.minDocFreq()) {
                        spare.copyUTF8Bytes(lastTerm);
                        terms.add(spare.toString());
                    }
                }
            }
        } catch (IOException e) {
        }

        logger.trace("[{}], final terms list: {}", shardId, terms);

        return new AllTermsSingleShardResponse(terms);
    } finally {
        searcher.close();
    }
}

From source file:org.elasticsearch.common.lucene.index.FilterableTermsEnum.java

License:Apache License

public FilterableTermsEnum(IndexReader reader, String field, int docsEnumFlag, @Nullable Query filter)
        throws IOException {
    if ((docsEnumFlag != PostingsEnum.FREQS) && (docsEnumFlag != PostingsEnum.NONE)) {
        throw new IllegalArgumentException("invalid docsEnumFlag of " + docsEnumFlag);
    }
    this.docsEnumFlag = docsEnumFlag;
    if (filter == null) {
        // Important - need to use the doc count that includes deleted docs
        // or we have this issue: https://github.com/elasticsearch/elasticsearch/issues/7951
        numDocs = reader.maxDoc();
    }
    List<LeafReaderContext> leaves = reader.leaves();
    List<Holder> enums = new ArrayList<>(leaves.size());
    final Weight weight;
    if (filter == null) {
        weight = null;
    } else {
        final IndexSearcher searcher = new IndexSearcher(reader);
        searcher.setQueryCache(null);
        weight = searcher.createNormalizedWeight(filter, false);
    }
    for (LeafReaderContext context : leaves) {
        Terms terms = context.reader().terms(field);
        if (terms == null) {
            continue;
        }
        TermsEnum termsEnum = terms.iterator();
        if (termsEnum == null) {
            continue;
        }
        BitSet bits = null;
        if (weight != null) {
            Scorer scorer = weight.scorer(context);
            if (scorer == null) {
                // fully filtered, none matching, no need to iterate on this
                continue;
            }
            DocIdSetIterator docs = scorer.iterator();

            // we want to force apply deleted docs
            final Bits liveDocs = context.reader().getLiveDocs();
            if (liveDocs != null) {
                docs = new FilteredDocIdSetIterator(docs) {
                    @Override
                    protected boolean match(int doc) {
                        return liveDocs.get(doc);
                    }
                };
            }

            BitDocIdSet.Builder builder = new BitDocIdSet.Builder(context.reader().maxDoc());
            builder.or(docs);
            bits = builder.build().bits();

            // Count how many docs are in our filtered set
            // TODO make this lazy-loaded only for those that need it?
            numDocs += bits.cardinality();
        }
        enums.add(new Holder(termsEnum, bits));
    }
    this.enums = enums.toArray(new Holder[enums.size()]);
}

From source file:org.elasticsearch.common.lucene.IndexCacheableQueryTests.java

License:Apache License

public void testCache() throws IOException {
    Directory dir = newDirectory();
    LRUQueryCache cache = new LRUQueryCache(10000, Long.MAX_VALUE);
    QueryCachingPolicy policy = QueryCachingPolicy.ALWAYS_CACHE;
    RandomIndexWriter writer = new RandomIndexWriter(getRandom(), dir);
    for (int i = 0; i < 10; ++i) {
        writer.addDocument(new Document());
    }

    IndexReader reader = writer.getReader();
    IndexSearcher searcher = newSearcher(reader);
    reader = searcher.getIndexReader(); // reader might be wrapped
    searcher.setQueryCache(cache);
    searcher.setQueryCachingPolicy(policy);

    assertEquals(0, cache.getCacheSize());
    DummyIndexCacheableQuery query = new DummyIndexCacheableQuery();
    searcher.count(query);
    int expectedCacheSize = reader.leaves().size();
    assertEquals(expectedCacheSize, cache.getCacheSize());
    searcher.count(query);
    assertEquals(expectedCacheSize, cache.getCacheSize());

    writer.addDocument(new Document());

    IndexReader reader2 = writer.getReader();
    searcher = newSearcher(reader2);
    reader2 = searcher.getIndexReader(); // reader might be wrapped
    searcher.setQueryCache(cache);
    searcher.setQueryCachingPolicy(policy);

    // since the query is only cacheable at the index level, it has to be recomputed on all leaves
    expectedCacheSize += reader2.leaves().size();
    searcher.count(query);
    assertEquals(expectedCacheSize, cache.getCacheSize());
    searcher.count(query);
    assertEquals(expectedCacheSize, cache.getCacheSize());

    reader.close();
    reader2.close();
    writer.close();
    assertEquals(0, cache.getCacheSize());
    dir.close();
}

From source file:org.elasticsearch.common.lucene.search.MultiPhrasePrefixQuery.java

License:Apache License

private void getPrefixTerms(ObjectOpenHashSet<Term> terms, final Term prefix, final IndexReader reader)
        throws IOException {
    // SlowCompositeReaderWrapper could be used... but this would merge all terms from each segment into one terms
    // instance, which is very expensive. Therefore I think it is better to iterate over each leaf individually.
    TermsEnum termsEnum = null;
    List<AtomicReaderContext> leaves = reader.leaves();
    for (AtomicReaderContext leaf : leaves) {
        Terms _terms = leaf.reader().terms(field);
        if (_terms == null) {
            continue;
        }

        termsEnum = _terms.iterator(termsEnum);
        TermsEnum.SeekStatus seekStatus = termsEnum.seekCeil(prefix.bytes());
        if (TermsEnum.SeekStatus.END == seekStatus) {
            continue;
        }

        for (BytesRef term = termsEnum.term(); term != null; term = termsEnum.next()) {
            if (!StringHelper.startsWith(term, prefix.bytes())) {
                break;
            }

            terms.add(new Term(field, BytesRef.deepCopyOf(term)));
            if (terms.size() >= maxExpansions) {
                return;
            }
        }
    }
}