Example usage for org.apache.lucene.index IndexReader leaves


Introduction

On this page you can find usage examples for the org.apache.lucene.index.IndexReader leaves() method.

Prototype

public final List<LeafReaderContext> leaves() 

Document

Returns the reader's leaves, or itself if this reader is atomic.
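
Before the real-world examples, here is a minimal, self-contained sketch of the basic call pattern (the Directory parameter and the doc-counting logic are assumptions for illustration): open a reader, then visit each leaf segment in turn. This is the shape every example below follows.

private static int countLiveDocs(Directory directory) throws IOException {
    try (DirectoryReader reader = DirectoryReader.open(directory)) {
        int numDocs = 0;
        // each LeafReaderContext wraps a single segment's LeafReader
        for (LeafReaderContext ctx : reader.leaves()) {
            numDocs += ctx.reader().numDocs();
        }
        return numDocs;
    }
}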

Usage

From source file: org.elasticsearch.common.lucene.search.MultiPhrasePrefixQuery.java

License: Apache License

private void getPrefixTerms(ObjectHashSet<Term> terms, final Term prefix, final IndexReader reader)
        throws IOException {
    // SlowCompositeReaderWrapper could be used... but this would merge all terms from each segment into one terms
    // instance, which is very expensive. Therefore I think it is better to iterate over each leaf individually.
    List<LeafReaderContext> leaves = reader.leaves();
    for (LeafReaderContext leaf : leaves) {
        Terms _terms = leaf.reader().terms(field);
        if (_terms == null) {
            continue;
        }

        TermsEnum termsEnum = _terms.iterator();
        TermsEnum.SeekStatus seekStatus = termsEnum.seekCeil(prefix.bytes());
        if (TermsEnum.SeekStatus.END == seekStatus) {
            continue;
        }

        for (BytesRef term = termsEnum.term(); term != null; term = termsEnum.next()) {
            if (!StringHelper.startsWith(term, prefix.bytes())) {
                break;
            }

            terms.add(new Term(field, BytesRef.deepCopyOf(term)));
            if (terms.size() >= maxExpansions) {
                return;
            }
        }
    }
}

From source file: org.elasticsearch.common.lucene.uid.Versions.java

License: Apache License

/**
 * Load the internal doc ID and version for the uid from the reader, returning<ul>
 * <li>null if the uid wasn't found,
 * <li>a doc ID and a version otherwise, the version being potentially set to {@link #NOT_SET} if the uid has no associated version
 * </ul>
 */
public static DocIdAndVersion loadDocIdAndVersion(IndexReader reader, Term term) throws IOException {
    // iterate backwards to optimize for the frequently updated documents
    // which are likely to be in the last segments
    final List<LeafReaderContext> leaves = reader.leaves();
    for (int i = leaves.size() - 1; i >= 0; --i) {
        final DocIdAndVersion docIdAndVersion = loadDocIdAndVersion(leaves.get(i), term);
        if (docIdAndVersion != null) {
            assert docIdAndVersion.version != NOT_FOUND;
            return docIdAndVersion;
        }
    }
    return null;
}
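
A hypothetical caller of this helper might look as follows (the directory variable and the uid value "type#1" are assumptions for illustration; Versions.NOT_FOUND is the class's not-found sentinel referenced in the assertion above):

try (DirectoryReader reader = DirectoryReader.open(directory)) {
    // null means the uid was not present in any segment
    DocIdAndVersion docIdAndVersion = Versions.loadDocIdAndVersion(reader, new Term("_uid", "type#1"));
    long version = docIdAndVersion == null ? Versions.NOT_FOUND : docIdAndVersion.version;
}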

From source file: org.elasticsearch.common.lucene.uid.VersionsAndSeqNoResolver.java

License: Apache License

private static PerThreadIDVersionAndSeqNoLookup[] getLookupState(IndexReader reader, String uidField)
        throws IOException {
    // We cache on the top level
    // This means cache entries have a shorter lifetime, maybe as low as 1s with the
    // default refresh interval and a steady indexing rate, but on the other hand it
    // proved to be cheaper than having to perform a CHM and a TL get for every segment.
    // See https://github.com/elastic/elasticsearch/pull/19856.
    IndexReader.CacheHelper cacheHelper = reader.getReaderCacheHelper();
    CloseableThreadLocal<PerThreadIDVersionAndSeqNoLookup[]> ctl = lookupStates.get(cacheHelper.getKey());
    if (ctl == null) {
        // First time we are seeing this reader's core; make a new CTL:
        ctl = new CloseableThreadLocal<>();
        CloseableThreadLocal<PerThreadIDVersionAndSeqNoLookup[]> other = lookupStates
                .putIfAbsent(cacheHelper.getKey(), ctl);
        if (other == null) {
            // Our CTL won, we must remove it when the reader is closed:
            cacheHelper.addClosedListener(removeLookupState);
        } else {
            // Another thread beat us to it: just use their CTL:
            ctl = other;
        }
    }

    PerThreadIDVersionAndSeqNoLookup[] lookupState = ctl.get();
    if (lookupState == null) {
        lookupState = new PerThreadIDVersionAndSeqNoLookup[reader.leaves().size()];
        for (LeafReaderContext leaf : reader.leaves()) {
            lookupState[leaf.ord] = new PerThreadIDVersionAndSeqNoLookup(leaf.reader(), uidField);
        }
        ctl.set(lookupState);
    }

    if (lookupState.length != reader.leaves().size()) {
        throw new AssertionError(
                "Mismatched numbers of leaves: " + lookupState.length + " != " + reader.leaves().size());
    }

    if (lookupState.length > 0 && Objects.equals(lookupState[0].uidField, uidField) == false) {
        throw new AssertionError("Index does not consistently use the same uid field: [" + uidField + "] != ["
                + lookupState[0].uidField + "]");
    }

    return lookupState;
}

From source file: org.elasticsearch.common.lucene.uid.VersionsAndSeqNoResolver.java

License: Apache License

/**
 * Load the internal doc ID and version for the uid from the reader, returning<ul>
 * <li>null if the uid wasn't found,
 * <li>a doc ID and a version otherwise
 * </ul>
 */
public static DocIdAndVersion loadDocIdAndVersion(IndexReader reader, Term term) throws IOException {
    PerThreadIDVersionAndSeqNoLookup[] lookups = getLookupState(reader, term.field());
    List<LeafReaderContext> leaves = reader.leaves();
    // iterate backwards to optimize for the frequently updated documents
    // which are likely to be in the last segments
    for (int i = leaves.size() - 1; i >= 0; i--) {
        final LeafReaderContext leaf = leaves.get(i);
        PerThreadIDVersionAndSeqNoLookup lookup = lookups[leaf.ord];
        DocIdAndVersion result = lookup.lookupVersion(term.bytes(), leaf);
        if (result != null) {
            return result;
        }
    }
    return null;
}

From source file: org.elasticsearch.common.lucene.uid.VersionsAndSeqNoResolver.java

License: Apache License

/**
 * Load the internal doc ID and sequence number for the uid from the reader, returning<ul>
 * <li>null if the uid wasn't found,
 * <li>a doc ID and the associated seqNo otherwise
 * </ul>
 */
public static DocIdAndSeqNo loadDocIdAndSeqNo(IndexReader reader, Term term) throws IOException {
    PerThreadIDVersionAndSeqNoLookup[] lookups = getLookupState(reader, term.field());
    List<LeafReaderContext> leaves = reader.leaves();
    // iterate backwards to optimize for the frequently updated documents
    // which are likely to be in the last segments
    for (int i = leaves.size() - 1; i >= 0; i--) {
        final LeafReaderContext leaf = leaves.get(i);
        PerThreadIDVersionAndSeqNoLookup lookup = lookups[leaf.ord];
        DocIdAndSeqNo result = lookup.lookupSeqNo(term.bytes(), leaf);
        if (result != null) {
            return result;
        }
    }
    return null;
}

From source file: org.elasticsearch.common.lucene.uid.VersionsResolver.java

License: Apache License

/**
 * Load the internal doc ID and version for the uid from the reader, returning<ul>
 * <li>null if the uid wasn't found,
 * <li>a doc ID and a version otherwise
 * </ul>
 */
public static DocIdAndVersion loadDocIdAndVersion(IndexReader reader, Term term) throws IOException {
    assert term.field().equals(UidFieldMapper.NAME);
    List<LeafReaderContext> leaves = reader.leaves();
    if (leaves.isEmpty()) {
        return null;
    }
    // iterate backwards to optimize for the frequently updated documents
    // which are likely to be in the last segments
    for (int i = leaves.size() - 1; i >= 0; i--) {
        LeafReaderContext context = leaves.get(i);
        LeafReader leaf = context.reader();
        PerThreadIDAndVersionLookup lookup = getLookupState(leaf);
        DocIdAndVersion result = lookup.lookupVersion(term.bytes(), leaf.getLiveDocs(), context);
        if (result != null) {
            return result;
        }
    }
    return null;
}

From source file: org.elasticsearch.common.lucene.uid.VersionsResolver.java

License: Apache License

/**
 * Returns the sequence number for the given uid term, returning
 * {@code SequenceNumbersService.UNASSIGNED_SEQ_NO} if none is found.
 */
public static long loadSeqNo(IndexReader reader, Term term) throws IOException {
    assert term.field().equals(UidFieldMapper.NAME) : "can only load _seq_no by uid";
    List<LeafReaderContext> leaves = reader.leaves();
    if (leaves.isEmpty()) {
        return SequenceNumbersService.UNASSIGNED_SEQ_NO;
    }

    // iterate backwards to optimize for the frequently updated documents
    // which are likely to be in the last segments
    for (int i = leaves.size() - 1; i >= 0; i--) {
        LeafReader leaf = leaves.get(i).reader();
        Bits liveDocs = leaf.getLiveDocs();

        TermsEnum termsEnum = null;
        SortedNumericDocValues dvField = null;
        PostingsEnum docsEnum = null;

        final Fields fields = leaf.fields();
        if (fields != null) {
            Terms terms = fields.terms(UidFieldMapper.NAME);
            if (terms != null) {
                termsEnum = terms.iterator();
                assert termsEnum != null;
                dvField = leaf.getSortedNumericDocValues(SeqNoFieldMapper.NAME);
                assert dvField != null;

                final BytesRef id = term.bytes();
                if (termsEnum.seekExact(id)) {
                    // there may be more than one matching docID, in the
                    // case of nested docs, so we want the last one:
                    docsEnum = termsEnum.postings(docsEnum, 0);
                    int docID = DocIdSetIterator.NO_MORE_DOCS;
                    for (int d = docsEnum.nextDoc(); d != DocIdSetIterator.NO_MORE_DOCS; d = docsEnum
                            .nextDoc()) {
                        if (liveDocs != null && liveDocs.get(d) == false) {
                            continue;
                        }
                        docID = d;
                    }

                    if (docID != DocIdSetIterator.NO_MORE_DOCS) {
                        dvField.setDocument(docID);
                        assert dvField.count() == 1 : "expected only a single value for _seq_no but got "
                                + dvField.count();
                        return dvField.valueAt(0);
                    }
                }
            }
        }

    }
    return SequenceNumbersService.UNASSIGNED_SEQ_NO;
}

From source file: org.elasticsearch.common.lucene.uid.VersionsResolver.java

License: Apache License

/**
 * Returns the primary term for the given uid term, returning {@code 0} if none is found.
 */
public static long loadPrimaryTerm(IndexReader reader, Term term) throws IOException {
    assert term.field().equals(UidFieldMapper.NAME) : "can only load _primary_term by uid";
    List<LeafReaderContext> leaves = reader.leaves();
    if (leaves.isEmpty()) {
        return 0;
    }

    // iterate backwards to optimize for the frequently updated documents
    // which are likely to be in the last segments
    for (int i = leaves.size() - 1; i >= 0; i--) {
        LeafReader leaf = leaves.get(i).reader();
        Bits liveDocs = leaf.getLiveDocs();

        TermsEnum termsEnum = null;
        NumericDocValues dvField = null;
        PostingsEnum docsEnum = null;

        final Fields fields = leaf.fields();
        if (fields != null) {
            Terms terms = fields.terms(UidFieldMapper.NAME);
            if (terms != null) {
                termsEnum = terms.iterator();
                assert termsEnum != null;
                dvField = leaf.getNumericDocValues(SeqNoFieldMapper.PRIMARY_TERM_NAME);
                assert dvField != null;

                final BytesRef id = term.bytes();
                if (termsEnum.seekExact(id)) {
                    // there may be more than one matching docID, in the
                    // case of nested docs, so we want the last one:
                    docsEnum = termsEnum.postings(docsEnum, 0);
                    int docID = DocIdSetIterator.NO_MORE_DOCS;
                    for (int d = docsEnum.nextDoc(); d != DocIdSetIterator.NO_MORE_DOCS; d = docsEnum
                            .nextDoc()) {
                        if (liveDocs != null && liveDocs.get(d) == false) {
                            continue;
                        }
                        docID = d;
                    }

                    if (docID != DocIdSetIterator.NO_MORE_DOCS) {
                        return dvField.get(docID);
                    }
                }
            }
        }

    }
    return 0;
}

From source file: org.elasticsearch.index.cache.bitset.BitSetFilterCacheTests.java

License: Apache License

private static int matchCount(BitSetProducer producer, IndexReader reader) throws IOException {
    int count = 0;
    for (LeafReaderContext ctx : reader.leaves()) {
        final BitSet bitSet = producer.getBitSet(ctx);
        if (bitSet != null) {
            count += bitSet.cardinality();
        }
    }
    return count;
}

From source file: org.elasticsearch.index.engine.RamAccountingSearcherFactory.java

License: Apache License

@Override
public IndexSearcher newSearcher(IndexReader reader, IndexReader previousReader) throws IOException {
    final CircuitBreaker breaker = breakerService.getBreaker(CircuitBreaker.ACCOUNTING);

    // Construct a list of the previous segment readers, we only want to track memory used
    // by new readers, so these will be exempted from the circuit breaking accounting.
    //
    // The Core CacheKey is used as the key for the set so that deletions still keep the correct
    // accounting, as using the Reader or Reader's CacheKey causes incorrect accounting.
    final Set<IndexReader.CacheKey> prevReaders;
    if (previousReader == null) {
        prevReaders = Collections.emptySet();
    } else {
        final List<LeafReaderContext> previousReaderLeaves = previousReader.leaves();
        prevReaders = new HashSet<>(previousReaderLeaves.size());
        for (LeafReaderContext lrc : previousReaderLeaves) {
            prevReaders.add(Lucene.segmentReader(lrc.reader()).getCoreCacheHelper().getKey());
        }
    }

    for (LeafReaderContext lrc : reader.leaves()) {
        final SegmentReader segmentReader = Lucene.segmentReader(lrc.reader());
        // only account the segment's memory if it was not referenced by the
        // previous reader (i.e. only new segments)
        if (prevReaders.contains(segmentReader.getCoreCacheHelper().getKey()) == false) {
            final long ramBytesUsed = segmentReader.ramBytesUsed();
            // add the segment memory to the breaker (non-breaking)
            breaker.addWithoutBreaking(ramBytesUsed);
            // and register a listener for when the segment is closed to decrement the
            // breaker accounting
            segmentReader.getCoreCacheHelper()
                    .addClosedListener(k -> breaker.addWithoutBreaking(-ramBytesUsed));
        }
    }
    return super.newSearcher(reader, previousReader);
}