Example usage for org.apache.lucene.index IndexReader leaves


Introduction

On this page you can find usage examples for the org.apache.lucene.index.IndexReader leaves() method.

Prototype

public final List<LeafReaderContext> leaves() 

Document

Returns the reader's leaves, or itself if this reader is atomic.
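
Before the real-world examples, here is a minimal, self-contained sketch of the basic call pattern (the Directory parameter and the doc-counting logic are assumptions for illustration): open a reader, then visit each leaf segment in turn. This is the shape every example below follows.

private static int countLiveDocs(Directory directory) throws IOException {
    try (DirectoryReader reader = DirectoryReader.open(directory)) {
        int numDocs = 0;
        // each LeafReaderContext wraps a single segment's LeafReader
        for (LeafReaderContext ctx : reader.leaves()) {
            numDocs += ctx.reader().numDocs();
        }
        return numDocs;
    }
}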

Usage

From source file: org.elasticsearch.common.lucene.search.MultiPhrasePrefixQuery.java

License: Apache License

private void getPrefixTerms(ObjectHashSet<Term> terms, final Term prefix, final IndexReader reader)
        throws IOException {
    // SlowCompositeReaderWrapper could be used... but this would merge all terms from each segment into one terms
    // instance, which is very expensive. Therefore I think it is better to iterate over each leaf individually.
    List<LeafReaderContext> leaves = reader.leaves();
    for (LeafReaderContext leaf : leaves) {
        Terms _terms = leaf.reader().terms(field);
        if (_terms == null) {
            continue;
        }

        TermsEnum termsEnum = _terms.iterator();
        TermsEnum.SeekStatus seekStatus = termsEnum.seekCeil(prefix.bytes());
        if (TermsEnum.SeekStatus.END == seekStatus) {
            continue;
        }

        for (BytesRef term = termsEnum.term(); term != null; term = termsEnum.next()) {
            if (!StringHelper.startsWith(term, prefix.bytes())) {
                break;
            }

            terms.add(new Term(field, BytesRef.deepCopyOf(term)));
            if (terms.size() >= maxExpansions) {
                return;
            }
        }
    }
}

From source file: org.elasticsearch.common.lucene.uid.Versions.java

License: Apache License

/**
 * Load the internal doc ID and version for the uid from the reader, returning<ul>
 * <li>null if the uid wasn't found,
 * <li>a doc ID and a version otherwise, the version being potentially set to {@link #NOT_SET} if the uid has no associated version
 * </ul>
 */
public static DocIdAndVersion loadDocIdAndVersion(IndexReader reader, Term term) throws IOException {
    // iterate backwards to optimize for the frequently updated documents
    // which are likely to be in the last segments
    final List<LeafReaderContext> leaves = reader.leaves();
    for (int i = leaves.size() - 1; i >= 0; --i) {
        final DocIdAndVersion docIdAndVersion = loadDocIdAndVersion(leaves.get(i), term);
        if (docIdAndVersion != null) {
            assert docIdAndVersion.version != NOT_FOUND;
            return docIdAndVersion;
        }
    }
    return null;
}
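
A hypothetical caller of this helper might look as follows (the directory variable and the uid value "type#1" are assumptions for illustration; Versions.NOT_FOUND is the class's not-found sentinel referenced in the assertion above):

try (DirectoryReader reader = DirectoryReader.open(directory)) {
    // null means the uid was not present in any segment
    DocIdAndVersion docIdAndVersion = Versions.loadDocIdAndVersion(reader, new Term("_uid", "type#1"));
    long version = docIdAndVersion == null ? Versions.NOT_FOUND : docIdAndVersion.version;
}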

From source file: org.elasticsearch.common.lucene.uid.VersionsAndSeqNoResolver.java

License: Apache License

private static PerThreadIDVersionAndSeqNoLookup[] getLookupState(IndexReader reader, String uidField)
        throws IOException {
    // We cache on the top level
    // This means cache entries have a shorter lifetime, maybe as low as 1s with the
    // default refresh interval and a steady indexing rate, but on the other hand it
    // proved to be cheaper than having to perform a CHM and a TL get for every segment.
    // See https://github.com/elastic/elasticsearch/pull/19856.
    IndexReader.CacheHelper cacheHelper = reader.getReaderCacheHelper();
    CloseableThreadLocal<PerThreadIDVersionAndSeqNoLookup[]> ctl = lookupStates.get(cacheHelper.getKey());
    if (ctl == null) {
        // First time we are seeing this reader's core; make a new CTL:
        ctl = new CloseableThreadLocal<>();
        CloseableThreadLocal<PerThreadIDVersionAndSeqNoLookup[]> other = lookupStates
                .putIfAbsent(cacheHelper.getKey(), ctl);
        if (other == null) {
            // Our CTL won, we must remove it when the reader is closed:
            cacheHelper.addClosedListener(removeLookupState);
        } else {
            // Another thread beat us to it: just use their CTL:
            ctl = other;
        }
    }

    PerThreadIDVersionAndSeqNoLookup[] lookupState = ctl.get();
    if (lookupState == null) {
        lookupState = new PerThreadIDVersionAndSeqNoLookup[reader.leaves().size()];
        for (LeafReaderContext leaf : reader.leaves()) {
            lookupState[leaf.ord] = new PerThreadIDVersionAndSeqNoLookup(leaf.reader(), uidField);
        }
        ctl.set(lookupState);
    }

    if (lookupState.length != reader.leaves().size()) {
        throw new AssertionError(
                "Mismatched numbers of leaves: " + lookupState.length + " != " + reader.leaves().size());
    }

    if (lookupState.length > 0 && Objects.equals(lookupState[0].uidField, uidField) == false) {
        throw new AssertionError("Index does not consistently use the same uid field: [" + uidField + "] != ["
                + lookupState[0].uidField + "]");
    }

    return lookupState;
}

From source file: org.elasticsearch.common.lucene.uid.VersionsAndSeqNoResolver.java

License: Apache License

/**
 * Load the internal doc ID and version for the uid from the reader, returning<ul>
 * <li>null if the uid wasn't found,
 * <li>a doc ID and a version otherwise
 * </ul>
 */
public static DocIdAndVersion loadDocIdAndVersion(IndexReader reader, Term term) throws IOException {
    PerThreadIDVersionAndSeqNoLookup[] lookups = getLookupState(reader, term.field());
    List<LeafReaderContext> leaves = reader.leaves();
    // iterate backwards to optimize for the frequently updated documents
    // which are likely to be in the last segments
    for (int i = leaves.size() - 1; i >= 0; i--) {
        final LeafReaderContext leaf = leaves.get(i);
        PerThreadIDVersionAndSeqNoLookup lookup = lookups[leaf.ord];
        DocIdAndVersion result = lookup.lookupVersion(term.bytes(), leaf);
        if (result != null) {
            return result;
        }
    }
    return null;
}

From source file: org.elasticsearch.common.lucene.uid.VersionsAndSeqNoResolver.java

License: Apache License

/**
 * Load the internal doc ID and sequence number for the uid from the reader, returning<ul>
 * <li>null if the uid wasn't found,
 * <li>a doc ID and the associated seqNo otherwise
 * </ul>
 */
public static DocIdAndSeqNo loadDocIdAndSeqNo(IndexReader reader, Term term) throws IOException {
    PerThreadIDVersionAndSeqNoLookup[] lookups = getLookupState(reader, term.field());
    List<LeafReaderContext> leaves = reader.leaves();
    // iterate backwards to optimize for the frequently updated documents
    // which are likely to be in the last segments
    for (int i = leaves.size() - 1; i >= 0; i--) {
        final LeafReaderContext leaf = leaves.get(i);
        PerThreadIDVersionAndSeqNoLookup lookup = lookups[leaf.ord];
        DocIdAndSeqNo result = lookup.lookupSeqNo(term.bytes(), leaf);
        if (result != null) {
            return result;
        }
    }
    return null;
}

From source file: org.elasticsearch.common.lucene.uid.VersionsResolver.java

License: Apache License

/**
 * Load the internal doc ID and version for the uid from the reader, returning<ul>
 * <li>null if the uid wasn't found,
 * <li>a doc ID and a version otherwise
 * </ul>
 */
public static DocIdAndVersion loadDocIdAndVersion(IndexReader reader, Term term) throws IOException {
    assert term.field().equals(UidFieldMapper.NAME);
    List<LeafReaderContext> leaves = reader.leaves();
    if (leaves.isEmpty()) {
        return null;
    }
    // iterate backwards to optimize for the frequently updated documents
    // which are likely to be in the last segments
    for (int i = leaves.size() - 1; i >= 0; i--) {
        LeafReaderContext context = leaves.get(i);
        LeafReader leaf = context.reader();
        PerThreadIDAndVersionLookup lookup = getLookupState(leaf);
        DocIdAndVersion result = lookup.lookupVersion(term.bytes(), leaf.getLiveDocs(), context);
        if (result != null) {
            return result;
        }
    }
    return null;
}

From source file: org.elasticsearch.common.lucene.uid.VersionsResolver.java

License: Apache License

/**
 * Returns the sequence number for the given uid term, returning
 * {@code SequenceNumbersService.UNASSIGNED_SEQ_NO} if none is found.
 */
public static long loadSeqNo(IndexReader reader, Term term) throws IOException {
    assert term.field().equals(UidFieldMapper.NAME) : "can only load _seq_no by uid";
    List<LeafReaderContext> leaves = reader.leaves();
    if (leaves.isEmpty()) {
        return SequenceNumbersService.UNASSIGNED_SEQ_NO;
    }

    // iterate backwards to optimize for the frequently updated documents
    // which are likely to be in the last segments
    for (int i = leaves.size() - 1; i >= 0; i--) {
        LeafReader leaf = leaves.get(i).reader();
        Bits liveDocs = leaf.getLiveDocs();

        TermsEnum termsEnum = null;
        SortedNumericDocValues dvField = null;
        PostingsEnum docsEnum = null;

        final Fields fields = leaf.fields();
        if (fields != null) {
            Terms terms = fields.terms(UidFieldMapper.NAME);
            if (terms != null) {
                termsEnum = terms.iterator();
                assert termsEnum != null;
                dvField = leaf.getSortedNumericDocValues(SeqNoFieldMapper.NAME);
                assert dvField != null;

                final BytesRef id = term.bytes();
                if (termsEnum.seekExact(id)) {
                    // there may be more than one matching docID, in the
                    // case of nested docs, so we want the last one:
                    docsEnum = termsEnum.postings(docsEnum, 0);
                    int docID = DocIdSetIterator.NO_MORE_DOCS;
                    for (int d = docsEnum.nextDoc(); d != DocIdSetIterator.NO_MORE_DOCS; d = docsEnum
                            .nextDoc()) {
                        if (liveDocs != null && liveDocs.get(d) == false) {
                            continue;
                        }
                        docID = d;
                    }

                    if (docID != DocIdSetIterator.NO_MORE_DOCS) {
                        dvField.setDocument(docID);
                        assert dvField.count() == 1 : "expected only a single value for _seq_no but got "
                                + dvField.count();
                        return dvField.valueAt(0);
                    }
                }
            }
        }

    }
    return SequenceNumbersService.UNASSIGNED_SEQ_NO;
}

From source file: org.elasticsearch.common.lucene.uid.VersionsResolver.java

License: Apache License

/**
 * Returns the primary term for the given uid term, returning {@code 0} if none is found.
 */
public static long loadPrimaryTerm(IndexReader reader, Term term) throws IOException {
    assert term.field().equals(UidFieldMapper.NAME) : "can only load _primary_term by uid";
    List<LeafReaderContext> leaves = reader.leaves();
    if (leaves.isEmpty()) {
        return 0;
    }

    // iterate backwards to optimize for the frequently updated documents
    // which are likely to be in the last segments
    for (int i = leaves.size() - 1; i >= 0; i--) {
        LeafReader leaf = leaves.get(i).reader();
        Bits liveDocs = leaf.getLiveDocs();

        TermsEnum termsEnum = null;
        NumericDocValues dvField = null;
        PostingsEnum docsEnum = null;

        final Fields fields = leaf.fields();
        if (fields != null) {
            Terms terms = fields.terms(UidFieldMapper.NAME);
            if (terms != null) {
                termsEnum = terms.iterator();
                assert termsEnum != null;
                dvField = leaf.getNumericDocValues(SeqNoFieldMapper.PRIMARY_TERM_NAME);
                assert dvField != null;

                final BytesRef id = term.bytes();
                if (termsEnum.seekExact(id)) {
                    // there may be more than one matching docID, in the
                    // case of nested docs, so we want the last one:
                    docsEnum = termsEnum.postings(docsEnum, 0);
                    int docID = DocIdSetIterator.NO_MORE_DOCS;
                    for (int d = docsEnum.nextDoc(); d != DocIdSetIterator.NO_MORE_DOCS; d = docsEnum
                            .nextDoc()) {
                        if (liveDocs != null && liveDocs.get(d) == false) {
                            continue;
                        }
                        docID = d;
                    }

                    if (docID != DocIdSetIterator.NO_MORE_DOCS) {
                        return dvField.get(docID);
                    }
                }
            }
        }

    }
    return 0;
}

From source file: org.elasticsearch.index.cache.bitset.BitSetFilterCacheTests.java

License: Apache License

private static int matchCount(BitSetProducer producer, IndexReader reader) throws IOException {
    int count = 0;
    for (LeafReaderContext ctx : reader.leaves()) {
        final BitSet bitSet = producer.getBitSet(ctx);
        if (bitSet != null) {
            count += bitSet.cardinality();
        }
    }
    return count;
}

From source file: org.elasticsearch.index.engine.RamAccountingSearcherFactory.java

License: Apache License

@Override
public IndexSearcher newSearcher(IndexReader reader, IndexReader previousReader) throws IOException {
    final CircuitBreaker breaker = breakerService.getBreaker(CircuitBreaker.ACCOUNTING);

    // Construct a list of the previous segment readers, we only want to track memory used
    // by new readers, so these will be exempted from the circuit breaking accounting.
    //
    // The Core CacheKey is used as the key for the set so that deletions still keep the correct
    // accounting, as using the Reader or Reader's CacheKey causes incorrect accounting.
    final Set<IndexReader.CacheKey> prevReaders;
    if (previousReader == null) {
        prevReaders = Collections.emptySet();
    } else {
        final List<LeafReaderContext> previousReaderLeaves = previousReader.leaves();
        prevReaders = new HashSet<>(previousReaderLeaves.size());
        for (LeafReaderContext lrc : previousReaderLeaves) {
            prevReaders.add(Lucene.segmentReader(lrc.reader()).getCoreCacheHelper().getKey());
        }
    }

    for (LeafReaderContext lrc : reader.leaves()) {
        final SegmentReader segmentReader = Lucene.segmentReader(lrc.reader());
        // only account the segment's memory if it was not referenced by the
        // previous reader (i.e. only new segments)
        if (prevReaders.contains(segmentReader.getCoreCacheHelper().getKey()) == false) {
            final long ramBytesUsed = segmentReader.ramBytesUsed();
            // add the segment memory to the breaker (non-breaking)
            breaker.addWithoutBreaking(ramBytesUsed);
            // and register a listener for when the segment is closed to decrement the
            // breaker accounting
            segmentReader.getCoreCacheHelper()
                    .addClosedListener(k -> breaker.addWithoutBreaking(-ramBytesUsed));
        }
    }
    return super.newSearcher(reader, previousReader);
}