Usage examples for org.apache.lucene.index.IndexReader#leaves()
public final List<LeafReaderContext> leaves()
From source file:org.elasticsearch.common.lucene.search.MultiPhrasePrefixQuery.java
License:Apache License
private void getPrefixTerms(ObjectHashSet<Term> terms, final Term prefix, final IndexReader reader) throws IOException { // SlowCompositeReaderWrapper could be used... but this would merge all terms from each segment into one terms // instance, which is very expensive. Therefore I think it is better to iterate over each leaf individually. List<LeafReaderContext> leaves = reader.leaves(); for (LeafReaderContext leaf : leaves) { Terms _terms = leaf.reader().terms(field); if (_terms == null) { continue; }/*w w w. j ava 2s.c o m*/ TermsEnum termsEnum = _terms.iterator(); TermsEnum.SeekStatus seekStatus = termsEnum.seekCeil(prefix.bytes()); if (TermsEnum.SeekStatus.END == seekStatus) { continue; } for (BytesRef term = termsEnum.term(); term != null; term = termsEnum.next()) { if (!StringHelper.startsWith(term, prefix.bytes())) { break; } terms.add(new Term(field, BytesRef.deepCopyOf(term))); if (terms.size() >= maxExpansions) { return; } } } }
From source file:org.elasticsearch.common.lucene.uid.Versions.java
License:Apache License
/** * Load the internal doc ID and version for the uid from the reader, returning<ul> * <li>null if the uid wasn't found, * <li>a doc ID and a version otherwise, the version being potentially set to {@link #NOT_SET} if the uid has no associated version * </ul>/*from w w w . jav a2s . c o m*/ */ public static DocIdAndVersion loadDocIdAndVersion(IndexReader reader, Term term) throws IOException { // iterate backwards to optimize for the frequently updated documents // which are likely to be in the last segments final List<AtomicReaderContext> leaves = reader.leaves(); for (int i = leaves.size() - 1; i >= 0; --i) { final DocIdAndVersion docIdAndVersion = loadDocIdAndVersion(leaves.get(i), term); if (docIdAndVersion != null) { assert docIdAndVersion.version != NOT_FOUND; return docIdAndVersion; } } return null; }
From source file:org.elasticsearch.common.lucene.uid.VersionsAndSeqNoResolver.java
License:Apache License
/**
 * Returns the per-thread lookup state (one entry per leaf) for {@code reader},
 * creating and caching it the first time this reader's core is seen.
 *
 * @throws AssertionError if the cached state does not match the reader's leaf count,
 *                        or if the index does not consistently use {@code uidField}
 */
private static PerThreadIDVersionAndSeqNoLookup[] getLookupState(IndexReader reader, String uidField)
        throws IOException {
    // We cache on the top level.
    // This means cache entries have a shorter lifetime, maybe as low as 1s with the
    // default refresh interval and a steady indexing rate, but on the other hand it
    // proved to be cheaper than having to perform a CHM and a TL get for every segment.
    // See https://github.com/elastic/elasticsearch/pull/19856.
    IndexReader.CacheHelper cacheHelper = reader.getReaderCacheHelper();
    CloseableThreadLocal<PerThreadIDVersionAndSeqNoLookup[]> ctl = lookupStates.get(cacheHelper.getKey());
    if (ctl == null) {
        // First time we are seeing this reader's core; make a new CTL:
        ctl = new CloseableThreadLocal<>();
        // putIfAbsent resolves the race when two threads create a CTL concurrently.
        CloseableThreadLocal<PerThreadIDVersionAndSeqNoLookup[]> other = lookupStates
                .putIfAbsent(cacheHelper.getKey(), ctl);
        if (other == null) {
            // Our CTL won, we must remove it when the reader is closed:
            cacheHelper.addClosedListener(removeLookupState);
        } else {
            // Another thread beat us to it: just use their CTL:
            ctl = other;
        }
    }
    PerThreadIDVersionAndSeqNoLookup[] lookupState = ctl.get();
    if (lookupState == null) {
        // First use on this thread: build one lookup per leaf, indexed by leaf ordinal.
        lookupState = new PerThreadIDVersionAndSeqNoLookup[reader.leaves().size()];
        for (LeafReaderContext leaf : reader.leaves()) {
            lookupState[leaf.ord] = new PerThreadIDVersionAndSeqNoLookup(leaf.reader(), uidField);
        }
        ctl.set(lookupState);
    }
    // Sanity check: a cached state sized for a different leaf count means the cache
    // key was reused across incompatible readers.
    if (lookupState.length != reader.leaves().size()) {
        throw new AssertionError(
                "Mismatched numbers of leaves: " + lookupState.length + " != " + reader.leaves().size());
    }
    if (lookupState.length > 0 && Objects.equals(lookupState[0].uidField, uidField) == false) {
        throw new AssertionError("Index does not consistently use the same uid field: [" + uidField + "] != ["
                + lookupState[0].uidField + "]");
    }
    return lookupState;
}
From source file:org.elasticsearch.common.lucene.uid.VersionsAndSeqNoResolver.java
License:Apache License
/** * Load the internal doc ID and version for the uid from the reader, returning<ul> * <li>null if the uid wasn't found, * <li>a doc ID and a version otherwise * </ul>//from w w w . j a v a2 s . c om */ public static DocIdAndVersion loadDocIdAndVersion(IndexReader reader, Term term) throws IOException { PerThreadIDVersionAndSeqNoLookup[] lookups = getLookupState(reader, term.field()); List<LeafReaderContext> leaves = reader.leaves(); // iterate backwards to optimize for the frequently updated documents // which are likely to be in the last segments for (int i = leaves.size() - 1; i >= 0; i--) { final LeafReaderContext leaf = leaves.get(i); PerThreadIDVersionAndSeqNoLookup lookup = lookups[leaf.ord]; DocIdAndVersion result = lookup.lookupVersion(term.bytes(), leaf); if (result != null) { return result; } } return null; }
From source file:org.elasticsearch.common.lucene.uid.VersionsAndSeqNoResolver.java
License:Apache License
/** * Load the internal doc ID and sequence number for the uid from the reader, returning<ul> * <li>null if the uid wasn't found, * <li>a doc ID and the associated seqNo otherwise * </ul>/*from w w w. j av a 2 s .c om*/ */ public static DocIdAndSeqNo loadDocIdAndSeqNo(IndexReader reader, Term term) throws IOException { PerThreadIDVersionAndSeqNoLookup[] lookups = getLookupState(reader, term.field()); List<LeafReaderContext> leaves = reader.leaves(); // iterate backwards to optimize for the frequently updated documents // which are likely to be in the last segments for (int i = leaves.size() - 1; i >= 0; i--) { final LeafReaderContext leaf = leaves.get(i); PerThreadIDVersionAndSeqNoLookup lookup = lookups[leaf.ord]; DocIdAndSeqNo result = lookup.lookupSeqNo(term.bytes(), leaf); if (result != null) { return result; } } return null; }
From source file:org.elasticsearch.common.lucene.uid.VersionsResolver.java
License:Apache License
/** * Load the internal doc ID and version for the uid from the reader, returning<ul> * <li>null if the uid wasn't found, * <li>a doc ID and a version otherwise * </ul>// w w w. jav a 2 s .c o m */ public static DocIdAndVersion loadDocIdAndVersion(IndexReader reader, Term term) throws IOException { assert term.field().equals(UidFieldMapper.NAME); List<LeafReaderContext> leaves = reader.leaves(); if (leaves.isEmpty()) { return null; } // iterate backwards to optimize for the frequently updated documents // which are likely to be in the last segments for (int i = leaves.size() - 1; i >= 0; i--) { LeafReaderContext context = leaves.get(i); LeafReader leaf = context.reader(); PerThreadIDAndVersionLookup lookup = getLookupState(leaf); DocIdAndVersion result = lookup.lookupVersion(term.bytes(), leaf.getLiveDocs(), context); if (result != null) { return result; } } return null; }
From source file:org.elasticsearch.common.lucene.uid.VersionsResolver.java
License:Apache License
/** * Returns the sequence number for the given uid term, returning * {@code SequenceNumbersService.UNASSIGNED_SEQ_NO} if none is found. *///from w ww. j a v a2s. co m public static long loadSeqNo(IndexReader reader, Term term) throws IOException { assert term.field().equals(UidFieldMapper.NAME) : "can only load _seq_no by uid"; List<LeafReaderContext> leaves = reader.leaves(); if (leaves.isEmpty()) { return SequenceNumbersService.UNASSIGNED_SEQ_NO; } // iterate backwards to optimize for the frequently updated documents // which are likely to be in the last segments for (int i = leaves.size() - 1; i >= 0; i--) { LeafReader leaf = leaves.get(i).reader(); Bits liveDocs = leaf.getLiveDocs(); TermsEnum termsEnum = null; SortedNumericDocValues dvField = null; PostingsEnum docsEnum = null; final Fields fields = leaf.fields(); if (fields != null) { Terms terms = fields.terms(UidFieldMapper.NAME); if (terms != null) { termsEnum = terms.iterator(); assert termsEnum != null; dvField = leaf.getSortedNumericDocValues(SeqNoFieldMapper.NAME); assert dvField != null; final BytesRef id = term.bytes(); if (termsEnum.seekExact(id)) { // there may be more than one matching docID, in the // case of nested docs, so we want the last one: docsEnum = termsEnum.postings(docsEnum, 0); int docID = DocIdSetIterator.NO_MORE_DOCS; for (int d = docsEnum.nextDoc(); d != DocIdSetIterator.NO_MORE_DOCS; d = docsEnum .nextDoc()) { if (liveDocs != null && liveDocs.get(d) == false) { continue; } docID = d; } if (docID != DocIdSetIterator.NO_MORE_DOCS) { dvField.setDocument(docID); assert dvField.count() == 1 : "expected only a single value for _seq_no but got " + dvField.count(); return dvField.valueAt(0); } } } } } return SequenceNumbersService.UNASSIGNED_SEQ_NO; }
From source file:org.elasticsearch.common.lucene.uid.VersionsResolver.java
License:Apache License
/**
 * Returns the primary term for the given uid term, returning {@code 0} if none is found.
 */
public static long loadPrimaryTerm(IndexReader reader, Term term) throws IOException {
    assert term.field().equals(UidFieldMapper.NAME) : "can only load _primary_term by uid";
    List<LeafReaderContext> leaves = reader.leaves();
    if (leaves.isEmpty()) {
        return 0;
    }
    // iterate backwards to optimize for the frequently updated documents
    // which are likely to be in the last segments
    for (int i = leaves.size() - 1; i >= 0; i--) {
        LeafReader leaf = leaves.get(i).reader();
        Bits liveDocs = leaf.getLiveDocs();
        TermsEnum termsEnum = null;
        NumericDocValues dvField = null;
        PostingsEnum docsEnum = null;
        final Fields fields = leaf.fields();
        if (fields != null) {
            Terms terms = fields.terms(UidFieldMapper.NAME);
            if (terms != null) {
                termsEnum = terms.iterator();
                assert termsEnum != null;
                dvField = leaf.getNumericDocValues(SeqNoFieldMapper.PRIMARY_TERM_NAME);
                assert dvField != null;
                final BytesRef id = term.bytes();
                if (termsEnum.seekExact(id)) {
                    // there may be more than one matching docID, in the
                    // case of nested docs, so we want the last one:
                    docsEnum = termsEnum.postings(docsEnum, 0);
                    int docID = DocIdSetIterator.NO_MORE_DOCS;
                    for (int d = docsEnum.nextDoc(); d != DocIdSetIterator.NO_MORE_DOCS; d = docsEnum
                            .nextDoc()) {
                        // Skip deleted documents; only live docs may win.
                        if (liveDocs != null && liveDocs.get(d) == false) {
                            continue;
                        }
                        docID = d;
                    }
                    if (docID != DocIdSetIterator.NO_MORE_DOCS) {
                        // Read the primary term doc value for the winning doc.
                        return dvField.get(docID);
                    }
                }
            }
        }
    }
    return 0;
}
From source file:org.elasticsearch.index.cache.bitset.BitSetFilterCacheTests.java
License:Apache License
private static int matchCount(BitSetProducer producer, IndexReader reader) throws IOException { int count = 0; for (LeafReaderContext ctx : reader.leaves()) { final BitSet bitSet = producer.getBitSet(ctx); if (bitSet != null) { count += bitSet.cardinality(); }//from w w w. ja v a2 s . c om } return count; }
From source file:org.elasticsearch.index.engine.RamAccountingSearcherFactory.java
License:Apache License
@Override public IndexSearcher newSearcher(IndexReader reader, IndexReader previousReader) throws IOException { final CircuitBreaker breaker = breakerService.getBreaker(CircuitBreaker.ACCOUNTING); // Construct a list of the previous segment readers, we only want to track memory used // by new readers, so these will be exempted from the circuit breaking accounting. //// w w w .j a v a 2 s .co m // The Core CacheKey is used as the key for the set so that deletions still keep the correct // accounting, as using the Reader or Reader's CacheKey causes incorrect accounting. final Set<IndexReader.CacheKey> prevReaders; if (previousReader == null) { prevReaders = Collections.emptySet(); } else { final List<LeafReaderContext> previousReaderLeaves = previousReader.leaves(); prevReaders = new HashSet<>(previousReaderLeaves.size()); for (LeafReaderContext lrc : previousReaderLeaves) { prevReaders.add(Lucene.segmentReader(lrc.reader()).getCoreCacheHelper().getKey()); } } for (LeafReaderContext lrc : reader.leaves()) { final SegmentReader segmentReader = Lucene.segmentReader(lrc.reader()); // don't add the segment's memory unless it is not referenced by the previous reader // (only new segments) if (prevReaders.contains(segmentReader.getCoreCacheHelper().getKey()) == false) { final long ramBytesUsed = segmentReader.ramBytesUsed(); // add the segment memory to the breaker (non-breaking) breaker.addWithoutBreaking(ramBytesUsed); // and register a listener for when the segment is closed to decrement the // breaker accounting segmentReader.getCoreCacheHelper() .addClosedListener(k -> breaker.addWithoutBreaking(-ramBytesUsed)); } } return super.newSearcher(reader, previousReader); }