List of usage examples for org.apache.lucene.index.LeafReader#getLiveDocs
public abstract Bits getLiveDocs();
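
For context, getLiveDocs() exposes the per-segment deletion state: a set bit means the document is live, and a null return means the segment has no deletions, so callers must null-check before testing individual doc IDs. The sketch below is a minimal, hypothetical illustration of that pattern and is not taken from any of the source files listed here; the index path and class name are placeholders.

import java.io.IOException;
import java.nio.file.Paths;

import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Bits;

// Hypothetical helper: counts live (non-deleted) documents per segment.
public class LiveDocCounter {
    public static void main(String[] args) throws IOException {
        // "/tmp/example-index" is a placeholder path to an existing Lucene index.
        try (Directory dir = FSDirectory.open(Paths.get("/tmp/example-index"));
             DirectoryReader reader = DirectoryReader.open(dir)) {
            for (LeafReaderContext ctx : reader.leaves()) {
                LeafReader leaf = ctx.reader();
                Bits liveDocs = leaf.getLiveDocs(); // null means the segment has no deletions
                int live = 0;
                for (int docId = 0; docId < leaf.maxDoc(); docId++) {
                    if (liveDocs == null || liveDocs.get(docId)) {
                        live++;
                    }
                }
                System.out.println("segment " + ctx.ord + ": " + live + " live of " + leaf.maxDoc() + " docs");
            }
        }
    }
}

The same null-check recurs throughout the examples below, for instance in docDeleted(liveDocs, doc) in the Crate iterator and in the liveDocs != null && liveDocs.get(d) == false guard in the Elasticsearch sequence-number lookup.
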
From source file: de.unihildesheim.iw.lucene.search.EmptyFieldFilter.java
License: Open Source License

@Override
public DocIdSet getDocIdSet(@NotNull final LeafReaderContext context, @Nullable final Bits acceptDocs)
        throws IOException {
    FixedBitSet checkBits;
    final LeafReader reader = context.reader();
    final int maxDoc = reader.maxDoc();
    BitSet finalBits = new SparseFixedBitSet(maxDoc);

    if (acceptDocs == null) {
        checkBits = BitsUtils.bits2FixedBitSet(reader.getLiveDocs());
        if (checkBits == null) {
            // all live
            checkBits = new FixedBitSet(maxDoc);
            checkBits.set(0, checkBits.length());
        }
    } else {
        checkBits = BitsUtils.bits2FixedBitSet(acceptDocs);
    }

    @Nullable
    final Terms terms = reader.terms(this.field);
    if (terms != null) {
        final int termsDocCount = terms.getDocCount();
        if (termsDocCount != 0) {
            if (termsDocCount == maxDoc) {
                // all matching
                finalBits = checkBits;
            } else {
                @Nullable
                final Terms t = reader.terms(this.field);
                if (t != null) {
                    PostingsEnum pe = null;
                    final TermsEnum te = t.iterator(null);
                    int docId;
                    while (te.next() != null) {
                        pe = te.postings(checkBits, pe, (int) PostingsEnum.NONE);
                        while ((docId = pe.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
                            if (checkBits.getAndClear(docId)) {
                                finalBits.set(docId);
                            }
                        }
                    }
                }
            }
        }
    }
    return new BitDocIdSet(finalBits);
}

From source file: io.crate.execution.engine.collect.collectors.LuceneBatchIterator.java
License: Apache License

private boolean innerMoveNext() throws IOException {
    while (tryAdvanceDocIdSetIterator()) {
        LeafReader reader = currentLeaf.reader();
        Bits liveDocs = reader.getLiveDocs();
        int doc;
        while ((doc = currentDocIdSetIt.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
            if (docDeleted(liveDocs, doc) || belowMinScore(currentScorer)) {
                continue;
            }
            onDoc(doc, reader);
            return true;
        }
        currentDocIdSetIt = null;
    }
    clearState();
    return false;
}

From source file: org.apache.solr.handler.component.AlfrescoLukeRequestHandler.java
License: Open Source License

protected static Document getFirstLiveDoc(Terms terms, LeafReader reader) throws IOException {
    TermsEnum termsEnum = terms.iterator();
    if (termsEnum.next() == null) {
        // Ran off the end of the terms enum without finding any live docs with that field in them.
        return null;
    }
    PostingsEnum postingsEnum = termsEnum.postings(null, PostingsEnum.NONE);
    final Bits liveDocs = reader.getLiveDocs();
    if (postingsEnum.nextDoc() == DocIdSetIterator.NO_MORE_DOCS
            || (liveDocs != null && liveDocs.get(postingsEnum.docID()))) {
        return null;
    }
    return reader.document(postingsEnum.docID());
}

From source file: org.apache.solr.search.facet.UnInvertedField.java
License: Apache License

public UnInvertedField(String field, SolrIndexSearcher searcher) throws IOException {
    super(field,
            // threshold, over which we use set intersections instead of counting
            // to (1) save memory, and (2) speed up faceting.
            // Add 2 for testing purposes so that there will always be some terms under
            // the threshold even when the index is very small.
            searcher.maxDoc() / 20 + 2, DEFAULT_INDEX_INTERVAL_BITS);

    final String prefix = TrieField.getMainValuePrefix(searcher.getSchema().getFieldType(field));
    this.searcher = searcher;
    try {
        // TODO: it's wasteful to create one of these each time
        // but DocTermOrds will throw an exception if it thinks the field has doc values
        // (which is faked by UnInvertingReader)
        LeafReader r = SlowCompositeReaderWrapper.wrap(searcher.getRawReader());
        uninvert(r, r.getLiveDocs(), prefix == null ? null : new BytesRef(prefix));
    } catch (IllegalStateException ise) {
        throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, ise);
    }

    if (tnums != null) {
        for (byte[] target : tnums) {
            if (target != null && target.length > (1 << 24) * .9) {
                log.warn("Approaching too many values for UnInvertedField faceting on field '" + field
                        + "' : bucket size=" + target.length);
            }
        }
    }

    // free space if outrageously wasteful (tradeoff memory/cpu)
    if ((maxTermCounts.length - numTermsInField) > 1024) { // too much waste!
        int[] newMaxTermCounts = new int[numTermsInField];
        System.arraycopy(maxTermCounts, 0, newMaxTermCounts, 0, numTermsInField);
        maxTermCounts = newMaxTermCounts;
    }

    log.info("UnInverted multi-valued field " + toString());
    //System.out.println("CREATED: " + toString() + " ti.index=" + ti.index);
}

From source file: org.apache.solr.uninverting.TestDocTermOrds.java
License: Apache License

public void testEmptyIndex() throws IOException {
    final Directory dir = newDirectory();
    final IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random())));
    iw.close();

    final DirectoryReader ir = DirectoryReader.open(dir);
    TestUtil.checkReader(ir);

    final LeafReader composite = SlowCompositeReaderWrapper.wrap(ir);
    TestUtil.checkReader(composite);

    // check the leaves
    // (normally there are none for an empty index, so this is really just future
    // proofing in case that changes for some reason)
    for (LeafReaderContext rc : ir.leaves()) {
        final LeafReader r = rc.reader();
        final DocTermOrds dto = new DocTermOrds(r, r.getLiveDocs(), "any_field");
        assertNull("OrdTermsEnum should be null (leaf)", dto.getOrdTermsEnum(r));
        assertEquals("iterator should be empty (leaf)", 0, dto.iterator(r).getValueCount());
    }

    // check the composite
    final DocTermOrds dto = new DocTermOrds(composite, composite.getLiveDocs(), "any_field");
    assertNull("OrdTermsEnum should be null (composite)", dto.getOrdTermsEnum(composite));
    assertEquals("iterator should be empty (composite)", 0, dto.iterator(composite).getValueCount());

    ir.close();
    dir.close();
}

From source file: org.apache.solr.uninverting.TestDocTermOrds.java
License: Apache License

public void testSimple() throws Exception {
    Directory dir = newDirectory();
    final RandomIndexWriter w = new RandomIndexWriter(random(), dir,
            newIndexWriterConfig(new MockAnalyzer(random())).setMergePolicy(newLogMergePolicy()));
    Document doc = new Document();
    Field field = newTextField("field", "", Field.Store.NO);
    doc.add(field);

    field.setStringValue("a b c");
    w.addDocument(doc);

    field.setStringValue("d e f");
    w.addDocument(doc);

    field.setStringValue("a f");
    w.addDocument(doc);

    final IndexReader r = w.getReader();
    w.close();

    final LeafReader ar = SlowCompositeReaderWrapper.wrap(r);
    TestUtil.checkReader(ar);
    final DocTermOrds dto = new DocTermOrds(ar, ar.getLiveDocs(), "field");
    SortedSetDocValues iter = dto.iterator(ar);

    assertEquals(0, iter.nextDoc());
    assertEquals(0, iter.nextOrd());
    assertEquals(1, iter.nextOrd());
    assertEquals(2, iter.nextOrd());
    assertEquals(SortedSetDocValues.NO_MORE_ORDS, iter.nextOrd());

    assertEquals(1, iter.nextDoc());
    assertEquals(3, iter.nextOrd());
    assertEquals(4, iter.nextOrd());
    assertEquals(5, iter.nextOrd());
    assertEquals(SortedSetDocValues.NO_MORE_ORDS, iter.nextOrd());

    assertEquals(2, iter.nextDoc());
    assertEquals(0, iter.nextOrd());
    assertEquals(5, iter.nextOrd());
    assertEquals(SortedSetDocValues.NO_MORE_ORDS, iter.nextOrd());

    r.close();
    dir.close();
}

From source file: org.apache.solr.uninverting.TestDocTermOrds.java
License: Apache License

private void verify(LeafReader r, int[][] idToOrds, BytesRef[] termsArray, BytesRef prefixRef) throws Exception {
    final DocTermOrds dto = new DocTermOrds(r, r.getLiveDocs(), "field", prefixRef, Integer.MAX_VALUE,
            TestUtil.nextInt(random(), 2, 10));

    final NumericDocValues docIDToID = FieldCache.DEFAULT.getNumerics(r, "id", FieldCache.LEGACY_INT_PARSER);
    /*
    for(int docID=0;docID<subR.maxDoc();docID++) {
      System.out.println("  docID=" + docID + " id=" + docIDToID[docID]);
    }
    */

    if (VERBOSE) {
        System.out.println("TEST: verify prefix=" + (prefixRef == null ? "null" : prefixRef.utf8ToString()));
        System.out.println("TEST: all TERMS:");
        TermsEnum allTE = MultiFields.getTerms(r, "field").iterator();
        int ord = 0;
        while (allTE.next() != null) {
            System.out.println("  ord=" + (ord++) + " term=" + allTE.term().utf8ToString());
        }
    }

    //final TermsEnum te = subR.fields().terms("field").iterator();
    final TermsEnum te = dto.getOrdTermsEnum(r);
    if (dto.numTerms() == 0) {
        if (prefixRef == null) {
            assertNull(MultiFields.getTerms(r, "field"));
        } else {
            Terms terms = MultiFields.getTerms(r, "field");
            if (terms != null) {
                TermsEnum termsEnum = terms.iterator();
                TermsEnum.SeekStatus result = termsEnum.seekCeil(prefixRef);
                if (result != TermsEnum.SeekStatus.END) {
                    assertFalse("term=" + termsEnum.term().utf8ToString() + " matches prefix="
                            + prefixRef.utf8ToString(), StringHelper.startsWith(termsEnum.term(), prefixRef));
                } else {
                    // ok
                }
            } else {
                // ok
            }
        }
        return;
    }

    if (VERBOSE) {
        System.out.println("TEST: TERMS:");
        te.seekExact(0);
        while (true) {
            System.out.println("  ord=" + te.ord() + " term=" + te.term().utf8ToString());
            if (te.next() == null) {
                break;
            }
        }
    }

    SortedSetDocValues iter = dto.iterator(r);
    for (int docID = 0; docID < r.maxDoc(); docID++) {
        assertEquals(docID, docIDToID.nextDoc());
        if (docID > iter.docID()) {
            iter.nextDoc();
        }
        if (docID < iter.docID()) {
            int[] answers = idToOrds[(int) docIDToID.longValue()];
            assertEquals(0, answers.length);
            continue;
        }
        if (VERBOSE) {
            System.out.println(
                    "TEST: docID=" + docID + " of " + r.maxDoc() + " (id=" + docIDToID.longValue() + ")");
        }
        final int[] answers = idToOrds[(int) docIDToID.longValue()];
        int upto = 0;
        long ord;
        while ((ord = iter.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) {
            te.seekExact(ord);
            final BytesRef expected = termsArray[answers[upto++]];
            if (VERBOSE) {
                System.out.println("  exp=" + expected.utf8ToString() + " actual=" + te.term().utf8ToString());
            }
            assertEquals("expected=" + expected.utf8ToString() + " actual=" + te.term().utf8ToString()
                    + " ord=" + ord, expected, te.term());
        }
        assertEquals(answers.length, upto);
    }
}

From source file: org.codelibs.elasticsearch.common.lucene.uid.Versions.java
License: Apache License

/**
 * Load the internal doc ID and version for the uid from the reader, returning<ul>
 * <li>null if the uid wasn't found,
 * <li>a doc ID and a version otherwise
 * </ul>
 */
public static DocIdAndVersion loadDocIdAndVersion(IndexReader reader, Term term) throws IOException {
    assert term.field().equals(UidFieldMapper.NAME);
    List<LeafReaderContext> leaves = reader.leaves();
    if (leaves.isEmpty()) {
        return null;
    }
    // iterate backwards to optimize for the frequently updated documents
    // which are likely to be in the last segments
    for (int i = leaves.size() - 1; i >= 0; i--) {
        LeafReaderContext context = leaves.get(i);
        LeafReader leaf = context.reader();
        PerThreadIDAndVersionLookup lookup = getLookupState(leaf);
        DocIdAndVersion result = lookup.lookup(term.bytes(), leaf.getLiveDocs(), context);
        if (result != null) {
            return result;
        }
    }
    return null;
}

From source file: org.elasticsearch.common.lucene.uid.VersionsResolver.java
License: Apache License

/**
 * Load the internal doc ID and version for the uid from the reader, returning<ul>
 * <li>null if the uid wasn't found,
 * <li>a doc ID and a version otherwise
 * </ul>
 */
public static DocIdAndVersion loadDocIdAndVersion(IndexReader reader, Term term) throws IOException {
    assert term.field().equals(UidFieldMapper.NAME);
    List<LeafReaderContext> leaves = reader.leaves();
    if (leaves.isEmpty()) {
        return null;
    }
    // iterate backwards to optimize for the frequently updated documents
    // which are likely to be in the last segments
    for (int i = leaves.size() - 1; i >= 0; i--) {
        LeafReaderContext context = leaves.get(i);
        LeafReader leaf = context.reader();
        PerThreadIDAndVersionLookup lookup = getLookupState(leaf);
        DocIdAndVersion result = lookup.lookupVersion(term.bytes(), leaf.getLiveDocs(), context);
        if (result != null) {
            return result;
        }
    }
    return null;
}

From source file: org.elasticsearch.common.lucene.uid.VersionsResolver.java
License: Apache License

/**
 * Returns the sequence number for the given uid term, returning
 * {@code SequenceNumbersService.UNASSIGNED_SEQ_NO} if none is found.
 */
public static long loadSeqNo(IndexReader reader, Term term) throws IOException {
    assert term.field().equals(UidFieldMapper.NAME) : "can only load _seq_no by uid";
    List<LeafReaderContext> leaves = reader.leaves();
    if (leaves.isEmpty()) {
        return SequenceNumbersService.UNASSIGNED_SEQ_NO;
    }

    // iterate backwards to optimize for the frequently updated documents
    // which are likely to be in the last segments
    for (int i = leaves.size() - 1; i >= 0; i--) {
        LeafReader leaf = leaves.get(i).reader();
        Bits liveDocs = leaf.getLiveDocs();

        TermsEnum termsEnum = null;
        SortedNumericDocValues dvField = null;
        PostingsEnum docsEnum = null;

        final Fields fields = leaf.fields();
        if (fields != null) {
            Terms terms = fields.terms(UidFieldMapper.NAME);
            if (terms != null) {
                termsEnum = terms.iterator();
                assert termsEnum != null;
                dvField = leaf.getSortedNumericDocValues(SeqNoFieldMapper.NAME);
                assert dvField != null;

                final BytesRef id = term.bytes();
                if (termsEnum.seekExact(id)) {
                    // there may be more than one matching docID, in the
                    // case of nested docs, so we want the last one:
                    docsEnum = termsEnum.postings(docsEnum, 0);
                    int docID = DocIdSetIterator.NO_MORE_DOCS;
                    for (int d = docsEnum.nextDoc(); d != DocIdSetIterator.NO_MORE_DOCS; d = docsEnum.nextDoc()) {
                        if (liveDocs != null && liveDocs.get(d) == false) {
                            continue;
                        }
                        docID = d;
                    }
                    if (docID != DocIdSetIterator.NO_MORE_DOCS) {
                        dvField.setDocument(docID);
                        assert dvField.count() == 1 :
                                "expected only a single value for _seq_no but got " + dvField.count();
                        return dvField.valueAt(0);
                    }
                }
            }
        }
    }
    return SequenceNumbersService.UNASSIGNED_SEQ_NO;
}