Example usage for org.apache.lucene.index LeafReader getLiveDocs

List of usage examples for org.apache.lucene.index LeafReader getLiveDocs

Introduction

In this page you can find the example usage for org.apache.lucene.index LeafReader getLiveDocs.

Prototype

public abstract Bits getLiveDocs();

Source Link

Document

Returns the Bits representing live (not deleted) docs.

Usage

From source file:de.unihildesheim.iw.lucene.search.EmptyFieldFilter.java

License:Open Source License

@Override
public DocIdSet getDocIdSet(@NotNull final LeafReaderContext context, @Nullable final Bits acceptDocs)
        throws IOException {
    FixedBitSet checkBits;/*from   w  w w  .  j a  va 2 s .c  o  m*/
    final LeafReader reader = context.reader();
    final int maxDoc = reader.maxDoc();

    BitSet finalBits = new SparseFixedBitSet(maxDoc);
    if (acceptDocs == null) {
        checkBits = BitsUtils.bits2FixedBitSet(reader.getLiveDocs());
        if (checkBits == null) {
            // all live
            checkBits = new FixedBitSet(maxDoc);
            checkBits.set(0, checkBits.length());
        }
    } else {
        checkBits = BitsUtils.bits2FixedBitSet(acceptDocs);
    }

    @Nullable
    final Terms terms = reader.terms(this.field);
    if (terms != null) {
        final int termsDocCount = terms.getDocCount();

        if (termsDocCount != 0) {
            if (termsDocCount == maxDoc) {
                // all matching
                finalBits = checkBits;
            } else {
                @Nullable
                final Terms t = reader.terms(this.field);
                if (t != null) {
                    PostingsEnum pe = null;
                    final TermsEnum te = t.iterator(null);
                    int docId;
                    while (te.next() != null) {
                        pe = te.postings(checkBits, pe, (int) PostingsEnum.NONE);
                        while ((docId = pe.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
                            if (checkBits.getAndClear(docId)) {
                                finalBits.set(docId);
                            }
                        }
                    }
                }
            }
        }
    }
    return new BitDocIdSet(finalBits);
}

From source file:io.crate.execution.engine.collect.collectors.LuceneBatchIterator.java

License:Apache License

private boolean innerMoveNext() throws IOException {
    while (tryAdvanceDocIdSetIterator()) {
        LeafReader reader = currentLeaf.reader();
        Bits liveDocs = reader.getLiveDocs();
        int doc;/*from   w  w  w  .j  a va2  s.  co  m*/
        while ((doc = currentDocIdSetIt.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
            if (docDeleted(liveDocs, doc) || belowMinScore(currentScorer)) {
                continue;
            }
            onDoc(doc, reader);
            return true;
        }
        currentDocIdSetIt = null;
    }
    clearState();
    return false;
}

From source file:org.apache.solr.handler.component.AlfrescoLukeRequestHandler.java

License:Open Source License

protected static Document getFirstLiveDoc(Terms terms, LeafReader reader) throws IOException {
    TermsEnum termsEnum = terms.iterator();
    if (termsEnum.next() == null) { // Ran off the end of the terms enum without finding any live docs with that field in them.
        return null;
    }//from   w w  w. j av a 2  s. c o  m
    PostingsEnum postingsEnum = termsEnum.postings(null, PostingsEnum.NONE);
    final Bits liveDocs = reader.getLiveDocs();
    if (postingsEnum.nextDoc() == DocIdSetIterator.NO_MORE_DOCS
            || (liveDocs != null && liveDocs.get(postingsEnum.docID()))) {
        return null;
    }
    return reader.document(postingsEnum.docID());
}

From source file:org.apache.solr.search.facet.UnInvertedField.java

License:Apache License

public UnInvertedField(String field, SolrIndexSearcher searcher) throws IOException {
    super(field,//from   ww  w.j  a va  2 s.c  o m
            // threshold, over which we use set intersections instead of counting
            // to (1) save memory, and (2) speed up faceting.
            // Add 2 for testing purposes so that there will always be some terms under
            // the threshold even when the index is very
            // small.
            searcher.maxDoc() / 20 + 2, DEFAULT_INDEX_INTERVAL_BITS);

    final String prefix = TrieField.getMainValuePrefix(searcher.getSchema().getFieldType(field));
    this.searcher = searcher;
    try {
        // TODO: it's wasteful to create one of these each time
        // but DocTermOrds will throw an exception if it thinks the field has doc values (which is faked by UnInvertingReader)
        LeafReader r = SlowCompositeReaderWrapper.wrap(searcher.getRawReader());
        uninvert(r, r.getLiveDocs(), prefix == null ? null : new BytesRef(prefix));
    } catch (IllegalStateException ise) {
        throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, ise);
    }
    if (tnums != null) {
        for (byte[] target : tnums) {
            if (target != null && target.length > (1 << 24) * .9) {
                log.warn("Approaching too many values for UnInvertedField faceting on field '" + field
                        + "' : bucket size=" + target.length);
            }
        }
    }

    // free space if outrageously wasteful (tradeoff memory/cpu) 
    if ((maxTermCounts.length - numTermsInField) > 1024) { // too much waste!
        int[] newMaxTermCounts = new int[numTermsInField];
        System.arraycopy(maxTermCounts, 0, newMaxTermCounts, 0, numTermsInField);
        maxTermCounts = newMaxTermCounts;
    }

    log.info("UnInverted multi-valued field " + toString());
    //System.out.println("CREATED: " + toString() + " ti.index=" + ti.index);
}

From source file:org.apache.solr.uninverting.TestDocTermOrds.java

License:Apache License

public void testEmptyIndex() throws IOException {
    final Directory dir = newDirectory();
    final IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random())));
    iw.close();/*from  ww  w.j  ava 2 s  .  co m*/

    final DirectoryReader ir = DirectoryReader.open(dir);
    TestUtil.checkReader(ir);

    final LeafReader composite = SlowCompositeReaderWrapper.wrap(ir);
    TestUtil.checkReader(composite);

    // check the leaves
    // (normally there are none for an empty index, so this is really just future
    // proofing in case that changes for some reason)
    for (LeafReaderContext rc : ir.leaves()) {
        final LeafReader r = rc.reader();
        final DocTermOrds dto = new DocTermOrds(r, r.getLiveDocs(), "any_field");
        assertNull("OrdTermsEnum should be null (leaf)", dto.getOrdTermsEnum(r));
        assertEquals("iterator should be empty (leaf)", 0, dto.iterator(r).getValueCount());
    }

    // check the composite 
    final DocTermOrds dto = new DocTermOrds(composite, composite.getLiveDocs(), "any_field");
    assertNull("OrdTermsEnum should be null (composite)", dto.getOrdTermsEnum(composite));
    assertEquals("iterator should be empty (composite)", 0, dto.iterator(composite).getValueCount());

    ir.close();
    dir.close();
}

From source file:org.apache.solr.uninverting.TestDocTermOrds.java

License:Apache License

public void testSimple() throws Exception {
    Directory dir = newDirectory();/*from  ww w  .  j  ava  2 s  . c  om*/
    final RandomIndexWriter w = new RandomIndexWriter(random(), dir,
            newIndexWriterConfig(new MockAnalyzer(random())).setMergePolicy(newLogMergePolicy()));
    Document doc = new Document();
    Field field = newTextField("field", "", Field.Store.NO);
    doc.add(field);
    field.setStringValue("a b c");
    w.addDocument(doc);

    field.setStringValue("d e f");
    w.addDocument(doc);

    field.setStringValue("a f");
    w.addDocument(doc);

    final IndexReader r = w.getReader();
    w.close();

    final LeafReader ar = SlowCompositeReaderWrapper.wrap(r);
    TestUtil.checkReader(ar);
    final DocTermOrds dto = new DocTermOrds(ar, ar.getLiveDocs(), "field");
    SortedSetDocValues iter = dto.iterator(ar);

    assertEquals(0, iter.nextDoc());
    assertEquals(0, iter.nextOrd());
    assertEquals(1, iter.nextOrd());
    assertEquals(2, iter.nextOrd());
    assertEquals(SortedSetDocValues.NO_MORE_ORDS, iter.nextOrd());

    assertEquals(1, iter.nextDoc());
    assertEquals(3, iter.nextOrd());
    assertEquals(4, iter.nextOrd());
    assertEquals(5, iter.nextOrd());
    assertEquals(SortedSetDocValues.NO_MORE_ORDS, iter.nextOrd());

    assertEquals(2, iter.nextDoc());
    assertEquals(0, iter.nextOrd());
    assertEquals(5, iter.nextOrd());
    assertEquals(SortedSetDocValues.NO_MORE_ORDS, iter.nextOrd());

    r.close();
    dir.close();
}

From source file:org.apache.solr.uninverting.TestDocTermOrds.java

License:Apache License

private void verify(LeafReader r, int[][] idToOrds, BytesRef[] termsArray, BytesRef prefixRef)
        throws Exception {

    final DocTermOrds dto = new DocTermOrds(r, r.getLiveDocs(), "field", prefixRef, Integer.MAX_VALUE,
            TestUtil.nextInt(random(), 2, 10));

    final NumericDocValues docIDToID = FieldCache.DEFAULT.getNumerics(r, "id", FieldCache.LEGACY_INT_PARSER);
    /*//from  w  w  w. j  a  v a 2s. c  o  m
      for(int docID=0;docID<subR.maxDoc();docID++) {
      System.out.println("  docID=" + docID + " id=" + docIDToID[docID]);
      }
    */

    if (VERBOSE) {
        System.out.println("TEST: verify prefix=" + (prefixRef == null ? "null" : prefixRef.utf8ToString()));
        System.out.println("TEST: all TERMS:");
        TermsEnum allTE = MultiFields.getTerms(r, "field").iterator();
        int ord = 0;
        while (allTE.next() != null) {
            System.out.println("  ord=" + (ord++) + " term=" + allTE.term().utf8ToString());
        }
    }

    //final TermsEnum te = subR.fields().terms("field").iterator();
    final TermsEnum te = dto.getOrdTermsEnum(r);
    if (dto.numTerms() == 0) {
        if (prefixRef == null) {
            assertNull(MultiFields.getTerms(r, "field"));
        } else {
            Terms terms = MultiFields.getTerms(r, "field");
            if (terms != null) {
                TermsEnum termsEnum = terms.iterator();
                TermsEnum.SeekStatus result = termsEnum.seekCeil(prefixRef);
                if (result != TermsEnum.SeekStatus.END) {
                    assertFalse(
                            "term=" + termsEnum.term().utf8ToString() + " matches prefix="
                                    + prefixRef.utf8ToString(),
                            StringHelper.startsWith(termsEnum.term(), prefixRef));
                } else {
                    // ok
                }
            } else {
                // ok
            }
        }
        return;
    }

    if (VERBOSE) {
        System.out.println("TEST: TERMS:");
        te.seekExact(0);
        while (true) {
            System.out.println("  ord=" + te.ord() + " term=" + te.term().utf8ToString());
            if (te.next() == null) {
                break;
            }
        }
    }

    SortedSetDocValues iter = dto.iterator(r);
    for (int docID = 0; docID < r.maxDoc(); docID++) {
        assertEquals(docID, docIDToID.nextDoc());
        if (docID > iter.docID()) {
            iter.nextDoc();
        }
        if (docID < iter.docID()) {
            int[] answers = idToOrds[(int) docIDToID.longValue()];
            assertEquals(0, answers.length);
            continue;
        }

        if (VERBOSE) {
            System.out.println(
                    "TEST: docID=" + docID + " of " + r.maxDoc() + " (id=" + docIDToID.longValue() + ")");
        }
        final int[] answers = idToOrds[(int) docIDToID.longValue()];
        int upto = 0;
        long ord;
        while ((ord = iter.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) {
            te.seekExact(ord);
            final BytesRef expected = termsArray[answers[upto++]];
            if (VERBOSE) {
                System.out.println("  exp=" + expected.utf8ToString() + " actual=" + te.term().utf8ToString());
            }
            assertEquals("expected=" + expected.utf8ToString() + " actual=" + te.term().utf8ToString() + " ord="
                    + ord, expected, te.term());
        }
        assertEquals(answers.length, upto);
    }
}

From source file:org.codelibs.elasticsearch.common.lucene.uid.Versions.java

License:Apache License

/**
 * Load the internal doc ID and version for the uid from the reader, returning<ul>
 * <li>null if the uid wasn't found,
 * <li>a doc ID and a version otherwise
 * </ul>//  w w w.  j  a  va 2s  .  c om
 */
public static DocIdAndVersion loadDocIdAndVersion(IndexReader reader, Term term) throws IOException {
    assert term.field().equals(UidFieldMapper.NAME);
    List<LeafReaderContext> leaves = reader.leaves();
    if (leaves.isEmpty()) {
        return null;
    }
    // iterate backwards to optimize for the frequently updated documents
    // which are likely to be in the last segments
    for (int i = leaves.size() - 1; i >= 0; i--) {
        LeafReaderContext context = leaves.get(i);
        LeafReader leaf = context.reader();
        PerThreadIDAndVersionLookup lookup = getLookupState(leaf);
        DocIdAndVersion result = lookup.lookup(term.bytes(), leaf.getLiveDocs(), context);
        if (result != null) {
            return result;
        }
    }
    return null;
}

From source file:org.elasticsearch.common.lucene.uid.VersionsResolver.java

License:Apache License

/**
 * Load the internal doc ID and version for the uid from the reader, returning<ul>
 * <li>null if the uid wasn't found,
 * <li>a doc ID and a version otherwise
 * </ul>// w ww .j a  va 2 s. co  m
 */
public static DocIdAndVersion loadDocIdAndVersion(IndexReader reader, Term term) throws IOException {
    assert term.field().equals(UidFieldMapper.NAME);
    List<LeafReaderContext> leaves = reader.leaves();
    if (leaves.isEmpty()) {
        return null;
    }
    // iterate backwards to optimize for the frequently updated documents
    // which are likely to be in the last segments
    for (int i = leaves.size() - 1; i >= 0; i--) {
        LeafReaderContext context = leaves.get(i);
        LeafReader leaf = context.reader();
        PerThreadIDAndVersionLookup lookup = getLookupState(leaf);
        DocIdAndVersion result = lookup.lookupVersion(term.bytes(), leaf.getLiveDocs(), context);
        if (result != null) {
            return result;
        }
    }
    return null;
}

From source file:org.elasticsearch.common.lucene.uid.VersionsResolver.java

License:Apache License

/**
 * Returns the sequence number for the given uid term, returning
 * {@code SequenceNumbersService.UNASSIGNED_SEQ_NO} if none is found.
 *///ww w .  j  a v  a  2 s . c om
public static long loadSeqNo(IndexReader reader, Term term) throws IOException {
    assert term.field().equals(UidFieldMapper.NAME) : "can only load _seq_no by uid";
    List<LeafReaderContext> leaves = reader.leaves();
    if (leaves.isEmpty()) {
        return SequenceNumbersService.UNASSIGNED_SEQ_NO;
    }

    // iterate backwards to optimize for the frequently updated documents
    // which are likely to be in the last segments
    for (int i = leaves.size() - 1; i >= 0; i--) {
        LeafReader leaf = leaves.get(i).reader();
        Bits liveDocs = leaf.getLiveDocs();

        TermsEnum termsEnum = null;
        SortedNumericDocValues dvField = null;
        PostingsEnum docsEnum = null;

        final Fields fields = leaf.fields();
        if (fields != null) {
            Terms terms = fields.terms(UidFieldMapper.NAME);
            if (terms != null) {
                termsEnum = terms.iterator();
                assert termsEnum != null;
                dvField = leaf.getSortedNumericDocValues(SeqNoFieldMapper.NAME);
                assert dvField != null;

                final BytesRef id = term.bytes();
                if (termsEnum.seekExact(id)) {
                    // there may be more than one matching docID, in the
                    // case of nested docs, so we want the last one:
                    docsEnum = termsEnum.postings(docsEnum, 0);
                    int docID = DocIdSetIterator.NO_MORE_DOCS;
                    for (int d = docsEnum.nextDoc(); d != DocIdSetIterator.NO_MORE_DOCS; d = docsEnum
                            .nextDoc()) {
                        if (liveDocs != null && liveDocs.get(d) == false) {
                            continue;
                        }
                        docID = d;
                    }

                    if (docID != DocIdSetIterator.NO_MORE_DOCS) {
                        dvField.setDocument(docID);
                        assert dvField.count() == 1 : "expected only a single value for _seq_no but got "
                                + dvField.count();
                        return dvField.valueAt(0);
                    }
                }
            }
        }

    }
    return SequenceNumbersService.UNASSIGNED_SEQ_NO;
}