Example usage for org.apache.lucene.index LeafReader getLiveDocs

Introduction

This page collects example usages of the getLiveDocs method of org.apache.lucene.index.LeafReader.

Prototype

public abstract Bits getLiveDocs();

Source Link

Document

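Returns the Bits representing live (not deleted) docs. A set bit indicates that the doc ID has not been deleted. If this method returns null, the reader has no deletions, i.e. all docs are live; callers must check for null before calling get().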

Usage

From source file:de.unihildesheim.iw.lucene.search.EmptyFieldFilter.java

License:Open Source License

/**
 * Collects the documents of one segment that have at least one term indexed in {@code this.field}.
 *
 * <p>Only documents contained in {@code acceptDocs} are considered; when {@code acceptDocs} is
 * {@code null} the segment's live documents are used instead.
 *
 * @param context    segment to inspect
 * @param acceptDocs documents that may be returned, or {@code null} to fall back to the live docs
 * @return the set of candidate documents that carry a term in the field (possibly empty)
 * @throws IOException if reading terms or postings from the segment fails
 */
@Override
public DocIdSet getDocIdSet(@NotNull final LeafReaderContext context, @Nullable final Bits acceptDocs)
        throws IOException {
    final LeafReader reader = context.reader();
    final int maxDoc = reader.maxDoc();

    // Candidate documents. Bits are cleared below as documents are matched, so that a document
    // reached through several terms is only handled once.
    FixedBitSet checkBits;
    if (acceptDocs == null) {
        checkBits = BitsUtils.bits2FixedBitSet(reader.getLiveDocs());
        if (checkBits == null) {
            // No live-docs bits available: every document in the segment is live.
            checkBits = new FixedBitSet(maxDoc);
            checkBits.set(0, checkBits.length());
        }
    } else {
        checkBits = BitsUtils.bits2FixedBitSet(acceptDocs);
    }

    // Look the field up once; the result is reused for both the doc-count shortcut and the scan.
    @Nullable
    final Terms terms = reader.terms(this.field);
    if (terms == null) {
        // Field is absent from this segment: nothing matches.
        return new BitDocIdSet(new SparseFixedBitSet(maxDoc));
    }

    final int termsDocCount = terms.getDocCount();
    if (termsDocCount == 0) {
        return new BitDocIdSet(new SparseFixedBitSet(maxDoc));
    }
    if (termsDocCount == maxDoc) {
        // Every document has a term in the field, so all candidates match.
        return new BitDocIdSet(checkBits);
    }

    // Walk the postings of every term and move each candidate document that is hit into the result.
    final BitSet finalBits = new SparseFixedBitSet(maxDoc);
    final TermsEnum te = terms.iterator(null);
    PostingsEnum pe = null;
    int docId;
    while (te.next() != null) {
        pe = te.postings(checkBits, pe, (int) PostingsEnum.NONE);
        while ((docId = pe.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
            if (checkBits.getAndClear(docId)) {
                finalBits.set(docId);
            }
        }
    }
    return new BitDocIdSet(finalBits);
}

From source file:io.crate.execution.engine.collect.collectors.LuceneBatchIterator.java

License:Apache License

/**
 * Advances to the next document that is neither deleted nor below the minimum score.
 *
 * <p>Iterators are pulled one at a time via {@code tryAdvanceDocIdSetIterator()}; the first
 * acceptable document is handed to {@code onDoc} and the search stops there. Once no iterator can be
 * obtained any more, the state is cleared.
 *
 * @return {@code true} if a document was passed to {@code onDoc}, {@code false} if nothing is left
 * @throws IOException if advancing the underlying iterator fails
 */
private boolean innerMoveNext() throws IOException {
    while (tryAdvanceDocIdSetIterator()) {
        LeafReader leafReader = currentLeaf.reader();
        Bits live = leafReader.getLiveDocs();
        for (int docId = currentDocIdSetIt.nextDoc();
                docId != DocIdSetIterator.NO_MORE_DOCS;
                docId = currentDocIdSetIt.nextDoc()) {
            if (!docDeleted(live, docId) && !belowMinScore(currentScorer)) {
                onDoc(docId, leafReader);
                return true;
            }
        }
        // Current iterator is exhausted; drop it before trying to obtain the next one.
        currentDocIdSetIt = null;
    }
    clearState();
    return false;
}

From source file:org.apache.solr.handler.component.AlfrescoLukeRequestHandler.java

License:Open Source License

/**
 * Returns the stored document behind the first posting of the first term in {@code terms}, provided
 * that document has not been deleted.
 *
 * <p>Only the first posting of the first term is examined; no further terms or postings are tried.
 *
 * @param terms  terms of the field to sample
 * @param reader segment reader the terms belong to
 * @return the document, or {@code null} if there is no term, the term has no postings, or the
 *         document found is deleted
 * @throws IOException if reading terms, postings or the stored document fails
 */
protected static Document getFirstLiveDoc(Terms terms, LeafReader reader) throws IOException {
    TermsEnum termsEnum = terms.iterator();
    if (termsEnum.next() == null) { // Ran off the end of the terms enum without finding any live docs with that field in them.
        return null;
    }
    PostingsEnum postingsEnum = termsEnum.postings(null, PostingsEnum.NONE);
    final Bits liveDocs = reader.getLiveDocs();
    if (postingsEnum.nextDoc() == DocIdSetIterator.NO_MORE_DOCS) {
        return null;
    }
    final int docId = postingsEnum.docID();
    // A set bit in liveDocs means the document is live; null liveDocs means the segment has no
    // deletions. Reject the document only when it is known to be deleted.
    if (liveDocs != null && !liveDocs.get(docId)) {
        return null;
    }
    return reader.document(docId);
}

From source file:org.apache.solr.search.facet.UnInvertedField.java

License:Apache License

/**
 * Un-inverts {@code field} over the searcher's raw reader.
 *
 * <p>After un-inverting, a warning is logged for every bucket in {@code tnums} that is getting close
 * to 2^24 bytes, and {@code maxTermCounts} is shrunk when it is more than 1024 entries larger than the
 * number of terms in the field.
 *
 * @param field    name of the field to un-invert
 * @param searcher searcher providing the schema and the reader
 * @throws IOException   if reading the index fails
 * @throws SolrException with {@code BAD_REQUEST} if un-inverting raises an {@link IllegalStateException}
 */
public UnInvertedField(String field, SolrIndexSearcher searcher) throws IOException {
    // Second argument: the threshold over which set intersections are used instead of counting,
    // to (1) save memory and (2) speed up faceting. The "+ 2" is there for testing purposes, so
    // that some terms always stay under the threshold even when the index is very small.
    super(field, searcher.maxDoc() / 20 + 2, DEFAULT_INDEX_INTERVAL_BITS);

    final String mainValuePrefix = TrieField.getMainValuePrefix(searcher.getSchema().getFieldType(field));
    this.searcher = searcher;
    try {
        // TODO: it's wasteful to create one of these each time
        // but DocTermOrds will throw an exception if it thinks the field has doc values
        // (which is faked by UnInvertingReader)
        LeafReader slowReader = SlowCompositeReaderWrapper.wrap(searcher.getRawReader());
        if (mainValuePrefix == null) {
            uninvert(slowReader, slowReader.getLiveDocs(), null);
        } else {
            uninvert(slowReader, slowReader.getLiveDocs(), new BytesRef(mainValuePrefix));
        }
    } catch (IllegalStateException ise) {
        throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, ise);
    }

    if (tnums != null) {
        for (byte[] bucket : tnums) {
            if (bucket == null) {
                continue;
            }
            if (bucket.length > (1 << 24) * .9) {
                log.warn("Approaching too many values for UnInvertedField faceting on field '" + field
                        + "' : bucket size=" + bucket.length);
            }
        }
    }

    // Trade a copy for memory when the counts array is far larger than needed.
    final int wasted = maxTermCounts.length - numTermsInField;
    if (wasted > 1024) {
        int[] trimmed = new int[numTermsInField];
        System.arraycopy(maxTermCounts, 0, trimmed, 0, numTermsInField);
        maxTermCounts = trimmed;
    }

    log.info("UnInverted multi-valued field " + toString());
}

From source file:org.apache.solr.uninverting.TestDocTermOrds.java

License:Apache License

/**
 * Checks that a {@code DocTermOrds} built over an index without any documents has no ord terms enum
 * and an iterator with zero values, both for each leaf and for the composite reader.
 */
public void testEmptyIndex() throws IOException {
    final Directory directory = newDirectory();
    // Opening and immediately closing a writer leaves behind an index without any documents.
    new IndexWriter(directory, newIndexWriterConfig(new MockAnalyzer(random()))).close();

    final DirectoryReader dirReader = DirectoryReader.open(directory);
    TestUtil.checkReader(dirReader);

    final LeafReader wrapped = SlowCompositeReaderWrapper.wrap(dirReader);
    TestUtil.checkReader(wrapped);

    // Per-leaf check. An empty index normally has no leaves at all, so this loop only guards
    // against that changing in the future.
    for (LeafReaderContext leafContext : dirReader.leaves()) {
        final LeafReader leaf = leafContext.reader();
        final DocTermOrds leafOrds = new DocTermOrds(leaf, leaf.getLiveDocs(), "any_field");
        assertNull("OrdTermsEnum should be null (leaf)", leafOrds.getOrdTermsEnum(leaf));
        assertEquals("iterator should be empty (leaf)", 0, leafOrds.iterator(leaf).getValueCount());
    }

    // Composite check.
    final DocTermOrds compositeOrds = new DocTermOrds(wrapped, wrapped.getLiveDocs(), "any_field");
    assertNull("OrdTermsEnum should be null (composite)", compositeOrds.getOrdTermsEnum(wrapped));
    assertEquals("iterator should be empty (composite)", 0,
            compositeOrds.iterator(wrapped).getValueCount());

    dirReader.close();
    directory.close();
}

From source file:org.apache.solr.uninverting.TestDocTermOrds.java

License:Apache License

/**
 * Indexes three documents with the texts "a b c", "d e f" and "a f" and checks that
 * {@code DocTermOrds} reports, for each document in order, exactly the expected term ordinals.
 */
public void testSimple() throws Exception {
    Directory dir = newDirectory();
    final RandomIndexWriter writer = new RandomIndexWriter(random(), dir,
            newIndexWriterConfig(new MockAnalyzer(random())).setMergePolicy(newLogMergePolicy()));

    // One Document/Field pair is reused; only the field value changes between additions.
    Document doc = new Document();
    Field field = newTextField("field", "", Field.Store.NO);
    doc.add(field);
    for (String text : new String[] { "a b c", "d e f", "a f" }) {
        field.setStringValue(text);
        writer.addDocument(doc);
    }

    final IndexReader r = writer.getReader();
    writer.close();

    final LeafReader leaf = SlowCompositeReaderWrapper.wrap(r);
    TestUtil.checkReader(leaf);
    final DocTermOrds dto = new DocTermOrds(leaf, leaf.getLiveDocs(), "field");
    SortedSetDocValues ords = dto.iterator(leaf);

    // expectedOrds[docId] lists the ordinals expected for that document, in iteration order.
    final long[][] expectedOrds = { { 0, 1, 2 }, { 3, 4, 5 }, { 0, 5 } };
    for (int docId = 0; docId < expectedOrds.length; docId++) {
        assertEquals(docId, ords.nextDoc());
        for (long expectedOrd : expectedOrds[docId]) {
            assertEquals(expectedOrd, ords.nextOrd());
        }
        assertEquals(SortedSetDocValues.NO_MORE_ORDS, ords.nextOrd());
    }

    r.close();
    dir.close();
}

From source file:org.apache.solr.uninverting.TestDocTermOrds.java

License:Apache License

/**
 * Builds a {@code DocTermOrds} for "field" over {@code r} and asserts that the ordinals it reports for
 * every document resolve to the expected terms.
 *
 * @param r          reader to verify
 * @param idToOrds   expected data, indexed by the value of the document's "id" field; each entry
 *                   holds indexes into {@code termsArray}, in the order the ords are expected
 * @param termsArray expected terms, addressed by the indexes stored in {@code idToOrds}
 * @param prefixRef  prefix handed to {@code DocTermOrds}, or {@code null} for none
 * @throws Exception if reading the index fails
 */
private void verify(LeafReader r, int[][] idToOrds, BytesRef[] termsArray, BytesRef prefixRef)
        throws Exception {

    // NOTE(review): the last two constructor arguments (Integer.MAX_VALUE and a random int in 2..10)
    // are presumably the max term doc frequency and the index interval bits - confirm against
    // the DocTermOrds constructor.
    final DocTermOrds dto = new DocTermOrds(r, r.getLiveDocs(), "field", prefixRef, Integer.MAX_VALUE,
            TestUtil.nextInt(random(), 2, 10));

    // Maps each docID to the value of its "id" field, which is the index into idToOrds.
    final NumericDocValues docIDToID = FieldCache.DEFAULT.getNumerics(r, "id", FieldCache.LEGACY_INT_PARSER);
    /* Disabled debug output:
      for(int docID=0;docID<subR.maxDoc();docID++) {
      System.out.println("  docID=" + docID + " id=" + docIDToID[docID]);
      }
    */

    if (VERBOSE) {
        System.out.println("TEST: verify prefix=" + (prefixRef == null ? "null" : prefixRef.utf8ToString()));
        System.out.println("TEST: all TERMS:");
        TermsEnum allTE = MultiFields.getTerms(r, "field").iterator();
        int ord = 0;
        while (allTE.next() != null) {
            System.out.println("  ord=" + (ord++) + " term=" + allTE.term().utf8ToString());
        }
    }

    //final TermsEnum te = subR.fields().terms("field").iterator();
    final TermsEnum te = dto.getOrdTermsEnum(r);
    if (dto.numTerms() == 0) {
        // DocTermOrds saw no terms: confirm that the index agrees, then stop.
        if (prefixRef == null) {
            // Without a prefix, zero terms means the field must not exist at all.
            assertNull(MultiFields.getTerms(r, "field"));
        } else {
            // With a prefix, the field may exist, but the first term at or after the prefix
            // must not start with it.
            Terms terms = MultiFields.getTerms(r, "field");
            if (terms != null) {
                TermsEnum termsEnum = terms.iterator();
                TermsEnum.SeekStatus result = termsEnum.seekCeil(prefixRef);
                if (result != TermsEnum.SeekStatus.END) {
                    assertFalse(
                            "term=" + termsEnum.term().utf8ToString() + " matches prefix="
                                    + prefixRef.utf8ToString(),
                            StringHelper.startsWith(termsEnum.term(), prefixRef));
                } else {
                    // ok: no term at or after the prefix
                }
            } else {
                // ok: field does not exist
            }
        }
        return;
    }

    if (VERBOSE) {
        System.out.println("TEST: TERMS:");
        te.seekExact(0);
        while (true) {
            System.out.println("  ord=" + te.ord() + " term=" + te.term().utf8ToString());
            if (te.next() == null) {
                break;
            }
        }
    }

    SortedSetDocValues iter = dto.iterator(r);
    for (int docID = 0; docID < r.maxDoc(); docID++) {
        // The "id" doc values are expected to be present for every document, in docID order.
        assertEquals(docID, docIDToID.nextDoc());
        // Move the ords iterator forward only when it is behind the current document.
        if (docID > iter.docID()) {
            iter.nextDoc();
        }
        if (docID < iter.docID()) {
            // The iterator skipped this document, so it has no ords; the expectation must be empty too.
            int[] answers = idToOrds[(int) docIDToID.longValue()];
            assertEquals(0, answers.length);
            continue;
        }

        if (VERBOSE) {
            System.out.println(
                    "TEST: docID=" + docID + " of " + r.maxDoc() + " (id=" + docIDToID.longValue() + ")");
        }
        final int[] answers = idToOrds[(int) docIDToID.longValue()];
        int upto = 0;
        long ord;
        // Resolve each reported ord to its term and compare with the next expected term.
        while ((ord = iter.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) {
            te.seekExact(ord);
            final BytesRef expected = termsArray[answers[upto++]];
            if (VERBOSE) {
                System.out.println("  exp=" + expected.utf8ToString() + " actual=" + te.term().utf8ToString());
            }
            assertEquals("expected=" + expected.utf8ToString() + " actual=" + te.term().utf8ToString() + " ord="
                    + ord, expected, te.term());
        }
        // Every expected ord must have been consumed.
        assertEquals(answers.length, upto);
    }
}

From source file:org.codelibs.elasticsearch.common.lucene.uid.Versions.java

License:Apache License

/**
 * Looks up the internal doc ID and version for the given uid term.
 *
 * <p>Segments are searched from last to first, because frequently updated documents are likely to be
 * in the last segments.
 *
 * @param reader reader whose leaves are searched
 * @param term   uid term; its field must be {@code UidFieldMapper.NAME}
 * @return the doc ID and version of the first hit, or {@code null} if the uid wasn't found
 * @throws IOException if a segment lookup fails
 */
public static DocIdAndVersion loadDocIdAndVersion(IndexReader reader, Term term) throws IOException {
    assert term.field().equals(UidFieldMapper.NAME);
    final List<LeafReaderContext> segments = reader.leaves();
    // An empty leaf list simply skips the loop and yields null.
    for (int pos = segments.size(); pos > 0; pos--) {
        final LeafReaderContext segment = segments.get(pos - 1);
        final LeafReader segmentReader = segment.reader();
        final DocIdAndVersion found = getLookupState(segmentReader)
                .lookup(term.bytes(), segmentReader.getLiveDocs(), segment);
        if (found != null) {
            return found;
        }
    }
    return null;
}

From source file:org.elasticsearch.common.lucene.uid.VersionsResolver.java

License:Apache License

/**
 * Looks up the internal doc ID and version for the given uid term.
 *
 * <p>Segments are searched from last to first, because frequently updated documents are likely to be
 * in the last segments.
 *
 * @param reader reader whose leaves are searched
 * @param term   uid term; its field must be {@code UidFieldMapper.NAME}
 * @return the doc ID and version of the first hit, or {@code null} if the uid wasn't found
 * @throws IOException if a segment lookup fails
 */
public static DocIdAndVersion loadDocIdAndVersion(IndexReader reader, Term term) throws IOException {
    assert term.field().equals(UidFieldMapper.NAME);
    final List<LeafReaderContext> segments = reader.leaves();
    // An empty leaf list simply skips the loop and yields null.
    for (int pos = segments.size(); pos > 0; pos--) {
        final LeafReaderContext segment = segments.get(pos - 1);
        final LeafReader segmentReader = segment.reader();
        final DocIdAndVersion found = getLookupState(segmentReader)
                .lookupVersion(term.bytes(), segmentReader.getLiveDocs(), segment);
        if (found != null) {
            return found;
        }
    }
    return null;
}

From source file:org.elasticsearch.common.lucene.uid.VersionsResolver.java

License:Apache License

/**
 * Loads the sequence number stored for the given uid term.
 *
 * <p>Segments are searched from last to first, because frequently updated documents are likely to be
 * in the last segments. Within a segment the last live document matching the uid is used.
 *
 * @param reader reader whose leaves are searched
 * @param term   uid term; its field must be {@code UidFieldMapper.NAME}
 * @return the sequence number, or {@code SequenceNumbersService.UNASSIGNED_SEQ_NO} if none is found
 * @throws IOException if reading a segment fails
 */
public static long loadSeqNo(IndexReader reader, Term term) throws IOException {
    assert term.field().equals(UidFieldMapper.NAME) : "can only load _seq_no by uid";
    final List<LeafReaderContext> segments = reader.leaves();

    // An empty leaf list simply skips the loop and falls through to UNASSIGNED_SEQ_NO.
    for (int pos = segments.size(); pos > 0; pos--) {
        final LeafReader segmentReader = segments.get(pos - 1).reader();
        final Bits live = segmentReader.getLiveDocs();

        final Fields segmentFields = segmentReader.fields();
        if (segmentFields == null) {
            continue;
        }
        final Terms uidTerms = segmentFields.terms(UidFieldMapper.NAME);
        if (uidTerms == null) {
            continue;
        }

        final TermsEnum uidEnum = uidTerms.iterator();
        assert uidEnum != null;
        final SortedNumericDocValues seqNos = segmentReader.getSortedNumericDocValues(SeqNoFieldMapper.NAME);
        assert seqNos != null;

        final BytesRef id = term.bytes();
        if (!uidEnum.seekExact(id)) {
            continue;
        }

        // There may be more than one matching docID (nested docs), so keep scanning and
        // remember the last one that is not deleted.
        final PostingsEnum postings = uidEnum.postings(null, 0);
        int lastLive = DocIdSetIterator.NO_MORE_DOCS;
        int candidate;
        while ((candidate = postings.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
            if (live == null || live.get(candidate)) {
                lastLive = candidate;
            }
        }

        if (lastLive != DocIdSetIterator.NO_MORE_DOCS) {
            seqNos.setDocument(lastLive);
            assert seqNos.count() == 1 : "expected only a single value for _seq_no but got "
                    + seqNos.count();
            return seqNos.valueAt(0);
        }
    }
    return SequenceNumbersService.UNASSIGNED_SEQ_NO;
}