Example usage for org.apache.lucene.index LeafReader getLiveDocs

List of usage examples for org.apache.lucene.index LeafReader getLiveDocs

Introduction

In this page you can find the example usage for org.apache.lucene.index LeafReader getLiveDocs.

Prototype

public abstract Bits getLiveDocs();

Source Link

Document

Returns the Bits representing live (not deleted) docs.

Usage

From source file: org.elasticsearch.common.lucene.uid.VersionsResolver.java

License: Apache License

/**
 * Returns the primary term for the given uid term, returning {@code 0} if none is found.
 */
public static long loadPrimaryTerm(IndexReader reader, Term term) throws IOException {
    assert term.field().equals(UidFieldMapper.NAME) : "can only load _primary_term by uid";
    final List<LeafReaderContext> leaves = reader.leaves();
    if (leaves.isEmpty()) {
        return 0;
    }

    // Walk segments newest-first: frequently updated documents tend to land in
    // the later segments, so this usually finds the match sooner.
    for (int i = leaves.size() - 1; i >= 0; i--) {
        final LeafReader segment = leaves.get(i).reader();

        final Fields segmentFields = segment.fields();
        if (segmentFields == null) {
            continue;
        }
        final Terms uidTerms = segmentFields.terms(UidFieldMapper.NAME);
        if (uidTerms == null) {
            continue;
        }
        final TermsEnum uidTermsEnum = uidTerms.iterator();
        assert uidTermsEnum != null;
        final NumericDocValues primaryTerms = segment.getNumericDocValues(SeqNoFieldMapper.PRIMARY_TERM_NAME);
        assert primaryTerms != null;

        if (uidTermsEnum.seekExact(term.bytes()) == false) {
            continue;
        }

        // Nested documents can produce several postings for one uid; the root
        // (last) live doc is the one whose primary term we want.
        final Bits liveDocs = segment.getLiveDocs();
        final PostingsEnum postings = uidTermsEnum.postings(null, 0);
        int lastLiveDoc = DocIdSetIterator.NO_MORE_DOCS;
        for (int d = postings.nextDoc(); d != DocIdSetIterator.NO_MORE_DOCS; d = postings.nextDoc()) {
            if (liveDocs == null || liveDocs.get(d)) {
                lastLiveDoc = d;
            }
        }
        if (lastLiveDoc != DocIdSetIterator.NO_MORE_DOCS) {
            return primaryTerms.get(lastLiveDoc);
        }
    }
    return 0;
}

From source file: org.elasticsearch.index.shard.IndexShardTestCase.java

License: Apache License

/**
 * Collects the uids of all live documents currently visible on the shard.
 * Forces a refresh first so recent writes are searchable.
 */
protected Set<Uid> getShardDocUIDs(final IndexShard shard) throws IOException {
    shard.refresh("get_uids");
    try (Engine.Searcher searcher = shard.acquireSearcher("test")) {
        final Set<Uid> uids = new HashSet<>();
        for (LeafReaderContext context : searcher.reader().leaves()) {
            final LeafReader leaf = context.reader();
            final Bits live = leaf.getLiveDocs();
            final int maxDoc = leaf.maxDoc();
            for (int docId = 0; docId < maxDoc; docId++) {
                // A null Bits means the segment has no deletions at all.
                if (live != null && live.get(docId) == false) {
                    continue;
                }
                final Document doc = leaf.document(docId, Collections.singleton(UidFieldMapper.NAME));
                uids.add(Uid.createUid(doc.get(UidFieldMapper.NAME)));
            }
        }
        return uids;
    }
}

From source file: org.elasticsearch.xpack.core.security.authz.accesscontrol.DocumentSubsetReader.java

License: Open Source License

/**
 * Compute the number of live documents. This method is SLOW.
 */
private static int computeNumDocs(LeafReader reader, Query roleQuery, BitSet roleQueryBits) {
    final Bits liveDocs = reader.getLiveDocs();
    if (roleQueryBits == null) {
        return 0;
    }
    if (liveDocs == null) {
        // No deletions in this segment: every role-query hit counts. Slow.
        return roleQueryBits.cardinality();
    }
    // Deletions present: intersect role-query hits with the live docs one
    // document at a time. Very slow, but necessary in order to be correct.
    final DocIdSetIterator matches = new BitSetIterator(roleQueryBits, 0L); // we don't use the cost
    int count = 0;
    try {
        for (int doc = matches.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = matches.nextDoc()) {
            if (liveDocs.get(doc)) {
                count++;
            }
        }
    } catch (IOException e) {
        throw new UncheckedIOException(e);
    }
    return count;
}

From source file: org.elasticsearch.xpack.core.security.authz.accesscontrol.DocumentSubsetReaderTests.java

License: Open Source License

/**
 * Verifies that wrapping the reader with a single-term role query leaves
 * exactly one document live, and that getLiveDocs() reflects that.
 */
public void testLiveDocs() throws Exception {
    final int numDocs = scaledRandomIntBetween(16, 128);
    final IndexWriter iw = new IndexWriter(directory,
            new IndexWriterConfig(new StandardAnalyzer()).setMergePolicy(NoMergePolicy.INSTANCE));

    // One doc per distinct "field" value so each role query matches exactly one doc.
    for (int doc = 0; doc < numDocs; doc++) {
        final Document document = new Document();
        document.add(new StringField("field", "value" + doc, Field.Store.NO));
        iw.addDocument(document);
    }

    iw.forceMerge(1);
    iw.close();

    openDirectoryReader();
    assertThat("should have one segment after force merge", directoryReader.leaves().size(), equalTo(1));

    for (int i = 0; i < numDocs; i++) {
        final Query roleQuery = new TermQuery(new Term("field", "value" + i));
        final DirectoryReader wrappedReader = DocumentSubsetReader.wrap(directoryReader, bitsetFilterCache,
                roleQuery);

        final LeafReader leafReader = wrappedReader.leaves().get(0).reader();
        assertThat(leafReader.hasDeletions(), is(true));
        assertThat(leafReader.numDocs(), equalTo(1));
        final Bits liveDocs = leafReader.getLiveDocs();
        assertThat(liveDocs.length(), equalTo(numDocs));
        for (int docId = 0; docId < numDocs; docId++) {
            final boolean expected = docId == i;
            assertThat("docId [" + docId + "] should " + (expected ? "match" : "not match"),
                    liveDocs.get(docId), is(expected));
        }
    }
}

From source file: org.modeshape.jcr.index.lucene.query.ConstantScoreWeightQuery.java

License: Apache License

@Override
public Weight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException {
    final Set<String> fieldSet = Collections.singleton(field);
    // Weight whose scorer assigns every matching document the same constant score (1.0f).
    return new RandomAccessWeight(this) {
        @Override
        protected Bits getMatchingDocs(LeafReaderContext context) throws IOException {
            final LeafReader leafReader = context.reader();
            final Bits liveDocs = leafReader.getLiveDocs();
            // null liveDocs means there are no deleted documents in this segment.
            final int docsCount = liveDocs == null ? leafReader.numDocs() : liveDocs.length();
            final FixedBitSet matches = new FixedBitSet(leafReader.maxDoc());
            for (int docId = 0; docId < docsCount; docId++) {
                if (liveDocs != null && liveDocs.get(docId) == false) {
                    // deleted document
                    continue;
                }
                final Document document = leafReader.document(docId, fieldSet);
                final IndexableField[] docFields = document.getFields(field);
                if (docFields.length == 0) {
                    // the document doesn't have the field...
                    continue;
                }
                if (areValid(docFields)) {
                    matches.set(docId);
                }
            }
            return matches.cardinality() > 0 ? matches : null;
        }
    };
}

From source file: org.uberfire.ext.metadata.backend.lucene.index.BaseLuceneIndex.java

License: Apache License

/**
 * Resolves, for each given primary key, the global Lucene doc id of the live
 * document whose "id" term equals that key.
 *
 * @param searcher searcher over the index to probe
 * @param ids      primary-key values to look up
 * @return an array parallel to {@code ids}; each entry is the matching global
 *         doc id, or -1 when the key is not found or only matches deleted docs
 * @throws IOException if reading the index fails
 */
protected int[] lookupDocIdByPK(final IndexSearcher searcher, final String... ids) throws IOException {
    final List<LeafReaderContext> subReaders = searcher.getIndexReader().leaves();
    final TermsEnum[] termsEnums = new TermsEnum[subReaders.size()];
    final PostingsEnum[] docsEnums = new PostingsEnum[subReaders.size()];
    for (int subIDX = 0; subIDX < subReaders.size(); subIDX++) {
        // A segment may lack the "id" field entirely; keep a null slot so the
        // lookup below can skip it instead of throwing an NPE on iterator().
        final Terms idTerms = subReaders.get(subIDX).reader().fields().terms("id");
        termsEnums[subIDX] = idTerms == null ? null : idTerms.iterator();
    }

    int[] results = new int[ids.length];

    // -1 marks "not found" until proven otherwise
    for (int i = 0; i < results.length; i++) {
        results[i] = -1;
    }

    // for each id given
    for (int idx = 0; idx < ids.length; idx++) {
        int base = 0;
        final BytesRef id = new BytesRef(ids[idx]);
        // for each leaf reader..
        for (int subIDX = 0; subIDX < subReaders.size(); subIDX++) {
            final LeafReader subReader = subReaders.get(subIDX).reader();
            final TermsEnum termsEnum = termsEnums[subIDX];
            // does the enumeration of ("id") terms from our reader contain the "id" field we're looking for?
            if (termsEnum != null && termsEnum.seekExact(id)) {
                final PostingsEnum docs = docsEnums[subIDX] = termsEnum.postings(docsEnums[subIDX], 0);
                // okay, the reader contains it, get the postings ("docs+") for and check that they're there (NP check)
                if (docs != null) {
                    final int docID = docs.nextDoc();
                    final Bits liveDocs = subReader.getLiveDocs();
                    // Check exhaustion BEFORE consulting liveDocs: NO_MORE_DOCS is
                    // Integer.MAX_VALUE and would index past the end of the Bits.
                    if (docID != DocIdSetIterator.NO_MORE_DOCS && (liveDocs == null || liveDocs.get(docID))) {
                        results[idx] = base + docID;
                        break;
                    }
                }
            }
            base += subReader.maxDoc();
        }
    }

    return results;
}

From source file: org.voyanttools.trombone.lucene.CorpusMapper.java

License: Open Source License

/**
 * This should not be called, except from the private build() method.
 * @throws IOException/* w ww . j  a v  a2s . c  o m*/
 */
private void buildFromTermsEnum() throws IOException {
    LeafReader reader = SlowCompositeReaderWrapper
            .wrap(storage.getLuceneManager().getDirectoryReader(corpus.getId()));

    Terms terms = reader.terms("id");
    TermsEnum termsEnum = terms.iterator();
    BytesRef bytesRef = termsEnum.next();
    int doc;
    String id;
    Set<String> ids = new HashSet<String>(getCorpusDocumentIds());
    bitSet = new SparseFixedBitSet(reader.numDocs());
    Bits liveBits = reader.getLiveDocs();
    while (bytesRef != null) {
        PostingsEnum postingsEnum = termsEnum.postings(null, PostingsEnum.NONE);
        doc = postingsEnum.nextDoc();
        if (doc != PostingsEnum.NO_MORE_DOCS) {
            id = bytesRef.utf8ToString();
            if (ids.contains(id)) {
                bitSet.set(doc);
                luceneIds.add(doc);
                documentIdToLuceneIdMap.put(id, doc);
                luceneIdToDocumentIdMap.put(doc, id);
            }
        }
        bytesRef = termsEnum.next();
    }
    this.reader = new FilteredCorpusReader(reader, bitSet);
}