Usage examples for org.apache.lucene.index.LeafReader.getLiveDocs()
public abstract Bits getLiveDocs();
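getLiveDocs() returns the Bits marking which documents in a segment are live (not deleted), or null when the segment contains no deletions. Every example below follows the same basic idiom: null-check the result, then test each document id before using it. A minimal sketch of that idiom (the leaf variable is a placeholder for any LeafReader obtained from a leaf context):

    Bits liveDocs = leaf.getLiveDocs();
    for (int docId = 0; docId < leaf.maxDoc(); docId++) {
        // a null result means the segment has no deletions, so every doc is live
        if (liveDocs == null || liveDocs.get(docId)) {
            // process the live document docId
        }
    }

Note that live-doc ids run up to maxDoc(), not numDocs(): deleted documents still occupy their original ids.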
From source file: org.elasticsearch.common.lucene.uid.VersionsResolver.java
License: Apache License
/**
 * Returns the primary term for the given uid term, returning {@code 0} if none is found.
 */
public static long loadPrimaryTerm(IndexReader reader, Term term) throws IOException {
    assert term.field().equals(UidFieldMapper.NAME) : "can only load _primary_term by uid";
    List<LeafReaderContext> leaves = reader.leaves();
    if (leaves.isEmpty()) {
        return 0;
    }

    // iterate backwards to optimize for the frequently updated documents
    // which are likely to be in the last segments
    for (int i = leaves.size() - 1; i >= 0; i--) {
        LeafReader leaf = leaves.get(i).reader();
        Bits liveDocs = leaf.getLiveDocs();
        TermsEnum termsEnum = null;
        NumericDocValues dvField = null;
        PostingsEnum docsEnum = null;

        final Fields fields = leaf.fields();
        if (fields != null) {
            Terms terms = fields.terms(UidFieldMapper.NAME);
            if (terms != null) {
                termsEnum = terms.iterator();
                assert termsEnum != null;
                dvField = leaf.getNumericDocValues(SeqNoFieldMapper.PRIMARY_TERM_NAME);
                assert dvField != null;
                final BytesRef id = term.bytes();
                if (termsEnum.seekExact(id)) {
                    // there may be more than one matching docID, in the
                    // case of nested docs, so we want the last one:
                    docsEnum = termsEnum.postings(docsEnum, 0);
                    int docID = DocIdSetIterator.NO_MORE_DOCS;
                    for (int d = docsEnum.nextDoc(); d != DocIdSetIterator.NO_MORE_DOCS; d = docsEnum.nextDoc()) {
                        if (liveDocs != null && liveDocs.get(d) == false) {
                            continue;
                        }
                        docID = d;
                    }
                    if (docID != DocIdSetIterator.NO_MORE_DOCS) {
                        return dvField.get(docID);
                    }
                }
            }
        }
    }
    return 0;
}
From source file: org.elasticsearch.index.shard.IndexShardTestCase.java
License: Apache License
protected Set<Uid> getShardDocUIDs(final IndexShard shard) throws IOException {
    shard.refresh("get_uids");
    try (Engine.Searcher searcher = shard.acquireSearcher("test")) {
        Set<Uid> ids = new HashSet<>();
        for (LeafReaderContext leafContext : searcher.reader().leaves()) {
            LeafReader reader = leafContext.reader();
            Bits liveDocs = reader.getLiveDocs();
            for (int i = 0; i < reader.maxDoc(); i++) {
                if (liveDocs == null || liveDocs.get(i)) {
                    Document uuid = reader.document(i, Collections.singleton(UidFieldMapper.NAME));
                    ids.add(Uid.createUid(uuid.get(UidFieldMapper.NAME)));
                }
            }
        }
        return ids;
    }
}
From source file: org.elasticsearch.xpack.core.security.authz.accesscontrol.DocumentSubsetReader.java
License: Open Source License
/**
 * Compute the number of live documents. This method is SLOW.
 */
private static int computeNumDocs(LeafReader reader, Query roleQuery, BitSet roleQueryBits) {
    final Bits liveDocs = reader.getLiveDocs();
    if (roleQueryBits == null) {
        return 0;
    } else if (liveDocs == null) {
        // slow
        return roleQueryBits.cardinality();
    } else {
        // very slow, but necessary in order to be correct
        int numDocs = 0;
        DocIdSetIterator it = new BitSetIterator(roleQueryBits, 0L); // we don't use the cost
        try {
            for (int doc = it.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = it.nextDoc()) {
                if (liveDocs.get(doc)) {
                    numDocs++;
                }
            }
            return numDocs;
        } catch (IOException e) {
            throw new UncheckedIOException(e);
        }
    }
}
From source file: org.elasticsearch.xpack.core.security.authz.accesscontrol.DocumentSubsetReaderTests.java
License: Open Source License
public void testLiveDocs() throws Exception {
    int numDocs = scaledRandomIntBetween(16, 128);
    IndexWriter iw = new IndexWriter(directory,
            new IndexWriterConfig(new StandardAnalyzer()).setMergePolicy(NoMergePolicy.INSTANCE));
    for (int i = 0; i < numDocs; i++) {
        Document document = new Document();
        document.add(new StringField("field", "value" + i, Field.Store.NO));
        iw.addDocument(document);
    }
    iw.forceMerge(1);
    iw.close();

    openDirectoryReader();
    assertThat("should have one segment after force merge", directoryReader.leaves().size(), equalTo(1));

    for (int i = 0; i < numDocs; i++) {
        Query roleQuery = new TermQuery(new Term("field", "value" + i));
        DirectoryReader wrappedReader = DocumentSubsetReader.wrap(directoryReader, bitsetFilterCache, roleQuery);

        LeafReader leafReader = wrappedReader.leaves().get(0).reader();
        assertThat(leafReader.hasDeletions(), is(true));
        assertThat(leafReader.numDocs(), equalTo(1));
        Bits liveDocs = leafReader.getLiveDocs();
        assertThat(liveDocs.length(), equalTo(numDocs));
        for (int docId = 0; docId < numDocs; docId++) {
            if (docId == i) {
                assertThat("docId [" + docId + "] should match", liveDocs.get(docId), is(true));
            } else {
                assertThat("docId [" + docId + "] should not match", liveDocs.get(docId), is(false));
            }
        }
    }
}
From source file: org.modeshape.jcr.index.lucene.query.ConstantScoreWeightQuery.java
License: Apache License
@Override
public Weight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException {
    Set<String> fieldSet = Collections.singleton(field);
    // return a weight which uses a constant (1.0f) scorer...
    return new RandomAccessWeight(this) {
        @Override
        protected Bits getMatchingDocs(LeafReaderContext context) throws IOException {
            LeafReader leafReader = context.reader();
            Bits liveDocs = leafReader.getLiveDocs();
            // if liveDocs is null it means there are no deleted documents...
            int docsCount = liveDocs != null ? liveDocs.length() : leafReader.numDocs();
            FixedBitSet result = new FixedBitSet(leafReader.maxDoc());
            for (int i = 0; i < docsCount; i++) {
                if (liveDocs != null && !liveDocs.get(i)) {
                    continue;
                }
                Document document = leafReader.document(i, fieldSet);
                IndexableField[] fields = document.getFields(field);
                if (fields.length == 0) {
                    // the document doesn't have the field...
                    continue;
                }
                if (areValid(fields)) {
                    result.set(i);
                }
            }
            return result.cardinality() > 0 ? result : null;
        }
    };
}
From source file: org.uberfire.ext.metadata.backend.lucene.index.BaseLuceneIndex.java
License: Apache License
protected int[] lookupDocIdByPK(final IndexSearcher searcher, final String... ids) throws IOException {
    final List<LeafReaderContext> subReaders = searcher.getIndexReader().leaves();
    final TermsEnum[] termsEnums = new TermsEnum[subReaders.size()];
    final PostingsEnum[] docsEnums = new PostingsEnum[subReaders.size()];
    for (int subIDX = 0; subIDX < subReaders.size(); subIDX++) {
        termsEnums[subIDX] = subReaders.get(subIDX).reader().fields().terms("id").iterator();
    }

    int[] results = new int[ids.length];
    for (int i = 0; i < results.length; i++) {
        results[i] = -1;
    }

    // for each id given
    for (int idx = 0; idx < ids.length; idx++) {
        int base = 0;
        final BytesRef id = new BytesRef(ids[idx]);
        // for each leaf reader..
        for (int subIDX = 0; subIDX < subReaders.size(); subIDX++) {
            final LeafReader subReader = subReaders.get(subIDX).reader();
            final TermsEnum termsEnum = termsEnums[subIDX];
            // does this reader's enumeration of "id" terms contain the id we're looking for?
            if (termsEnum.seekExact(id)) {
                final PostingsEnum docs = docsEnums[subIDX] = termsEnum.postings(docsEnums[subIDX], 0);
                // okay, the reader contains the term; get its postings and null-check them
                if (docs != null) {
                    final int docID = docs.nextDoc();
                    Bits liveDocs = subReader.getLiveDocs();
                    // But wait, maybe some of the docs have been deleted! Check that too,
                    // testing for NO_MORE_DOCS before touching liveDocs so we never index past its end
                    if (docID != DocIdSetIterator.NO_MORE_DOCS && (liveDocs == null || liveDocs.get(docID))) {
                        results[idx] = base + docID;
                        break;
                    }
                }
            }
            base += subReader.maxDoc();
        }
    }
    return results;
}
From source file: org.voyanttools.trombone.lucene.CorpusMapper.java
License: Open Source License
/**
 * This should not be called, except from the private build() method.
 * @throws IOException
 */
private void buildFromTermsEnum() throws IOException {
    LeafReader reader = SlowCompositeReaderWrapper
            .wrap(storage.getLuceneManager().getDirectoryReader(corpus.getId()));
    Terms terms = reader.terms("id");
    TermsEnum termsEnum = terms.iterator();
    BytesRef bytesRef = termsEnum.next();
    int doc;
    String id;
    Set<String> ids = new HashSet<String>(getCorpusDocumentIds());
    bitSet = new SparseFixedBitSet(reader.numDocs());
    Bits liveBits = reader.getLiveDocs(); // note: fetched here but never consulted in the loop below
    while (bytesRef != null) {
        PostingsEnum postingsEnum = termsEnum.postings(null, PostingsEnum.NONE);
        doc = postingsEnum.nextDoc();
        if (doc != PostingsEnum.NO_MORE_DOCS) {
            id = bytesRef.utf8ToString();
            if (ids.contains(id)) {
                bitSet.set(doc);
                luceneIds.add(doc);
                documentIdToLuceneIdMap.put(id, doc);
                luceneIdToDocumentIdMap.put(doc, id);
            }
        }
        bytesRef = termsEnum.next();
    }
    this.reader = new FilteredCorpusReader(reader, bitSet);
}