List of usage examples for org.apache.lucene.util FixedBitSet set
public void set(int index)
From source file:com.greplin.lucene.filter.PhraseFilter.java
License:Apache License
/**
 * Builds the set of documents matching the phrase across all sub-readers.
 * <p>
 * Terms are processed rarest-first (sorted by docFreq) so the candidate set
 * shrinks as quickly as possible; each later term is intersected against the
 * surviving matches with its positional offset applied.
 *
 * @param reader the (possibly composite) index reader to search
 * @return a {@code FixedBitSet} when matches are dense (more than
 *         maxDoc/32 hits), a sorted-int-array set when sparse, or an empty
 *         set when nothing matched
 * @throws IOException on index access failure
 */
@Override public DocIdSet getDocIdSet(final IndexReader reader) throws IOException {
    List<IndexReader> subReaders = IndexReaders.gatherSubReaders(reader);
    // Per-sub-reader match lists; entries stay null when a sub-reader has no matches.
    PhraseFilterMatchList[] results = new PhraseFilterMatchList[subReaders.size()];
    int matchCount = 0;
    int readerNumber = 0;
    for (IndexReader subReader : subReaders) {
        // Order terms by ascending docFreq so the cheapest term drives the scan.
        SortedSet<TermWithFrequency> termsOrderedByFrequency = Sets.newTreeSet();
        for (int i = 0; i < this.terms.length; i++) {
            Term t = this.terms[i];
            // 'i' is the term's position offset within the phrase.
            termsOrderedByFrequency.add(new TermWithFrequency(t, subReader.docFreq(t), i));
        }
        PhraseFilterMatchList matches = null;
        TermPositions termPositions = subReader.termPositions();
        try {
            for (TermWithFrequency term : termsOrderedByFrequency) {
                // A term absent from this sub-reader means the phrase cannot match at all here.
                if (term.docFreq == 0) {
                    break;
                }
                termPositions.seek(term.term);
                if (matches == null) {
                    // If this is the first term, collect all matches that intersect
                    // with the provided initial document set.
                    Intersection intersection = this.intersectionProvider.get(reader);
                    matches = new PhraseFilterMatchList(term.docFreq);
                    while (intersection.advanceToNextIntersection(termPositions)) {
                        int freq = termPositions.freq();
                        PhraseFilterIntList list = new PhraseFilterIntList(freq);
                        for (int i = 0; i < freq; i++) {
                            // Normalize positions by the term's phrase offset so all terms
                            // of one phrase occurrence record the same base position.
                            list.add(termPositions.nextPosition() - term.offset);
                        }
                        matches.add(termPositions.doc(), list);
                    }
                } else {
                    // Otherwise, intersect with the existing matches.
                    matches.intersect(termPositions, term.offset);
                }
                // No surviving candidates: stop early for this sub-reader.
                if (matches.getCount() == 0) {
                    break;
                }
            }
        } finally {
            termPositions.close();
        }
        if (matches != null) {
            results[readerNumber] = matches;
            matchCount += matches.getCount();
        }
        readerNumber++;
    }
    final int bitsPerIntPowerLogTwo = 5; // 2^5 = 32
    if (matchCount > reader.maxDoc() >> bitsPerIntPowerLogTwo) {
        // Dense result: a bit set is more compact than an int per match.
        FixedBitSet result = new FixedBitSet(reader.maxDoc());
        int readerOffset = 0;
        for (int readerIndex = 0; readerIndex < results.length; readerIndex++) {
            PhraseFilterMatchList matches = results[readerIndex];
            if (matches != null) {
                int count = matches.getCount();
                int[] docIds = matches.getDocIds();
                for (int i = 0; i < count; i++) {
                    // Rebase segment-local doc ids to the composite reader's doc space.
                    result.set(docIds[i] + readerOffset);
                }
            }
            readerOffset += subReaders.get(readerIndex).maxDoc();
        }
        return result;
    } else if (matchCount == 0) {
        return DocIdSets.EMPTY;
    } else {
        // Sparse result: store the (already sorted per segment) doc ids directly.
        int[] result = new int[matchCount];
        int base = 0;
        int readerOffset = 0;
        for (int readerIndex = 0; readerIndex < results.length; readerIndex++) {
            PhraseFilterMatchList matches = results[readerIndex];
            if (matches != null) {
                int count = matches.getCount();
                int[] docIds = matches.getDocIds();
                for (int i = 0; i < count; i++) {
                    result[base + i] = docIds[i] + readerOffset;
                }
                base += count;
            }
            readerOffset += subReaders.get(readerIndex).maxDoc();
        }
        return new SortedIntArrayDocIdSet(result);
    }
}
From source file:com.greplin.lucene.filter.TermsFilter.java
License:Apache License
@Override public DocIdSet getDocIdSet(final IndexReader reader) throws IOException { FixedBitSet result = new FixedBitSet(reader.maxDoc()); TermDocs td = reader.termDocs();//from www. j a v a 2s. c om try { for (Term term : this.terms) { td.seek(term); while (td.next()) { result.set(td.doc()); } } } finally { td.close(); } return result; }
From source file:com.sindicetech.siren.index.codecs.siren10.Siren10PostingsWriter.java
License:Open Source License
/**
 * Default merge impl: append documents, nodes and positions, mapping around
 * deletes.
 * <p>
 * Bypass the {@link org.apache.lucene.codecs.PostingsConsumer#merge(org.apache.lucene.index.MergeState, org.apache.lucene.index.FieldInfo.IndexOptions, org.apache.lucene.index.DocsEnum, org.apache.lucene.util.FixedBitSet)}
 * methods and work directly with the BlockWriters for maximum efficiency.
 * <p>
 * TODO - Optimisation: If document blocks match the block size, and no
 * document deleted, then it would be possible to copy block directly as byte
 * array, avoiding decoding and encoding.
 *
 * @param mergeState  state describing the segments being merged
 * @param indexOptions index options for the field (unused here; the writers
 *                     are already configured — NOTE(review): confirm)
 * @param postings     the multi-segment postings to re-encode
 * @param visitedDocs  out-parameter: a bit is set for every doc id emitted
 * @return per-term stats: document frequency and total term frequency
 */
@Override
public TermStats merge(final MergeState mergeState, final IndexOptions indexOptions, final DocsEnum postings,
        final FixedBitSet visitedDocs) throws IOException {
    int df = 0;      // number of documents containing the term
    long totTF = 0;  // total occurrences of the term across all docs
    postingsEnum.setMergeState(mergeState);
    postingsEnum.reset((MappingMultiDocsAndPositionsEnum) postings);
    while (postingsEnum.nextDocument()) {
        final int doc = postingsEnum.doc();
        // Record the (re-mapped) doc id for the caller's visited-docs tracking.
        visitedDocs.set(doc);
        this.startDoc(doc, -1);
        final int nodeFreq = postingsEnum.nodeFreqInDoc();
        docWriter.writeNodeFreq(nodeFreq);
        while (postingsEnum.nextNode()) {
            final IntsRef node = postingsEnum.node();
            nodWriter.write(node);
            final int termFreqInNode = postingsEnum.termFreqInNode();
            nodWriter.writeTermFreq(termFreqInNode);
            // reset current position for delta computation
            posWriter.resetCurrentPosition();
            while (postingsEnum.nextPosition()) {
                final int position = postingsEnum.pos();
                posWriter.write(position);
                totTF++;
            }
        }
        df++;
    }
    return new TermStats(df, totTF);
}
From source file:com.tcdi.zombodb.query.VisibilityQueryHelper.java
License:Apache License
/**
 * Computes, per reader ord, a bit set of documents that are NOT visible under
 * MVCC rules (uncommitted, aborted, from a concurrent transaction, or shadowed
 * by a newer visible version of the same row).
 * <p>
 * NOTE(review): the {@code query} and {@code xmin} parameters are not used in
 * this body — confirm whether they are vestigial or reserved for callers.
 *
 * @param updatedCtids ctids of rows that have newer versions; empty means
 *                     everything is visible and an empty map is returned
 * @return map keyed by reader ord; a set bit marks an invisible document
 */
static Map<Integer, FixedBitSet> determineVisibility(final Query query, final String field, final long myXid,
        final long xmin, final long xmax, final Set<Long> activeXids, IndexSearcher searcher,
        List<BytesRef> updatedCtids) throws IOException {
    final Map<Integer, FixedBitSet> visibilityBitSets = new HashMap<>();
    if (updatedCtids.size() == 0)
        return visibilityBitSets;

    //
    // build a map of {@link VisibilityInfo} objects by each _prev_ctid
    //
    // We use XConstantScoreQuery here so that we exclude deleted docs
    //
    final Map<BytesRef, List<VisibilityInfo>> map = new HashMap<>();
    searcher.search(
            new XConstantScoreQuery(
                    SearchContext.current().filterCache().cache(new TermsFilter(field, updatedCtids))),
            new ZomboDBTermsCollector(field) {
                private SortedDocValues prevCtids;
                private SortedNumericDocValues xids;
                private SortedNumericDocValues sequence;
                private int ord;     // current reader's ord within the composite reader
                private int maxdoc;  // current reader's maxDoc, needed to size bit sets later

                @Override
                public void collect(int doc) throws IOException {
                    xids.setDocument(doc);
                    sequence.setDocument(doc);
                    long xid = xids.valueAt(0);
                    long seq = sequence.valueAt(0);
                    BytesRef prevCtid = prevCtids.get(doc);
                    List<VisibilityInfo> matchingDocs = map.get(prevCtid);
                    if (matchingDocs == null)
                        // deepCopyOf: the returned BytesRef is a shared scratch buffer.
                        map.put(BytesRef.deepCopyOf(prevCtid), matchingDocs = new ArrayList<>());
                    matchingDocs.add(new VisibilityInfo(ord, maxdoc, doc, xid, seq));
                }

                @Override
                public void setNextReader(AtomicReaderContext context) throws IOException {
                    prevCtids = FieldCache.DEFAULT.getTermsIndex(context.reader(), field);
                    xids = context.reader().getSortedNumericDocValues("_xid");
                    sequence = context.reader().getSortedNumericDocValues("_zdb_seq");
                    ord = context.ord;
                    maxdoc = context.reader().maxDoc();
                }
            });

    if (map.isEmpty())
        return visibilityBitSets;

    //
    // pick out the first VisibilityInfo for each document that is visible & committed
    // and build a FixedBitSet for each reader 'ord' that contains visible
    // documents. A map of these (key'd on reader ord) is what we return.
    //
    BytesRefBuilder bytesRefBuilder = new BytesRefBuilder() {
        /* overloaded to avoid making a copy of the byte array */
        @Override
        public BytesRef toBytesRef() {
            return new BytesRef(this.bytes(), 0, this.length());
        }
    };
    Terms committedXidsTerms = MultiFields.getFields(searcher.getIndexReader()).terms("_zdb_committed_xid");
    TermsEnum committedXidsEnum = committedXidsTerms == null ? null : committedXidsTerms.iterator(null);
    for (List<VisibilityInfo> visibility : map.values()) {
        // Sort newest-first: highest xid, then highest sequence within one xid.
        CollectionUtil.introSort(visibility, new Comparator<VisibilityInfo>() {
            @Override
            public int compare(VisibilityInfo o1, VisibilityInfo o2) {
                int cmp = Long.compare(o2.xid, o1.xid);
                return cmp == 0 ? Long.compare(o2.sequence, o1.sequence) : cmp;
            }
        });
        boolean foundVisible = false;
        for (VisibilityInfo mapping : visibility) {
            // Invisible when: a newer version was already accepted, the xid is in
            // the future, the xid belongs to a still-active transaction, or the
            // xid is neither ours nor committed.
            if (foundVisible || mapping.xid > xmax || activeXids.contains(mapping.xid)
                    || (mapping.xid != myXid && !isCommitted(committedXidsEnum, mapping.xid, bytesRefBuilder))) {
                // document is not visible to us
                FixedBitSet visibilityBitset = visibilityBitSets.get(mapping.readerOrd);
                if (visibilityBitset == null)
                    visibilityBitSets.put(mapping.readerOrd,
                            visibilityBitset = new FixedBitSet(mapping.maxdoc));
                visibilityBitset.set(mapping.docid);
            } else {
                foundVisible = true;
            }
        }
    }
    return visibilityBitSets;
}
From source file:de.jetsli.lumeo.util.TermFilter.java
License:Apache License
@Override public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException { AtomicReader reader = context.reader(); FixedBitSet result = new FixedBitSet(reader.maxDoc()); DocsEnum de = reader.termDocsEnum(acceptDocs, fieldName, bytes, false); if (de == null) return result; int id;//w w w .j a v a 2 s . co m while ((id = de.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) { result.set(id); } return result; }
From source file:de.unihildesheim.iw.lucene.query.QueryUtils.java
License:Open Source License
/** * Remove terms from the given collection, if they are not found in the * collection./*from ww w . j a v a 2 s .c om*/ * * @param dataProv IndexDataProvider * @param terms Collection of terms to check against the collection * @return Passed in terms with non-collection terms removed */ @SuppressFBWarnings("LO_APPENDED_STRING_IN_FORMAT_STRING") private static BytesRefArray removeUnknownTerms(@NotNull final IndexDataProvider dataProv, @NotNull final BytesRefArray terms) { final StringBuilder sb = new StringBuilder("Skipped terms (stopword or not in collection): ["); final FixedBitSet bits = new FixedBitSet(terms.size()); final BytesRefBuilder spare = new BytesRefBuilder(); BytesRef term; if (terms.size() == 0) { return terms; } else { for (int i = terms.size() - 1; i >= 0; i--) { term = terms.get(spare, i); if (dataProv.getTermFrequency(term) <= 0L) { sb.append(term.utf8ToString()).append(' '); bits.set(i); } } if (bits.cardinality() > 0) { LOG.warn(sb.toString().trim() + "]."); final BytesRefArray cleanTerms = new BytesRefArray(Counter.newCounter(false)); for (int i = terms.size() - 1; i >= 0; i--) { if (!bits.get(i)) { term = terms.get(spare, i); cleanTerms.append(term); // copies bytes } } return cleanTerms; } return terms; } }
From source file:de.unihildesheim.iw.lucene.util.BitsUtilsTest.java
License:Open Source License
@SuppressWarnings("ImplicitNumericConversion")
@Test
public void testBits2BitSet() throws Exception {
    // Build a FixedBitSet with a known pattern of set bits.
    final FixedBitSet fbs = new FixedBitSet(11);
    for (final int bit : new int[] { 1, 3, 6, 7, 8, 10 }) {
        fbs.set(bit);
    }
    // Conversion must preserve both the number of set bits and each bit's value.
    final BitSet result = BitsUtils.bits2BitSet(fbs);
    Assert.assertEquals("Bit count mismatch.", fbs.cardinality(), result.cardinality());
    for (int idx = 0; idx < 11; idx++) {
        Assert.assertEquals("Bits mismatch.", fbs.get(idx), result.get(idx));
    }
}
From source file:de.unihildesheim.iw.lucene.util.BitsUtilsTest.java
License:Open Source License
@Test public void testBits2FixedBitSet() throws Exception { final FixedBitSet fbs = new FixedBitSet(11); fbs.set(1); fbs.set(3);//w ww . j a v a2s. co m fbs.set(6); fbs.set(7); fbs.set(8); fbs.set(10); final FixedBitSet result = BitsUtils.bits2FixedBitSet(fbs); Assert.assertTrue("BitSets not equal.", fbs.equals(result)); }
From source file:de.unihildesheim.iw.lucene.util.BitsUtilsTest.java
License:Open Source License
@Test public void testArrayToBits() throws Exception { final FixedBitSet fbs = new FixedBitSet(11); fbs.set(1); fbs.set(3);// w w w.jav a 2s . co m fbs.set(6); fbs.set(7); fbs.set(8); fbs.set(10); final int[] bits = { 1, 3, 6, 7, 8, 10 }; final FixedBitSet result = BitsUtils.arrayToBits(bits); Assert.assertTrue("BitSets not equal.", fbs.equals(result)); }
From source file:de.unihildesheim.iw.lucene.util.StreamUtilsTest.java
License:Open Source License
@Test public void testStream_bitSet() throws Exception { final FixedBitSet bits = new FixedBitSet(11); bits.set(1); bits.set(3);//w ww .j a va 2s . co m bits.set(6); bits.set(7); bits.set(8); bits.set(10); Assert.assertEquals("Not all bits streamed.", 6L, StreamUtils.stream(bits).count()); Assert.assertEquals("Bit not found.", 1L, StreamUtils.stream(bits).filter(id -> id == 1).count()); Assert.assertEquals("Bit not found.", 1L, StreamUtils.stream(bits).filter(id -> id == 3).count()); Assert.assertEquals("Bit not found.", 1L, StreamUtils.stream(bits).filter(id -> id == 6).count()); Assert.assertEquals("Bit not found.", 1L, StreamUtils.stream(bits).filter(id -> id == 7).count()); Assert.assertEquals("Bit not found.", 1L, StreamUtils.stream(bits).filter(id -> id == 8).count()); Assert.assertEquals("Bit not found.", 1L, StreamUtils.stream(bits).filter(id -> id == 10).count()); Assert.assertEquals("Unknown document id found.", 0L, StreamUtils.stream(bits) .filter(id -> id != 1 && id != 3 && id != 6 && id != 7 && id != 8 && id != 10).count()); }