Example usage for org.apache.lucene.util FixedBitSet set

List of usage examples for org.apache.lucene.util FixedBitSet set

Introduction

On this page you can find an example usage for org.apache.lucene.util FixedBitSet set.

Prototype

public void set(int index) 

Source Link

Usage

From source file:com.greplin.lucene.filter.PhraseFilter.java

License:Apache License

/**
 * Builds the set of documents in which all of this filter's terms occur at
 * their required relative offsets (i.e. form the phrase).
 * <p>
 * Terms are processed rarest-first per sub-reader so the candidate match
 * list shrinks as quickly as possible. The result representation is chosen
 * by density: a FixedBitSet for dense results, a sorted int array for
 * sparse ones.
 *
 * @param reader the (possibly composite) index reader to search
 * @return a DocIdSet of documents containing the phrase
 * @throws IOException if term/position enumeration fails
 */
@Override
public DocIdSet getDocIdSet(final IndexReader reader) throws IOException {
    List<IndexReader> subReaders = IndexReaders.gatherSubReaders(reader);
    PhraseFilterMatchList[] results = new PhraseFilterMatchList[subReaders.size()];
    int matchCount = 0;
    int readerNumber = 0;

    for (IndexReader subReader : subReaders) {
        // Order terms by ascending document frequency so the first
        // (rarest) term yields the smallest initial candidate set.
        SortedSet<TermWithFrequency> termsOrderedByFrequency = Sets.newTreeSet();
        for (int i = 0; i < this.terms.length; i++) {
            Term t = this.terms[i];
            termsOrderedByFrequency.add(new TermWithFrequency(t, subReader.docFreq(t), i));
        }

        PhraseFilterMatchList matches = null;
        TermPositions termPositions = subReader.termPositions();
        try {
            for (TermWithFrequency term : termsOrderedByFrequency) {
                // A zero-frequency term can never complete the phrase in
                // this sub-reader; matches stays null (or keeps its last
                // value) and we move on.
                if (term.docFreq == 0) {
                    break;
                }

                termPositions.seek(term.term);

                if (matches == null) {
                    // If this is the first term, collect all matches that intersect
                    // with the provided initial document set.
                    Intersection intersection = this.intersectionProvider.get(reader);

                    matches = new PhraseFilterMatchList(term.docFreq);
                    while (intersection.advanceToNextIntersection(termPositions)) {
                        int freq = termPositions.freq();
                        PhraseFilterIntList list = new PhraseFilterIntList(freq);
                        for (int i = 0; i < freq; i++) {
                            // Subtracting the term's offset normalizes all
                            // positions to the phrase start, so equal values
                            // across terms indicate a phrase match.
                            list.add(termPositions.nextPosition() - term.offset);
                        }
                        matches.add(termPositions.doc(), list);
                    }
                } else {
                    // Otherwise, intersect with the existing matches.
                    matches.intersect(termPositions, term.offset);
                }

                // No surviving candidates — the phrase cannot occur here.
                if (matches.getCount() == 0) {
                    break;
                }
            }
        } finally {
            termPositions.close();
        }

        // matches is null only if the very first term had docFreq == 0.
        if (matches != null) {
            results[readerNumber] = matches;
            matchCount += matches.getCount();
        }
        readerNumber++;
    }

    // Dense results (more than maxDoc/32 matches) are at least as compact
    // as one bit per document, so use a FixedBitSet; otherwise a sorted
    // int array uses less memory.
    final int bitsPerIntPowerLogTwo = 5; // 2^5 = 32
    if (matchCount > reader.maxDoc() >> bitsPerIntPowerLogTwo) {
        FixedBitSet result = new FixedBitSet(reader.maxDoc());
        int readerOffset = 0;
        for (int readerIndex = 0; readerIndex < results.length; readerIndex++) {
            PhraseFilterMatchList matches = results[readerIndex];
            if (matches != null) {
                int count = matches.getCount();
                int[] docIds = matches.getDocIds();
                for (int i = 0; i < count; i++) {
                    // Translate per-segment doc ids to global ids.
                    result.set(docIds[i] + readerOffset);
                }
            }
            readerOffset += subReaders.get(readerIndex).maxDoc();
        }
        return result;
    } else if (matchCount == 0) {
        return DocIdSets.EMPTY;
    } else {
        int[] result = new int[matchCount];
        int base = 0;
        int readerOffset = 0;
        for (int readerIndex = 0; readerIndex < results.length; readerIndex++) {
            PhraseFilterMatchList matches = results[readerIndex];
            if (matches != null) {
                int count = matches.getCount();
                int[] docIds = matches.getDocIds();
                for (int i = 0; i < count; i++) {
                    result[base + i] = docIds[i] + readerOffset;
                }
                base += count;
            }
            readerOffset += subReaders.get(readerIndex).maxDoc();
        }
        return new SortedIntArrayDocIdSet(result);
    }
}

From source file:com.greplin.lucene.filter.TermsFilter.java

License:Apache License

/**
 * Collects every document that matches at least one of the configured
 * terms into a single bit set over the whole reader.
 *
 * @param reader the index reader to enumerate term matches from
 * @return a FixedBitSet with one bit set per matching document
 * @throws IOException if term enumeration fails
 */
@Override
public DocIdSet getDocIdSet(final IndexReader reader) throws IOException {
    final FixedBitSet matchingDocs = new FixedBitSet(reader.maxDoc());
    final TermDocs termDocs = reader.termDocs();
    try {
        for (final Term term : this.terms) {
            termDocs.seek(term);
            while (termDocs.next()) {
                matchingDocs.set(termDocs.doc());
            }
        }
    } finally {
        // Always release the enumerator, even if seek/next throws.
        termDocs.close();
    }
    return matchingDocs;
}

From source file:com.sindicetech.siren.index.codecs.siren10.Siren10PostingsWriter.java

License:Open Source License

/**
 * Default merge impl: append documents, nodes and positions, mapping around
 * deletes./*from   ww w  .  j  a v  a2s  .  c  o  m*/
 * <p>
 * Bypass the {@link org.apache.lucene.codecs.PostingsConsumer#merge(org.apache.lucene.index.MergeState, org.apache.lucene.index.FieldInfo.IndexOptions, org.apache.lucene.index.DocsEnum, org.apache.lucene.util.FixedBitSet)}
 * methods and work directly with the BlockWriters for maximum efficiency.
 * <p>
 * TODO - Optimisation: If document blocks match the block size, and no
 * document deleted, then it would be possible to copy block directly as byte
 * array, avoiding decoding and encoding.
 **/
@Override
public TermStats merge(final MergeState mergeState, final IndexOptions indexOptions, final DocsEnum postings,
        final FixedBitSet visitedDocs) throws IOException {
    int df = 0;
    long totTF = 0;

    postingsEnum.setMergeState(mergeState);
    postingsEnum.reset((MappingMultiDocsAndPositionsEnum) postings);

    while (postingsEnum.nextDocument()) {
        final int doc = postingsEnum.doc();
        visitedDocs.set(doc);

        this.startDoc(doc, -1);

        final int nodeFreq = postingsEnum.nodeFreqInDoc();
        docWriter.writeNodeFreq(nodeFreq);

        while (postingsEnum.nextNode()) {
            final IntsRef node = postingsEnum.node();
            nodWriter.write(node);

            final int termFreqInNode = postingsEnum.termFreqInNode();
            nodWriter.writeTermFreq(termFreqInNode);

            // reset current position for delta computation
            posWriter.resetCurrentPosition();

            while (postingsEnum.nextPosition()) {
                final int position = postingsEnum.pos();
                posWriter.write(position);
                totTF++;
            }
        }
        df++;
    }

    return new TermStats(df, totTF);
}

From source file:com.tcdi.zombodb.query.VisibilityQueryHelper.java

License:Apache License

/**
 * Determines which documents are NOT visible to the current transaction and
 * returns, per reader ord, a FixedBitSet marking those invisible documents.
 * <p>
 * Visibility follows MVCC-style rules: a document version is invisible if a
 * newer visible version exists, its xid is beyond xmax, its xid is still
 * active, or it belongs to an uncommitted foreign transaction.
 *
 * @param query         unused here directly; part of the calling contract
 * @param field         the ctid field name to collect on
 * @param myXid         the current transaction id (own writes are visible)
 * @param xmin          lower transaction-id bound (NOTE(review): not read in
 *                      this method body — confirm it is intentionally unused)
 * @param xmax          upper transaction-id bound; newer xids are invisible
 * @param activeXids    transaction ids still in flight (invisible)
 * @param searcher      searcher over the index to examine
 * @param updatedCtids  the _prev_ctid values of updated documents
 * @return map of reader ord to a bit set of invisible doc ids
 * @throws IOException if collection or term lookup fails
 */
static Map<Integer, FixedBitSet> determineVisibility(final Query query, final String field, final long myXid,
        final long xmin, final long xmax, final Set<Long> activeXids, IndexSearcher searcher,
        List<BytesRef> updatedCtids) throws IOException {
    final Map<Integer, FixedBitSet> visibilityBitSets = new HashMap<>();

    if (updatedCtids.size() == 0)
        return visibilityBitSets;

    //
    // build a map of {@link VisibilityInfo} objects by each _prev_ctid
    //
    // We use XConstantScoreQuery here so that we exclude deleted docs
    //

    final Map<BytesRef, List<VisibilityInfo>> map = new HashMap<>();
    searcher.search(
            new XConstantScoreQuery(
                    SearchContext.current().filterCache().cache(new TermsFilter(field, updatedCtids))),
            new ZomboDBTermsCollector(field) {
                private SortedDocValues prevCtids;
                private SortedNumericDocValues xids;
                private SortedNumericDocValues sequence;
                private int ord;
                private int maxdoc;

                @Override
                public void collect(int doc) throws IOException {
                    xids.setDocument(doc);
                    sequence.setDocument(doc);

                    long xid = xids.valueAt(0);
                    long seq = sequence.valueAt(0);
                    BytesRef prevCtid = prevCtids.get(doc);

                    List<VisibilityInfo> matchingDocs = map.get(prevCtid);

                    if (matchingDocs == null)
                        // deepCopyOf: prevCtid is a reused buffer, so a
                        // stable copy is required for use as a map key.
                        map.put(BytesRef.deepCopyOf(prevCtid), matchingDocs = new ArrayList<>());
                    matchingDocs.add(new VisibilityInfo(ord, maxdoc, doc, xid, seq));
                }

                @Override
                public void setNextReader(AtomicReaderContext context) throws IOException {
                    prevCtids = FieldCache.DEFAULT.getTermsIndex(context.reader(), field);
                    xids = context.reader().getSortedNumericDocValues("_xid");
                    sequence = context.reader().getSortedNumericDocValues("_zdb_seq");
                    ord = context.ord;
                    maxdoc = context.reader().maxDoc();
                }
            });

    if (map.isEmpty())
        return visibilityBitSets;

    //
    // pick out the first VisibilityInfo for each document that is visible & committed
    // and build a FixedBitSet for each reader 'ord' that contains visible
    // documents.  A map of these (key'd on reader ord) is what we return.
    //

    BytesRefBuilder bytesRefBuilder = new BytesRefBuilder() {
        /* overloaded to avoid making a copy of the byte array */
        @Override
        public BytesRef toBytesRef() {
            return new BytesRef(this.bytes(), 0, this.length());
        }
    };

    Terms committedXidsTerms = MultiFields.getFields(searcher.getIndexReader()).terms("_zdb_committed_xid");
    TermsEnum committedXidsEnum = committedXidsTerms == null ? null : committedXidsTerms.iterator(null);
    for (List<VisibilityInfo> visibility : map.values()) {
        // Sort newest version first (xid desc, then sequence desc) so the
        // first visible entry encountered is the current version.
        CollectionUtil.introSort(visibility, new Comparator<VisibilityInfo>() {
            @Override
            public int compare(VisibilityInfo o1, VisibilityInfo o2) {
                int cmp = Long.compare(o2.xid, o1.xid);
                return cmp == 0 ? Long.compare(o2.sequence, o1.sequence) : cmp;
            }
        });

        boolean foundVisible = false;
        for (VisibilityInfo mapping : visibility) {

            // Once one version is visible, all older versions are
            // superseded and therefore invisible.
            if (foundVisible || mapping.xid > xmax || activeXids.contains(mapping.xid) || (mapping.xid != myXid
                    && !isCommitted(committedXidsEnum, mapping.xid, bytesRefBuilder))) {
                // document is not visible to us
                FixedBitSet visibilityBitset = visibilityBitSets.get(mapping.readerOrd);
                if (visibilityBitset == null)
                    visibilityBitSets.put(mapping.readerOrd,
                            visibilityBitset = new FixedBitSet(mapping.maxdoc));
                visibilityBitset.set(mapping.docid);
            } else {
                foundVisible = true;
            }
        }
    }

    return visibilityBitSets;
}

From source file:de.jetsli.lumeo.util.TermFilter.java

License:Apache License

/**
 * Builds a bit set of every document containing the configured term,
 * restricted to the accepted docs.
 *
 * @param context    the per-segment reader context
 * @param acceptDocs documents allowed to match (may exclude deletions)
 * @return a FixedBitSet of matching doc ids; empty if the term is absent
 * @throws IOException if enumeration fails
 */
@Override
public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException {
    AtomicReader segmentReader = context.reader();
    FixedBitSet matched = new FixedBitSet(segmentReader.maxDoc());
    DocsEnum docsEnum = segmentReader.termDocsEnum(acceptDocs, fieldName, bytes, false);

    // A null enum means the term does not occur in this segment.
    if (docsEnum == null)
        return matched;

    for (int doc = docsEnum.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = docsEnum.nextDoc()) {
        matched.set(doc);
    }
    return matched;
}

From source file:de.unihildesheim.iw.lucene.query.QueryUtils.java

License:Open Source License

/**
 * Removes terms from the given collection if they do not occur in the
 * index collection (e.g. stopwords or otherwise unknown terms).
 *
 * @param dataProv IndexDataProvider used to look up term frequencies
 * @param terms Collection of terms to check against the collection
 * @return Passed in terms with non-collection terms removed.
 * NOTE(review): when any term is removed, surviving terms are appended in
 * reverse index order, so the returned array's ordering is reversed
 * relative to the input — confirm callers do not rely on term order.
 */
@SuppressFBWarnings("LO_APPENDED_STRING_IN_FORMAT_STRING")
private static BytesRefArray removeUnknownTerms(@NotNull final IndexDataProvider dataProv,
        @NotNull final BytesRefArray terms) {
    // Warning message listing skipped terms; only logged if any bit gets set.
    final StringBuilder sb = new StringBuilder("Skipped terms (stopword or not in collection): [");
    // One bit per input index; a set bit marks a term to drop.
    final FixedBitSet bits = new FixedBitSet(terms.size());
    final BytesRefBuilder spare = new BytesRefBuilder();
    BytesRef term;

    if (terms.size() == 0) {
        return terms;
    } else {
        for (int i = terms.size() - 1; i >= 0; i--) {
            term = terms.get(spare, i);
            // Frequency <= 0 means the term is not in the collection.
            if (dataProv.getTermFrequency(term) <= 0L) {
                sb.append(term.utf8ToString()).append(' ');
                bits.set(i);
            }
        }

        if (bits.cardinality() > 0) {
            LOG.warn(sb.toString().trim() + "].");
            final BytesRefArray cleanTerms = new BytesRefArray(Counter.newCounter(false));
            for (int i = terms.size() - 1; i >= 0; i--) {
                if (!bits.get(i)) {
                    term = terms.get(spare, i);
                    cleanTerms.append(term); // copies bytes
                }
            }
            return cleanTerms;
        }
        // Nothing removed: return the original array untouched.
        return terms;
    }
}

From source file:de.unihildesheim.iw.lucene.util.BitsUtilsTest.java

License:Open Source License

@SuppressWarnings("ImplicitNumericConversion")
@Test
public void testBits2BitSet() throws Exception {
    // Populate an 11-bit set with a known pattern.
    final FixedBitSet fbs = new FixedBitSet(11);
    for (final int bit : new int[] { 1, 3, 6, 7, 8, 10 }) {
        fbs.set(bit);
    }

    final BitSet result = BitsUtils.bits2BitSet(fbs);

    // Both the total count and each individual bit must survive conversion.
    Assert.assertEquals("Bit count mismatch.", fbs.cardinality(), result.cardinality());
    for (int i = 0; i < 11; i++) {
        Assert.assertEquals("Bits mismatch.", fbs.get(i), result.get(i));
    }
}

From source file:de.unihildesheim.iw.lucene.util.BitsUtilsTest.java

License:Open Source License

@Test
public void testBits2FixedBitSet() throws Exception {
    // Round-trip a known bit pattern through bits2FixedBitSet.
    final FixedBitSet fbs = new FixedBitSet(11);
    fbs.set(1);
    fbs.set(3);
    fbs.set(6);
    fbs.set(7);
    fbs.set(8);
    fbs.set(10);

    final FixedBitSet result = BitsUtils.bits2FixedBitSet(fbs);
    // assertEquals reports expected vs. actual on failure, unlike
    // assertTrue(a.equals(b)) which only says "false".
    Assert.assertEquals("BitSets not equal.", fbs, result);
}

From source file:de.unihildesheim.iw.lucene.util.BitsUtilsTest.java

License:Open Source License

@Test
public void testArrayToBits() throws Exception {
    // Expected result: an 11-bit set with exactly these bits on.
    final FixedBitSet fbs = new FixedBitSet(11);
    fbs.set(1);
    fbs.set(3);
    fbs.set(6);
    fbs.set(7);
    fbs.set(8);
    fbs.set(10);
    final int[] bits = { 1, 3, 6, 7, 8, 10 };

    final FixedBitSet result = BitsUtils.arrayToBits(bits);
    // assertEquals reports expected vs. actual on failure, unlike
    // assertTrue(a.equals(b)) which only says "false".
    Assert.assertEquals("BitSets not equal.", fbs, result);
}

From source file:de.unihildesheim.iw.lucene.util.StreamUtilsTest.java

License:Open Source License

@Test
public void testStream_bitSet() throws Exception {
    // Known pattern of set bits in an 11-bit set.
    final int[] expectedIds = { 1, 3, 6, 7, 8, 10 };
    final FixedBitSet bits = new FixedBitSet(11);
    for (final int bit : expectedIds) {
        bits.set(bit);
    }

    // The stream must yield exactly one element per set bit.
    Assert.assertEquals("Not all bits streamed.", 6L, StreamUtils.stream(bits).count());

    // Every expected id must appear exactly once in the stream.
    for (final int expected : expectedIds) {
        Assert.assertEquals("Bit not found.", 1L,
                StreamUtils.stream(bits).filter(id -> id == expected).count());
    }

    // No id outside the expected pattern may be streamed.
    Assert.assertEquals("Unknown document id found.", 0L, StreamUtils.stream(bits)
            .filter(id -> id != 1 && id != 3 && id != 6 && id != 7 && id != 8 && id != 10).count());
}