Example usage for org.apache.lucene.util BitSet cardinality

Introduction

This page collects example usages of the cardinality() method of org.apache.lucene.util.BitSet.

Prototype

public abstract int cardinality();

Document

Return the number of bits that are set.
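
For orientation, here is a minimal, self-contained sketch of the call, using FixedBitSet (a concrete subclass of org.apache.lucene.util.BitSet); the bit-set size and positions are illustrative only:

import org.apache.lucene.util.FixedBitSet;

public class CardinalityDemo {
    public static void main(String[] args) {
        // FixedBitSet is a concrete org.apache.lucene.util.BitSet implementation.
        FixedBitSet bits = new FixedBitSet(64); // 64 bits, all initially clear
        bits.set(3);
        bits.set(17);
        bits.set(42);
        System.out.println(bits.cardinality()); // prints 3: three bits are set
    }
}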

Usage

From source file: com.dasasian.chok.lucene.LuceneServer.java

License: Apache License

/**
 * Merges the already sorted sub-lists into one big sorted list.
 */
private static List<Hit> mergeFieldSort(FieldSortComparator comparator, int count, ScoreDoc[][] sortedFieldDocs,
        String[] shards, String nodeName) {
    int[] arrayPositions = new int[sortedFieldDocs.length];
    final List<Hit> sortedResult = new ArrayList<>(count);

    BitSet listDone = new BitSet(sortedFieldDocs.length);
    for (int subListIndex = 0; subListIndex < arrayPositions.length; subListIndex++) {
        if (sortedFieldDocs[subListIndex].length == 0) {
            listDone.set(subListIndex, true);
        }
    }
    do {
        int fieldDocArrayWithSmallestFieldDoc = -1;
        FieldDoc smallestFieldDoc = null;
        for (int subListIndex = 0; subListIndex < arrayPositions.length; subListIndex++) {
            if (!listDone.get(subListIndex)) {
                FieldDoc hit = (FieldDoc) sortedFieldDocs[subListIndex][arrayPositions[subListIndex]];
                if (smallestFieldDoc == null || comparator.compare(hit.fields, smallestFieldDoc.fields) < 0) {
                    smallestFieldDoc = hit;
                    fieldDocArrayWithSmallestFieldDoc = subListIndex;
                }
            }
        }
        if (smallestFieldDoc == null) {
            // Defensive guard: every sub-list was empty, so there is nothing to merge.
            break;
        }
        ScoreDoc[] smallestElementList = sortedFieldDocs[fieldDocArrayWithSmallestFieldDoc];
        FieldDoc fieldDoc = (FieldDoc) smallestElementList[arrayPositions[fieldDocArrayWithSmallestFieldDoc]];
        arrayPositions[fieldDocArrayWithSmallestFieldDoc]++;
        final Hit hit = new Hit(shards[fieldDocArrayWithSmallestFieldDoc], nodeName, fieldDoc.score,
                fieldDoc.doc);
        hit.setSortFields(WritableType.convertComparable(comparator.getFieldTypes(), fieldDoc.fields));
        sortedResult.add(hit);
        if (arrayPositions[fieldDocArrayWithSmallestFieldDoc] >= smallestElementList.length) {
            listDone.set(fieldDocArrayWithSmallestFieldDoc, true);
        }
    } while (sortedResult.size() < count && listDone.cardinality() < arrayPositions.length);
    return sortedResult;
}
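
In this merge, cardinality() drives termination: the loop runs until count hits have been collected or listDone.cardinality() equals the number of sub-lists, i.e. every shard's results are exhausted. (Note that this snippet constructs BitSet directly and calls set(index, value), which matches java.util.BitSet; Lucene's org.apache.lucene.util.BitSet is abstract, as the prototype above shows.)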

From source file: com.dasasian.chok.lucene.LuceneServer.java

License: Apache License

/**
 * Search the given shards and return at most max hits for the given query.
 *
 * @param query the query
 * @param freqs document frequency writer
 * @param shards the shards
 * @param result the writable for the result
 * @param max max results
 * @param sort the sort order
 * @param timeout timeout value
 * @param filter filter to apply
 * @throws IOException when an error occurs
 */
protected final void search(final Query query, final DocumentFrequencyWritable freqs, final String[] shards,
        final HitsMapWritable result, final int max, Sort sort, long timeout, Filter filter)
        throws IOException {
    timeout = getCollectorTimeout(timeout);
    final Query rewrittenQuery = rewrite(query, shards);
    final int numDocs = freqs.getNumDocsAsInteger();
    final Weight weight = rewrittenQuery
            .weight(new CachedDfSource(freqs.getAll(), numDocs, new DefaultSimilarity()));
    int totalHits = 0;
    final int shardsCount = shards.length;

    // Run the search in parallel on the shards with a thread pool.
    CompletionService<SearchResult> csSearch = new ExecutorCompletionService<>(threadPool);

    for (int i = 0; i < shardsCount; i++) {
        SearchCall call = new SearchCall(shards[i], weight, max, sort, timeout, i, filter);
        csSearch.submit(call);
    }

    final ScoreDoc[][] scoreDocs = new ScoreDoc[shardsCount][];
    ScoreDoc scoreDocExample = null;
    for (int i = 0; i < shardsCount; i++) {
        try {
            final SearchResult searchResult = csSearch.take().get();
            final int callIndex = searchResult.getSearchCallIndex();

            totalHits += searchResult._totalHits;
            scoreDocs[callIndex] = searchResult._scoreDocs;
            if (scoreDocExample == null && scoreDocs[callIndex].length > 0) {
                scoreDocExample = scoreDocs[callIndex][0];
            }
        } catch (InterruptedException e) {
            throw new IOException("Multithread shard search interrupted:", e);
        } catch (ExecutionException e) {
            throw new IOException("Multithread shard search could not be executed:", e);
        }
    }

    result.addTotalHits(totalHits);

    final Iterable<Hit> finalHitList;
    // Limit the request to the number requested or the total number of
    // documents, whichever is smaller.
    int limit = Math.min(numDocs, max);
    if (sort == null || totalHits == 0) {
        final ChokHitQueue hq = new ChokHitQueue(limit);
        int pos = 0;
        BitSet done = new BitSet(shardsCount);
        while (done.cardinality() != shardsCount) {
            ScoreDoc scoreDoc = null;
            for (int i = 0; i < shardsCount; i++) {
                // only process this shard if it is not yet done.
                if (!done.get(i)) {
                    final ScoreDoc[] docs = scoreDocs[i];
                    if (pos < docs.length) {
                        scoreDoc = docs[pos];
                        final Hit hit = new Hit(shards[i], getNodeName(), scoreDoc.score, scoreDoc.doc);
                        if (!hq.insert(hit)) {
                            // no doc left that has a higher score than the lowest score in
                            // the queue
                            done.set(i, true);
                        }
                    } else {
                        // no docs left in this shard
                        done.set(i, true);
                    }
                }
            }
            // we always wait until we got all hits from this position in all
            // shards.

            pos++;
            if (scoreDoc == null) {
                // we do not have any more data
                break;
            }
        }
        finalHitList = hq;
    } else {
        WritableType[] sortFieldsTypes;
        FieldDoc fieldDoc = (FieldDoc) scoreDocExample;
        sortFieldsTypes = WritableType.detectWritableTypes(fieldDoc.fields);
        result.setSortFieldTypes(sortFieldsTypes);
        finalHitList = mergeFieldSort(new FieldSortComparator(sort.getSort(), sortFieldsTypes), limit,
                scoreDocs, shards, getNodeName());
    }

    for (Hit hit : finalHitList) {
        if (hit != null) {
            result.addHit(hit);
        }
    }
}
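
In the unsorted branch, the done bit set tracks which shards can no longer contribute: the outer loop keeps pulling one hit per shard per position until done.cardinality() reaches shardsCount or no shard produced a document in the current round.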

From source file: de.unihildesheim.iw.lucene.util.BitsUtilsTest.java

License: Open Source License

@SuppressWarnings("ImplicitNumericConversion")
@Test
public void testBits2BitSet() throws Exception {
    final FixedBitSet fbs = new FixedBitSet(11);
    fbs.set(1);
    fbs.set(3);
    fbs.set(6);
    fbs.set(7);
    fbs.set(8);
    fbs.set(10);

    final BitSet result = BitsUtils.bits2BitSet(fbs);

    Assert.assertEquals("Bit count mismatch.", fbs.cardinality(), result.cardinality());
    for (int i = 0; i < 11; i++) {
        Assert.assertEquals("Bits mismatch.", fbs.get(i), result.get(i));
    }
}

From source file: de.unihildesheim.iw.lucene.util.DocIdSetUtils.java

License: Open Source License

/**
 * Get the count of documents available in the set.
 *
 * @param dis the document id set
 * @return Cardinality
 * @throws IOException Thrown on low-level I/O-errors
 */
public static int cardinality(@NotNull final DocIdSet dis) throws IOException {
    final int cardinality;

    if (RoaringDocIdSet.class.isInstance(dis)) {
        cardinality = ((RoaringDocIdSet) dis).cardinality();
    } else {
        @Nullable
        final BitSet bits = bits(dis);
        if (bits == null) {
            @Nullable
            final DocIdSetIterator disi = dis.iterator();
            cardinality = disi == null ? 0 : (int) StreamUtils.stream(disi).count();
        } else {
            cardinality = bits.cardinality();
        }
    }
    return cardinality < 0 ? 0 : cardinality;
}
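
A hypothetical usage sketch of this helper (assuming the DocIdSetUtils class above is on the classpath; RoaringDocIdSet.Builder is standard Lucene API and requires doc ids in increasing order):

import java.io.IOException;
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.util.RoaringDocIdSet;

public class DocIdSetCardinalityDemo {
    public static void main(String[] args) throws IOException {
        // Three documents out of a 16-document id space.
        DocIdSet dis = new RoaringDocIdSet.Builder(16).add(2).add(5).add(11).build();
        // Takes the RoaringDocIdSet fast path above and prints 3.
        System.out.println(DocIdSetUtils.cardinality(dis));
    }
}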

From source file: org.elasticsearch.common.lucene.index.FilterableTermsEnum.java

License: Apache License

public FilterableTermsEnum(IndexReader reader, String field, int docsEnumFlag, @Nullable Query filter)
        throws IOException {
    if ((docsEnumFlag != PostingsEnum.FREQS) && (docsEnumFlag != PostingsEnum.NONE)) {
        throw new IllegalArgumentException("invalid docsEnumFlag of " + docsEnumFlag);
    }
    this.docsEnumFlag = docsEnumFlag;
    if (filter == null) {
        // Important - need to use the doc count that includes deleted docs
        // or we have this issue: https://github.com/elasticsearch/elasticsearch/issues/7951
        numDocs = reader.maxDoc();
    }
    List<LeafReaderContext> leaves = reader.leaves();
    List<Holder> enums = new ArrayList<>(leaves.size());
    final Weight weight;
    if (filter == null) {
        weight = null;
    } else {
        final IndexSearcher searcher = new IndexSearcher(reader);
        searcher.setQueryCache(null);
        weight = searcher.createNormalizedWeight(filter, false);
    }
    for (LeafReaderContext context : leaves) {
        Terms terms = context.reader().terms(field);
        if (terms == null) {
            continue;
        }
        TermsEnum termsEnum = terms.iterator();
        if (termsEnum == null) {
            continue;
        }
        BitSet bits = null;
        if (weight != null) {
            Scorer scorer = weight.scorer(context);
            if (scorer == null) {
                // fully filtered, none matching, no need to iterate on this
                continue;
            }
            DocIdSetIterator docs = scorer.iterator();

            // we want to force apply deleted docs
            final Bits liveDocs = context.reader().getLiveDocs();
            if (liveDocs != null) {
                docs = new FilteredDocIdSetIterator(docs) {
                    @Override
                    protected boolean match(int doc) {
                        return liveDocs.get(doc);
                    }
                };
            }

            BitDocIdSet.Builder builder = new BitDocIdSet.Builder(context.reader().maxDoc());
            builder.or(docs);
            bits = builder.build().bits();

            // Count how many docs are in our filtered set
            // TODO make this lazy-loaded only for those that need it?
            numDocs += bits.cardinality();
        }
        enums.add(new Holder(termsEnum, bits));
    }
    this.enums = enums.toArray(new Holder[enums.size()]);
}

From source file: org.elasticsearch.index.cache.bitset.BitSetFilterCacheTests.java

License: Apache License

private static int matchCount(BitSetProducer producer, IndexReader reader) throws IOException {
    int count = 0;
    for (LeafReaderContext ctx : reader.leaves()) {
        final BitSet bitSet = producer.getBitSet(ctx);
        if (bitSet != null) {
            count += bitSet.cardinality();
        }
    }
    return count;
}

From source file: org.elasticsearch.percolator.PercolatorMatchedSlotSubFetchPhase.java

License: Apache License

@Override
public void hitsExecute(SearchContext context, SearchHit[] hits) throws IOException {
    List<PercolateQuery> percolateQueries = locatePercolatorQuery(context.query());
    if (percolateQueries.isEmpty()) {
        return;
    }

    boolean singlePercolateQuery = percolateQueries.size() == 1;
    for (PercolateQuery percolateQuery : percolateQueries) {
        String fieldName = singlePercolateQuery ? FIELD_NAME_PREFIX
                : FIELD_NAME_PREFIX + "_" + percolateQuery.getName();
        IndexSearcher percolatorIndexSearcher = percolateQuery.getPercolatorIndexSearcher();
        Weight weight = percolatorIndexSearcher.createNormalizedWeight(Queries.newNonNestedFilter(), false);
        Scorer s = weight.scorer(percolatorIndexSearcher.getIndexReader().leaves().get(0));
        int memoryIndexMaxDoc = percolatorIndexSearcher.getIndexReader().maxDoc();
        BitSet rootDocs = BitSet.of(s.iterator(), memoryIndexMaxDoc);
        int[] rootDocsBySlot = null;
        boolean hasNestedDocs = rootDocs.cardinality() != percolatorIndexSearcher.getIndexReader().numDocs();
        if (hasNestedDocs) {
            rootDocsBySlot = buildRootDocsSlots(rootDocs);
        }

        PercolateQuery.QueryStore queryStore = percolateQuery.getQueryStore();
        List<LeafReaderContext> ctxs = context.searcher().getIndexReader().leaves();
        for (SearchHit hit : hits) {
            LeafReaderContext ctx = ctxs.get(ReaderUtil.subIndex(hit.docId(), ctxs));
            int segmentDocId = hit.docId() - ctx.docBase;
            Query query = queryStore.getQueries(ctx).apply(segmentDocId);

            TopDocs topDocs = percolatorIndexSearcher.search(query, memoryIndexMaxDoc,
                    new Sort(SortField.FIELD_DOC));
            if (topDocs.totalHits == 0) {
                // This hit didn't match with a percolate query,
                // likely to happen when percolating multiple documents
                continue;
            }

            Map<String, DocumentField> fields = hit.fieldsOrNull();
            if (fields == null) {
                fields = new HashMap<>();
                hit.fields(fields);
            }
            IntStream slots = convertTopDocsToSlots(topDocs, rootDocsBySlot);
            fields.put(fieldName, new DocumentField(fieldName, slots.boxed().collect(Collectors.toList())));
        }
    }
}
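
Here cardinality() doubles as a nested-document probe: if the number of root documents (rootDocs.cardinality()) differs from the reader's total numDocs(), the in-memory index must contain nested child documents, and the slot mapping is built.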

From source file: org.elasticsearch.percolator.PercolatorMatchedSlotSubFetchPhase.java

License: Apache License

static int[] buildRootDocsSlots(BitSet rootDocs) {
    int slot = 0;
    int[] rootDocsBySlot = new int[rootDocs.cardinality()];
    BitSetIterator iterator = new BitSetIterator(rootDocs, 0);
    for (int rootDocId = iterator.nextDoc(); rootDocId != NO_MORE_DOCS; rootDocId = iterator.nextDoc()) {
        rootDocsBySlot[slot++] = rootDocId;
    }
    return rootDocsBySlot;
}
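
Because cardinality() returns the exact number of set bits, it is used here to size rootDocsBySlot up front before a BitSetIterator walks the set bits in order.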

From source file: org.elasticsearch.xpack.core.security.authz.accesscontrol.DocumentSubsetReader.java

License: Open Source License

/**
 * Compute the number of live documents. This method is SLOW.
 */
private static int computeNumDocs(LeafReader reader, Query roleQuery, BitSet roleQueryBits) {
    final Bits liveDocs = reader.getLiveDocs();
    if (roleQueryBits == null) {
        return 0;
    } else if (liveDocs == null) {
        // slow
        return roleQueryBits.cardinality();
    } else {
        // very slow, but necessary in order to be correct
        int numDocs = 0;
        DocIdSetIterator it = new BitSetIterator(roleQueryBits, 0L); // we don't use the cost
        try {
            for (int doc = it.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = it.nextDoc()) {
                if (liveDocs.get(doc)) {
                    numDocs++;
                }
            }
            return numDocs;
        } catch (IOException e) {
            throw new UncheckedIOException(e);
        }
    }
}
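
The cardinality() call is the fast path: with no deletions, the number of set role-query bits is the live document count directly, while the presence of liveDocs forces the bit-by-bit intersection that the comment flags as very slow.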