Example usage for org.apache.lucene.util BitSet cardinality

Introduction

On this page you can find example usages of the cardinality() method of org.apache.lucene.util.BitSet.

Prototype

public abstract int cardinality();

Source Link

Document

Return the number of bits that are set.

Usage

From source file:com.dasasian.chok.lucene.LuceneServer.java

License:Apache License

/**
 * Merges the already sorted sub-lists to one big sorted list.
 *
 * <p>On every round the current head of each unfinished sub-list is compared and the smallest one
 * (according to {@code comparator}) is moved into the result.
 *
 * @param comparator compares the sort-field values of two documents
 * @param count maximum number of hits to return
 * @param sortedFieldDocs one sorted array of documents per shard; every element is cast to {@code FieldDoc}
 * @param shards shard names, index-aligned with {@code sortedFieldDocs}
 * @param nodeName node name stored on every created hit
 * @return at most {@code count} hits in merged order; empty when {@code count} is not positive or all
 *         sub-lists are empty
 */
private static List<Hit> mergeFieldSort(FieldSortComparator comparator, int count, ScoreDoc[][] sortedFieldDocs,
        String[] shards, String nodeName) {
    final int listCount = sortedFieldDocs.length;
    // Read position per sub-list.
    final int[] arrayPositions = new int[listCount];
    // Clamp the capacity hint: a negative count must yield an empty result, not an exception.
    final List<Hit> sortedResult = new ArrayList<>(Math.max(count, 0));

    // Bit i is set once sub-list i has no unread element left.
    final BitSet listDone = new BitSet(listCount);
    for (int subListIndex = 0; subListIndex < listCount; subListIndex++) {
        if (sortedFieldDocs[subListIndex].length == 0) {
            listDone.set(subListIndex, true);
        }
    }

    // The condition is checked BEFORE each round: with only empty sub-lists (or count <= 0) there is
    // nothing to pick, and running the body anyway would index sortedFieldDocs with -1.
    while (sortedResult.size() < count && listDone.cardinality() < listCount) {
        int fieldDocArrayWithSmallestFieldDoc = -1;
        FieldDoc smallestFieldDoc = null;
        for (int subListIndex = 0; subListIndex < listCount; subListIndex++) {
            if (!listDone.get(subListIndex)) {
                FieldDoc candidate = (FieldDoc) sortedFieldDocs[subListIndex][arrayPositions[subListIndex]];
                if (smallestFieldDoc == null
                        || comparator.compare(candidate.fields, smallestFieldDoc.fields) < 0) {
                    smallestFieldDoc = candidate;
                    fieldDocArrayWithSmallestFieldDoc = subListIndex;
                }
            }
        }

        // At least one sub-list is unfinished here, so a smallest element was found above.
        arrayPositions[fieldDocArrayWithSmallestFieldDoc]++;
        final Hit hit = new Hit(shards[fieldDocArrayWithSmallestFieldDoc], nodeName, smallestFieldDoc.score,
                smallestFieldDoc.doc);
        hit.setSortFields(WritableType.convertComparable(comparator.getFieldTypes(), smallestFieldDoc.fields));
        sortedResult.add(hit);

        if (arrayPositions[fieldDocArrayWithSmallestFieldDoc]
                >= sortedFieldDocs[fieldDocArrayWithSmallestFieldDoc].length) {
            listDone.set(fieldDocArrayWithSmallestFieldDoc, true);
        }
    }
    return sortedResult;
}

From source file:com.dasasian.chok.lucene.LuceneServer.java

License:Apache License

/**
 * Search in the given shards and return max hits for given query
 *
 * @param query the query/*www  .j  av  a  2 s .co  m*/
 * @param freqs document frequency writer
 * @param shards the shards
 * @param result the writable for the result
 * @param max max results
 * @param sort the sort order
 * @param timeout timeout value
 * @param filter filter to apply
 * @throws IOException when an error occurs
 */
protected final void search(final Query query, final DocumentFrequencyWritable freqs, final String[] shards,
        final HitsMapWritable result, final int max, Sort sort, long timeout, Filter filter)
        throws IOException {
    timeout = getCollectorTimeout(timeout);
    final Query rewrittenQuery = rewrite(query, shards);
    final int numDocs = freqs.getNumDocsAsInteger();
    final Weight weight = rewrittenQuery
            .weight(new CachedDfSource(freqs.getAll(), numDocs, new DefaultSimilarity()));
    int totalHits = 0;
    final int shardsCount = shards.length;

    // Run the search in parallel on the shards with a thread pool.
    CompletionService<SearchResult> csSearch = new ExecutorCompletionService<>(threadPool);

    for (int i = 0; i < shardsCount; i++) {
        SearchCall call = new SearchCall(shards[i], weight, max, sort, timeout, i, filter);
        csSearch.submit(call);
    }

    final ScoreDoc[][] scoreDocs = new ScoreDoc[shardsCount][];
    ScoreDoc scoreDocExample = null;
    for (int i = 0; i < shardsCount; i++) {
        try {
            final SearchResult searchResult = csSearch.take().get();
            final int callIndex = searchResult.getSearchCallIndex();

            totalHits += searchResult._totalHits;
            scoreDocs[callIndex] = searchResult._scoreDocs;
            if (scoreDocExample == null && scoreDocs[callIndex].length > 0) {
                scoreDocExample = scoreDocs[callIndex][0];
            }
        } catch (InterruptedException e) {
            throw new IOException("Multithread shard search interrupted:", e);
        } catch (ExecutionException e) {
            throw new IOException("Multithread shard search could not be executed:", e);
        }
    }

    result.addTotalHits(totalHits);

    final Iterable<Hit> finalHitList;
    // Limit the request to the number requested or the total number of
    // documents, whichever is smaller.
    int limit = Math.min(numDocs, max);
    if (sort == null || totalHits == 0) {
        final ChokHitQueue hq = new ChokHitQueue(limit);
        int pos = 0;
        BitSet done = new BitSet(shardsCount);
        while (done.cardinality() != shardsCount) {
            ScoreDoc scoreDoc = null;
            for (int i = 0; i < shardsCount; i++) {
                // only process this shard if it is not yet done.
                if (!done.get(i)) {
                    final ScoreDoc[] docs = scoreDocs[i];
                    if (pos < docs.length) {
                        scoreDoc = docs[pos];
                        final Hit hit = new Hit(shards[i], getNodeName(), scoreDoc.score, scoreDoc.doc);
                        if (!hq.insert(hit)) {
                            // no doc left that has a higher score than the lowest score in
                            // the queue
                            done.set(i, true);
                        }
                    } else {
                        // no docs left in this shard
                        done.set(i, true);
                    }
                }
            }
            // we always wait until we got all hits from this position in all
            // shards.

            pos++;
            if (scoreDoc == null) {
                // we do not have any more data
                break;
            }
        }
        finalHitList = hq;
    } else {
        WritableType[] sortFieldsTypes;
        FieldDoc fieldDoc = (FieldDoc) scoreDocExample;
        sortFieldsTypes = WritableType.detectWritableTypes(fieldDoc.fields);
        result.setSortFieldTypes(sortFieldsTypes);
        finalHitList = mergeFieldSort(new FieldSortComparator(sort.getSort(), sortFieldsTypes), limit,
                scoreDocs, shards, getNodeName());
    }

    for (Hit hit : finalHitList) {
        if (hit != null) {
            result.addHit(hit);
        }
    }
}

From source file:de.unihildesheim.iw.lucene.util.BitsUtilsTest.java

License:Open Source License

/**
 * Converts a fixed bit set with a known pattern via {@code BitsUtils.bits2BitSet} and checks that the
 * result has the same cardinality and the same value at every index.
 */
@SuppressWarnings("ImplicitNumericConversion")
@Test
public void testBits2BitSet() throws Exception {
    final FixedBitSet source = new FixedBitSet(11);
    // Indices that are switched on in the source set.
    for (final int setIndex : new int[]{1, 3, 6, 7, 8, 10}) {
        source.set(setIndex);
    }

    final BitSet converted = BitsUtils.bits2BitSet(source);

    Assert.assertEquals("Bit count mismatch.", source.cardinality(), converted.cardinality());
    int index = 0;
    while (index < 11) {
        Assert.assertEquals("Bits mismatch.", source.get(index), converted.get(index));
        index++;
    }
}

From source file:de.unihildesheim.iw.lucene.util.DocIdSetUtils.java

License:Open Source License

/**
 * Counts the documents contained in a document id set.
 *
 * <p>A {@code RoaringDocIdSet} reports its own cardinality. For any other set the bit set obtained from
 * {@code bits(dis)} is asked; if there is none, the entries of the set's iterator are counted (a missing
 * iterator counts as zero). A negative count is reported as zero.
 *
 * @param dis Document id set
 * @return Number of documents in the set, never negative
 * @throws IOException Thrown on low-level I/O-errors
 */
public static int cardinality(@NotNull final DocIdSet dis) throws IOException {
    final int counted;

    if (dis instanceof RoaringDocIdSet) {
        counted = ((RoaringDocIdSet) dis).cardinality();
    } else {
        @Nullable
        final BitSet bitSet = bits(dis);
        if (bitSet != null) {
            counted = bitSet.cardinality();
        } else {
            @Nullable
            final DocIdSetIterator iterator = dis.iterator();
            if (iterator == null) {
                counted = 0;
            } else {
                counted = (int) StreamUtils.stream(iterator).count();
            }
        }
    }
    return Math.max(counted, 0);
}

From source file:org.elasticsearch.common.lucene.index.FilterableTermsEnum.java

License:Apache License

/**
 * Creates one {@code Holder} per leaf of {@code reader} that has terms for {@code field}.
 *
 * <p>Without a filter, {@code numDocs} is set to the reader's {@code maxDoc()}. With a filter, each leaf's
 * matching, live documents are collected into a bit set that is stored in the holder, and the bit set's
 * cardinality is added to {@code numDocs}; leaves for which the filter has no scorer are skipped.
 *
 * @param reader index reader whose leaves are inspected
 * @param field field whose terms are enumerated
 * @param docsEnumFlag must be {@code PostingsEnum.FREQS} or {@code PostingsEnum.NONE}
 * @param filter optional query restricting the documents, may be {@code null}
 * @throws IllegalArgumentException if {@code docsEnumFlag} is neither of the two accepted values
 * @throws IOException declared by this constructor's signature
 */
public FilterableTermsEnum(IndexReader reader, String field, int docsEnumFlag, @Nullable Query filter)
        throws IOException {
    final boolean flagAccepted = docsEnumFlag == PostingsEnum.FREQS || docsEnumFlag == PostingsEnum.NONE;
    if (!flagAccepted) {
        throw new IllegalArgumentException("invalid docsEnumFlag of " + docsEnumFlag);
    }
    this.docsEnumFlag = docsEnumFlag;

    final boolean unfiltered = filter == null;
    if (unfiltered) {
        // Important - need to use the doc count that includes deleted docs
        // or we have this issue: https://github.com/elasticsearch/elasticsearch/issues/7951
        numDocs = reader.maxDoc();
    }

    final List<LeafReaderContext> segments = reader.leaves();
    final List<Holder> holders = new ArrayList<>(segments.size());

    final Weight filterWeight;
    if (unfiltered) {
        filterWeight = null;
    } else {
        final IndexSearcher filterSearcher = new IndexSearcher(reader);
        filterSearcher.setQueryCache(null);
        filterWeight = filterSearcher.createNormalizedWeight(filter, false);
    }

    for (LeafReaderContext segment : segments) {
        final Terms segmentTerms = segment.reader().terms(field);
        if (segmentTerms == null) {
            continue;
        }
        final TermsEnum segmentEnum = segmentTerms.iterator();
        if (segmentEnum == null) {
            continue;
        }

        BitSet accepted = null;
        if (filterWeight != null) {
            final Scorer filterScorer = filterWeight.scorer(segment);
            if (filterScorer == null) {
                // fully filtered, none matching, no need to iterate on this
                continue;
            }
            DocIdSetIterator matching = filterScorer.iterator();

            // we want to force apply deleted docs
            final Bits live = segment.reader().getLiveDocs();
            if (live != null) {
                matching = new FilteredDocIdSetIterator(matching) {
                    @Override
                    protected boolean match(int doc) {
                        return live.get(doc);
                    }
                };
            }

            final BitDocIdSet.Builder collector = new BitDocIdSet.Builder(segment.reader().maxDoc());
            collector.or(matching);
            accepted = collector.build().bits();

            // Count how many docs are in our filtered set
            // TODO make this lazy-loaded only for those that need it?
            numDocs += accepted.cardinality();
        }
        holders.add(new Holder(segmentEnum, accepted));
    }
    this.enums = holders.toArray(new Holder[holders.size()]);
}

From source file:org.elasticsearch.index.cache.bitset.BitSetFilterCacheTests.java

License:Apache License

/**
 * Sums the cardinalities of the bit sets the producer yields for every leaf of the reader.
 * Leaves for which the producer returns {@code null} contribute nothing.
 */
private static int matchCount(BitSetProducer producer, IndexReader reader) throws IOException {
    int total = 0;
    for (LeafReaderContext leaf : reader.leaves()) {
        final BitSet leafMatches = producer.getBitSet(leaf);
        if (leafMatches == null) {
            continue;
        }
        total += leafMatches.cardinality();
    }
    return total;
}

From source file:org.elasticsearch.percolator.PercolatorMatchedSlotSubFetchPhase.java

License:Apache License

/**
 * For every percolate query found in the search context, runs each hit's stored query against that
 * percolate query's index searcher and stores the resulting slots on the hit as a document field.
 *
 * <p>The field is named {@code FIELD_NAME_PREFIX} when there is exactly one percolate query, otherwise
 * {@code FIELD_NAME_PREFIX + "_" + name}. Hits whose query matches nothing get no field.
 */
@Override
public void hitsExecute(SearchContext context, SearchHit[] hits) throws IOException {
    final List<PercolateQuery> queries = locatePercolatorQuery(context.query());
    if (queries.isEmpty()) {
        return;
    }

    final boolean onlyOneQuery = queries.size() == 1;
    for (PercolateQuery current : queries) {
        final String fieldName;
        if (onlyOneQuery) {
            fieldName = FIELD_NAME_PREFIX;
        } else {
            fieldName = FIELD_NAME_PREFIX + "_" + current.getName();
        }

        final IndexSearcher memorySearcher = current.getPercolatorIndexSearcher();
        final Weight rootFilter = memorySearcher.createNormalizedWeight(Queries.newNonNestedFilter(), false);
        final Scorer rootScorer = rootFilter.scorer(memorySearcher.getIndexReader().leaves().get(0));
        final int memoryIndexMaxDoc = memorySearcher.getIndexReader().maxDoc();
        final BitSet rootDocs = BitSet.of(rootScorer.iterator(), memoryIndexMaxDoc);

        // Fewer root docs than docs overall means nested docs are present; only then is the
        // slot table built, otherwise null is handed to the slot conversion.
        final boolean nestedDocsPresent = rootDocs.cardinality() != memorySearcher.getIndexReader().numDocs();
        final int[] rootDocsBySlot = nestedDocsPresent ? buildRootDocsSlots(rootDocs) : null;

        final PercolateQuery.QueryStore store = current.getQueryStore();
        final List<LeafReaderContext> leaves = context.searcher().getIndexReader().leaves();
        for (SearchHit hit : hits) {
            final LeafReaderContext leaf = leaves.get(ReaderUtil.subIndex(hit.docId(), leaves));
            final int docIdInLeaf = hit.docId() - leaf.docBase;
            final Query storedQuery = store.getQueries(leaf).apply(docIdInLeaf);

            final TopDocs matches = memorySearcher.search(storedQuery, memoryIndexMaxDoc,
                    new Sort(SortField.FIELD_DOC));
            if (matches.totalHits == 0) {
                // This hit didn't match with a percolate query,
                // likely to happen when percolating multiple documents
                continue;
            }

            Map<String, DocumentField> hitFields = hit.fieldsOrNull();
            if (hitFields == null) {
                hitFields = new HashMap<>();
                hit.fields(hitFields);
            }
            hitFields.put(fieldName, new DocumentField(fieldName,
                    convertTopDocsToSlots(matches, rootDocsBySlot).boxed().collect(Collectors.toList())));
        }
    }
}

From source file:org.elasticsearch.percolator.PercolatorMatchedSlotSubFetchPhase.java

License:Apache License

/**
 * Lists the doc ids set in {@code rootDocs} in iteration order; the array index of each id is its slot.
 *
 * @param rootDocs bit set of root documents
 * @return array of length {@code rootDocs.cardinality()} holding the set doc ids
 */
static int[] buildRootDocsSlots(BitSet rootDocs) {
    final int[] slots = new int[rootDocs.cardinality()];
    final BitSetIterator rootDocIds = new BitSetIterator(rootDocs, 0);
    int nextSlot = 0;
    int docId = rootDocIds.nextDoc();
    while (docId != NO_MORE_DOCS) {
        slots[nextSlot] = docId;
        nextSlot++;
        docId = rootDocIds.nextDoc();
    }
    return slots;
}

From source file:org.elasticsearch.xpack.core.security.authz.accesscontrol.DocumentSubsetReader.java

License:Open Source License

/**
 * Counts the documents that are set in {@code roleQueryBits} and are live in {@code reader}.
 * This method is SLOW.
 *
 * @param reader leaf reader providing the live docs
 * @param roleQuery not used by this method
 * @param roleQueryBits documents matching the role query, may be {@code null}
 * @return 0 when {@code roleQueryBits} is {@code null}; its cardinality when the reader has no live-docs
 *         bits; otherwise the number of set documents that are live
 */
private static int computeNumDocs(LeafReader reader, Query roleQuery, BitSet roleQueryBits) {
    final Bits live = reader.getLiveDocs();
    if (roleQueryBits == null) {
        return 0;
    }
    if (live == null) {
        // slow
        return roleQueryBits.cardinality();
    }

    // very slow, but necessary in order to be correct
    final DocIdSetIterator matches = new BitSetIterator(roleQueryBits, 0L); // we don't use the cost
    int liveMatches = 0;
    try {
        int doc = matches.nextDoc();
        while (doc != DocIdSetIterator.NO_MORE_DOCS) {
            if (live.get(doc)) {
                liveMatches++;
            }
            doc = matches.nextDoc();
        }
    } catch (IOException e) {
        throw new UncheckedIOException(e);
    }
    return liveMatches;
}