List of usage examples for the cardinality() method of org.apache.lucene.util.BitSet
/**
 * Returns the cardinality of this bit set.
 *
 * <p>Callers in the examples on this page use the result as the number of
 * bits that are currently set (for instance to size an array that is then
 * filled with one entry per set bit).
 *
 * @return the cardinality as an {@code int}
 */
public abstract int cardinality();
From source file:com.dasasian.chok.lucene.LuceneServer.java
License:Apache License
/**
 * Merges the already sorted sub-lists into one big sorted list.
 *
 * <p>On every step the current head of each unfinished sub-list is compared
 * and the smallest one (according to {@code comparator}) is appended to the
 * result. Merging stops as soon as {@code count} hits have been collected or
 * every sub-list is exhausted. If there is nothing to merge (no sub-lists,
 * only empty sub-lists, or {@code count} is zero) an empty list is returned.
 *
 * @param comparator      compares the sort fields of two documents
 * @param count           maximum number of hits to return
 * @param sortedFieldDocs one sorted array of documents per shard; the
 *                        elements are cast to {@code FieldDoc}
 * @param shards          shard names, indexed like {@code sortedFieldDocs}
 * @param nodeName        node name stored in every created hit
 * @return the merged hits, at most {@code count} of them
 */
private static List<Hit> mergeFieldSort(FieldSortComparator comparator, int count, ScoreDoc[][] sortedFieldDocs,
        String[] shards, String nodeName) {
    int[] arrayPositions = new int[sortedFieldDocs.length];
    final List<Hit> sortedResult = new ArrayList<>(count);

    // Sub-lists that are empty from the start never take part in the merge.
    BitSet listDone = new BitSet(sortedFieldDocs.length);
    for (int subListIndex = 0; subListIndex < arrayPositions.length; subListIndex++) {
        if (sortedFieldDocs[subListIndex].length == 0) {
            listDone.set(subListIndex, true);
        }
    }

    // The condition is checked BEFORE the first iteration: with a do/while
    // loop an input consisting only of empty sub-lists would index
    // sortedFieldDocs[-1], and count == 0 would still produce one hit.
    while (sortedResult.size() < count && listDone.cardinality() < arrayPositions.length) {
        int fieldDocArrayWithSmallestFieldDoc = -1;
        FieldDoc smallestFieldDoc = null;
        for (int subListIndex = 0; subListIndex < arrayPositions.length; subListIndex++) {
            if (!listDone.get(subListIndex)) {
                FieldDoc candidate = (FieldDoc) sortedFieldDocs[subListIndex][arrayPositions[subListIndex]];
                // Strict "<" keeps the earlier sub-list on ties.
                if (smallestFieldDoc == null
                        || comparator.compare(candidate.fields, smallestFieldDoc.fields) < 0) {
                    smallestFieldDoc = candidate;
                    fieldDocArrayWithSmallestFieldDoc = subListIndex;
                }
            }
        }

        // At least one sub-list is unfinished here, so the index is valid.
        ScoreDoc[] smallestElementList = sortedFieldDocs[fieldDocArrayWithSmallestFieldDoc];
        FieldDoc fieldDoc = (FieldDoc) smallestElementList[arrayPositions[fieldDocArrayWithSmallestFieldDoc]];
        arrayPositions[fieldDocArrayWithSmallestFieldDoc]++;

        final Hit hit = new Hit(shards[fieldDocArrayWithSmallestFieldDoc], nodeName, fieldDoc.score,
                fieldDoc.doc);
        hit.setSortFields(WritableType.convertComparable(comparator.getFieldTypes(), fieldDoc.fields));
        sortedResult.add(hit);

        // Mark the sub-list as finished once its last element was consumed.
        if (arrayPositions[fieldDocArrayWithSmallestFieldDoc] >= smallestElementList.length) {
            listDone.set(fieldDocArrayWithSmallestFieldDoc, true);
        }
    }
    return sortedResult;
}
From source file:com.dasasian.chok.lucene.LuceneServer.java
License:Apache License
/** * Search in the given shards and return max hits for given query * * @param query the query/*www .j av a 2 s .co m*/ * @param freqs document frequency writer * @param shards the shards * @param result the writable for the result * @param max max results * @param sort the sort order * @param timeout timeout value * @param filter filter to apply * @throws IOException when an error occurs */ protected final void search(final Query query, final DocumentFrequencyWritable freqs, final String[] shards, final HitsMapWritable result, final int max, Sort sort, long timeout, Filter filter) throws IOException { timeout = getCollectorTimeout(timeout); final Query rewrittenQuery = rewrite(query, shards); final int numDocs = freqs.getNumDocsAsInteger(); final Weight weight = rewrittenQuery .weight(new CachedDfSource(freqs.getAll(), numDocs, new DefaultSimilarity())); int totalHits = 0; final int shardsCount = shards.length; // Run the search in parallel on the shards with a thread pool. CompletionService<SearchResult> csSearch = new ExecutorCompletionService<>(threadPool); for (int i = 0; i < shardsCount; i++) { SearchCall call = new SearchCall(shards[i], weight, max, sort, timeout, i, filter); csSearch.submit(call); } final ScoreDoc[][] scoreDocs = new ScoreDoc[shardsCount][]; ScoreDoc scoreDocExample = null; for (int i = 0; i < shardsCount; i++) { try { final SearchResult searchResult = csSearch.take().get(); final int callIndex = searchResult.getSearchCallIndex(); totalHits += searchResult._totalHits; scoreDocs[callIndex] = searchResult._scoreDocs; if (scoreDocExample == null && scoreDocs[callIndex].length > 0) { scoreDocExample = scoreDocs[callIndex][0]; } } catch (InterruptedException e) { throw new IOException("Multithread shard search interrupted:", e); } catch (ExecutionException e) { throw new IOException("Multithread shard search could not be executed:", e); } } result.addTotalHits(totalHits); final Iterable<Hit> finalHitList; // Limit the request to the number 
requested or the total number of // documents, whichever is smaller. int limit = Math.min(numDocs, max); if (sort == null || totalHits == 0) { final ChokHitQueue hq = new ChokHitQueue(limit); int pos = 0; BitSet done = new BitSet(shardsCount); while (done.cardinality() != shardsCount) { ScoreDoc scoreDoc = null; for (int i = 0; i < shardsCount; i++) { // only process this shard if it is not yet done. if (!done.get(i)) { final ScoreDoc[] docs = scoreDocs[i]; if (pos < docs.length) { scoreDoc = docs[pos]; final Hit hit = new Hit(shards[i], getNodeName(), scoreDoc.score, scoreDoc.doc); if (!hq.insert(hit)) { // no doc left that has a higher score than the lowest score in // the queue done.set(i, true); } } else { // no docs left in this shard done.set(i, true); } } } // we always wait until we got all hits from this position in all // shards. pos++; if (scoreDoc == null) { // we do not have any more data break; } } finalHitList = hq; } else { WritableType[] sortFieldsTypes; FieldDoc fieldDoc = (FieldDoc) scoreDocExample; sortFieldsTypes = WritableType.detectWritableTypes(fieldDoc.fields); result.setSortFieldTypes(sortFieldsTypes); finalHitList = mergeFieldSort(new FieldSortComparator(sort.getSort(), sortFieldsTypes), limit, scoreDocs, shards, getNodeName()); } for (Hit hit : finalHitList) { if (hit != null) { result.addHit(hit); } } }
From source file:de.unihildesheim.iw.lucene.util.BitsUtilsTest.java
License:Open Source License
/**
 * Checks that {@code BitsUtils.bits2BitSet} produces a bit set with the same
 * cardinality and the same value at every index as the source
 * {@code FixedBitSet}.
 */
@SuppressWarnings("ImplicitNumericConversion")
@Test
public void testBits2BitSet() throws Exception {
    final int size = 11;
    final int[] enabledBits = {1, 3, 6, 7, 8, 10};

    final FixedBitSet fbs = new FixedBitSet(size);
    for (final int bit : enabledBits) {
        fbs.set(bit);
    }

    final BitSet result = BitsUtils.bits2BitSet(fbs);

    Assert.assertEquals("Bit count mismatch.", fbs.cardinality(), result.cardinality());
    int pos = 0;
    while (pos < size) {
        Assert.assertEquals("Bits mismatch.", fbs.get(pos), result.get(pos));
        pos++;
    }
}
From source file:de.unihildesheim.iw.lucene.util.DocIdSetUtils.java
License:Open Source License
/**
 * Counts the documents contained in a document id set.
 *
 * <p>A {@code RoaringDocIdSet} reports its own cardinality. Otherwise the
 * bit set obtained through {@code bits(dis)} is asked, and if there is none
 * the set's iterator is counted. Negative values are reported as zero.
 *
 * @param dis Documents id set
 * @return Cardinality, never negative
 * @throws IOException Thrown on low-level I/O-errors
 */
public static int cardinality(@NotNull final DocIdSet dis) throws IOException {
    if (dis instanceof RoaringDocIdSet) {
        return Math.max(0, ((RoaringDocIdSet) dis).cardinality());
    }

    @Nullable final BitSet bitSet = bits(dis);
    if (bitSet != null) {
        return Math.max(0, bitSet.cardinality());
    }

    // No bit set available: fall back to counting the iterator, if any.
    @Nullable final DocIdSetIterator iterator = dis.iterator();
    if (iterator == null) {
        return 0;
    }
    return Math.max(0, (int) StreamUtils.stream(iterator).count());
}
From source file:org.elasticsearch.common.lucene.index.FilterableTermsEnum.java
License:Apache License
public FilterableTermsEnum(IndexReader reader, String field, int docsEnumFlag, @Nullable Query filter) throws IOException { if ((docsEnumFlag != PostingsEnum.FREQS) && (docsEnumFlag != PostingsEnum.NONE)) { throw new IllegalArgumentException("invalid docsEnumFlag of " + docsEnumFlag); }//from ww w.ja v a 2 s . c o m this.docsEnumFlag = docsEnumFlag; if (filter == null) { // Important - need to use the doc count that includes deleted docs // or we have this issue: https://github.com/elasticsearch/elasticsearch/issues/7951 numDocs = reader.maxDoc(); } List<LeafReaderContext> leaves = reader.leaves(); List<Holder> enums = new ArrayList<>(leaves.size()); final Weight weight; if (filter == null) { weight = null; } else { final IndexSearcher searcher = new IndexSearcher(reader); searcher.setQueryCache(null); weight = searcher.createNormalizedWeight(filter, false); } for (LeafReaderContext context : leaves) { Terms terms = context.reader().terms(field); if (terms == null) { continue; } TermsEnum termsEnum = terms.iterator(); if (termsEnum == null) { continue; } BitSet bits = null; if (weight != null) { Scorer scorer = weight.scorer(context); if (scorer == null) { // fully filtered, none matching, no need to iterate on this continue; } DocIdSetIterator docs = scorer.iterator(); // we want to force apply deleted docs final Bits liveDocs = context.reader().getLiveDocs(); if (liveDocs != null) { docs = new FilteredDocIdSetIterator(docs) { @Override protected boolean match(int doc) { return liveDocs.get(doc); } }; } BitDocIdSet.Builder builder = new BitDocIdSet.Builder(context.reader().maxDoc()); builder.or(docs); bits = builder.build().bits(); // Count how many docs are in our filtered set // TODO make this lazy-loaded only for those that need it? numDocs += bits.cardinality(); } enums.add(new Holder(termsEnum, bits)); } this.enums = enums.toArray(new Holder[enums.size()]); }
From source file:org.elasticsearch.index.cache.bitset.BitSetFilterCacheTests.java
License:Apache License
private static int matchCount(BitSetProducer producer, IndexReader reader) throws IOException { int count = 0; for (LeafReaderContext ctx : reader.leaves()) { final BitSet bitSet = producer.getBitSet(ctx); if (bitSet != null) { count += bitSet.cardinality(); }//from w ww .jav a 2 s. c o m } return count; }
From source file:org.elasticsearch.percolator.PercolatorMatchedSlotSubFetchPhase.java
License:Apache License
@Override public void hitsExecute(SearchContext context, SearchHit[] hits) throws IOException { List<PercolateQuery> percolateQueries = locatePercolatorQuery(context.query()); if (percolateQueries.isEmpty()) { return;/*from w w w .j ava2 s . com*/ } boolean singlePercolateQuery = percolateQueries.size() == 1; for (PercolateQuery percolateQuery : percolateQueries) { String fieldName = singlePercolateQuery ? FIELD_NAME_PREFIX : FIELD_NAME_PREFIX + "_" + percolateQuery.getName(); IndexSearcher percolatorIndexSearcher = percolateQuery.getPercolatorIndexSearcher(); Weight weight = percolatorIndexSearcher.createNormalizedWeight(Queries.newNonNestedFilter(), false); Scorer s = weight.scorer(percolatorIndexSearcher.getIndexReader().leaves().get(0)); int memoryIndexMaxDoc = percolatorIndexSearcher.getIndexReader().maxDoc(); BitSet rootDocs = BitSet.of(s.iterator(), memoryIndexMaxDoc); int[] rootDocsBySlot = null; boolean hasNestedDocs = rootDocs.cardinality() != percolatorIndexSearcher.getIndexReader().numDocs(); if (hasNestedDocs) { rootDocsBySlot = buildRootDocsSlots(rootDocs); } PercolateQuery.QueryStore queryStore = percolateQuery.getQueryStore(); List<LeafReaderContext> ctxs = context.searcher().getIndexReader().leaves(); for (SearchHit hit : hits) { LeafReaderContext ctx = ctxs.get(ReaderUtil.subIndex(hit.docId(), ctxs)); int segmentDocId = hit.docId() - ctx.docBase; Query query = queryStore.getQueries(ctx).apply(segmentDocId); TopDocs topDocs = percolatorIndexSearcher.search(query, memoryIndexMaxDoc, new Sort(SortField.FIELD_DOC)); if (topDocs.totalHits == 0) { // This hit didn't match with a percolate query, // likely to happen when percolating multiple documents continue; } Map<String, DocumentField> fields = hit.fieldsOrNull(); if (fields == null) { fields = new HashMap<>(); hit.fields(fields); } IntStream slots = convertTopDocsToSlots(topDocs, rootDocsBySlot); fields.put(fieldName, new DocumentField(fieldName, slots.boxed().collect(Collectors.toList()))); } } }
From source file:org.elasticsearch.percolator.PercolatorMatchedSlotSubFetchPhase.java
License:Apache License
/**
 * Lists the doc ids produced by iterating over {@code rootDocs}, in
 * iteration order, so that the array index is the slot of the doc id.
 *
 * @param rootDocs bit set of the root documents
 * @return array whose length is the bit set's cardinality
 */
static int[] buildRootDocsSlots(BitSet rootDocs) {
    int nextSlot = 0;
    final int[] slots = new int[rootDocs.cardinality()];
    final BitSetIterator docs = new BitSetIterator(rootDocs, 0);

    int docId = docs.nextDoc();
    while (docId != NO_MORE_DOCS) {
        slots[nextSlot] = docId;
        nextSlot++;
        docId = docs.nextDoc();
    }
    return slots;
}
From source file:org.elasticsearch.xpack.core.security.authz.accesscontrol.DocumentSubsetReader.java
License:Open Source License
/** * Compute the number of live documents. This method is SLOW. */// ww w . j a v a 2 s . co m private static int computeNumDocs(LeafReader reader, Query roleQuery, BitSet roleQueryBits) { final Bits liveDocs = reader.getLiveDocs(); if (roleQueryBits == null) { return 0; } else if (liveDocs == null) { // slow return roleQueryBits.cardinality(); } else { // very slow, but necessary in order to be correct int numDocs = 0; DocIdSetIterator it = new BitSetIterator(roleQueryBits, 0L); // we don't use the cost try { for (int doc = it.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = it.nextDoc()) { if (liveDocs.get(doc)) { numDocs++; } } return numDocs; } catch (IOException e) { throw new UncheckedIOException(e); } } }