List of usage examples for org.apache.lucene.util.FixedBitSet#nextSetBit
@Override
public int nextSetBit(int index)
From source file:org.apache.solr.search.DocSetUtil.java
License:Apache License
public static DocSet toSmallSet(BitDocSet bitSet) { int sz = bitSet.size(); int[] docs = new int[sz]; FixedBitSet bs = bitSet.getBits(); int doc = -1; for (int i = 0; i < sz; i++) { doc = bs.nextSetBit(doc + 1); docs[i] = doc;// w ww. ja v a 2 s.c o m } return new SortedIntDocSet(docs); }
From source file:org.apache.solr.search.facet.BlockJoin.java
License:Apache License
/** childInput may also contain parents (i.e. a parent or below will all roll up to that parent) */ public static DocSet toParents(DocSet childInput, BitDocSet parentList, QueryContext qcontext) throws IOException { FixedBitSet parentBits = parentList.getBits(); DocSetCollector collector = new DocSetCollector(qcontext.searcher().maxDoc()); DocIterator iter = childInput.iterator(); int currentParent = -1; while (iter.hasNext()) { int childDoc = iter.nextDoc(); // TODO: skipping if (childDoc <= currentParent) { // use <= since we also allow parents in the input // we already visited this parent continue; }/* w w w . j a va 2 s .c o m*/ currentParent = parentBits.nextSetBit(childDoc); if (currentParent != DocIdSetIterator.NO_MORE_DOCS) { // only collect the parent the first time we skip to it collector.collect(currentParent); } } return collector.getDocSet(); }
From source file:org.apache.solr.search.facet.UniqueSlotAcc.java
License:Apache License
private Object getShardHLL(int slot) throws IOException { FixedBitSet ords = arr[slot]; if (ords == null) return HLLAgg.NO_VALUES; HLL hll = factory.getHLL();//from w w w .j a v a 2s.co m long maxOrd = ords.length(); Hash.LongPair hashResult = new Hash.LongPair(); for (int ord = -1; ++ord < maxOrd;) { ord = ords.nextSetBit(ord); if (ord == DocIdSetIterator.NO_MORE_DOCS) break; BytesRef val = lookupOrd(ord); // way to avoid recomputing hash across slots? Prob not worth space Hash.murmurhash3_x64_128(val.bytes, val.offset, val.length, 0, hashResult); // idea: if the set is small enough, just send the hashes? We can add at the top // level or even just do a hash table at the top level. hll.addRaw(hashResult.val1); } SimpleOrderedMap map = new SimpleOrderedMap(); map.add("hll", hll.toBytes()); return map; }
From source file:org.apache.solr.search.facet.UniqueSlotAcc.java
License:Apache License
private Object getShardValue(int slot) throws IOException { if (factory != null) return getShardHLL(slot); FixedBitSet ords = arr[slot]; int unique;//from w ww. j ava2 s. co m if (counts != null) { unique = counts[slot]; } else { unique = ords == null ? 0 : ords.cardinality(); } SimpleOrderedMap map = new SimpleOrderedMap(); map.add("unique", unique); map.add("nTerms", nTerms); int maxExplicit = 100; // TODO: make configurable // TODO: share values across buckets if (unique > 0) { List lst = new ArrayList(Math.min(unique, maxExplicit)); long maxOrd = ords.length(); if (ords != null && ords.length() > 0) { for (int ord = 0; lst.size() < maxExplicit;) { ord = ords.nextSetBit(ord); if (ord == DocIdSetIterator.NO_MORE_DOCS) break; BytesRef val = lookupOrd(ord); Object o = field.getType().toObject(field, val); lst.add(o); if (++ord >= maxOrd) break; } } map.add("vals", lst); } return map; }
From source file:org.elasticsearch.index.search.child.ParentIdsFilter.java
License:Apache License
/**
 * Resolves every id in {@code parentIds} to a doc in this segment and returns the
 * matching docs as a bit set.
 *
 * <p>Returns {@code null} when the segment has no uid terms, or when none of the ids
 * resolved to a doc. When a non-nested filter is configured and the resolved doc is not
 * set in it, the next set bit of that filter is used instead.
 *
 * @param context    the segment being filtered
 * @param acceptDocs docs allowed to match; falls back to the reader's live docs when null
 * @return the matching docs, or {@code null}
 * @throws IOException declared by the original signature
 */
@Override
public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException {
    final Terms uidTerms = context.reader().terms(UidFieldMapper.NAME);
    if (uidTerms == null) {
        return null;
    }

    final TermsEnum uidEnum = uidTerms.iterator(null);
    final BytesRef uidScratch = new BytesRef();
    final BytesRef idScratch = new BytesRef();
    if (acceptDocs == null) {
        acceptDocs = context.reader().getLiveDocs();
    }

    final FixedBitSet nonNested = nonNestedDocsFilter == null
            ? null
            : (FixedBitSet) nonNestedDocsFilter.getDocIdSet(context, acceptDocs);

    DocsEnum postings = null;
    FixedBitSet matches = null;
    final long idCount = parentIds.size();
    for (int i = 0; i < idCount; i++) {
        parentIds.get(i, idScratch);
        Uid.createUidAsBytes(parentTypeBr, idScratch, uidScratch);
        if (!uidEnum.seekExact(uidScratch)) {
            continue;
        }

        postings = uidEnum.docs(acceptDocs, postings, DocsEnum.FLAG_NONE);
        int docId = postings.nextDoc();
        if (docId == DocIdSetIterator.NO_MORE_DOCS) {
            continue;
        }

        // Allocate the result lazily, on the first id that actually resolves to a doc.
        if (matches == null) {
            matches = new FixedBitSet(context.reader().maxDoc());
        }

        if (nonNested != null && !nonNested.get(docId)) {
            docId = nonNested.nextSetBit(docId);
        }
        matches.set(docId);
        assert postings.advance(docId + 1) == DocIdSetIterator.NO_MORE_DOCS : "DocId " + docId
                + " should have been the last one but docId " + postings.docID() + " exists.";
    }
    return matches;
}
From source file:org.elasticsearch.index.search.child.TopChildrenQuery.java
License:Apache License
int resolveParentDocuments(TopDocs topDocs, SearchContext context, Recycler.V<ObjectObjectOpenHashMap<Object, ParentDoc[]>> parentDocs) throws Exception { int parentHitsResolved = 0; Recycler.V<ObjectObjectOpenHashMap<Object, Recycler.V<IntObjectOpenHashMap<ParentDoc>>>> parentDocsPerReader = cacheRecycler .hashMap(context.searcher().getIndexReader().leaves().size()); for (ScoreDoc scoreDoc : topDocs.scoreDocs) { int readerIndex = ReaderUtil.subIndex(scoreDoc.doc, context.searcher().getIndexReader().leaves()); AtomicReaderContext subContext = context.searcher().getIndexReader().leaves().get(readerIndex); BytesValues.WithOrdinals parentValues = parentChildIndexFieldData.load(subContext) .getBytesValues(parentType); int subDoc = scoreDoc.doc - subContext.docBase; // find the parent id parentValues.setDocument(subDoc); BytesRef parentId = parentValues.nextValue(); if (parentId == null) { // no parent found continue; }/*from w ww.j av a 2 s .c o m*/ // now go over and find the parent doc Id and reader tuple for (AtomicReaderContext atomicReaderContext : context.searcher().getIndexReader().leaves()) { AtomicReader indexReader = atomicReaderContext.reader(); FixedBitSet nonNestedDocs = null; if (nonNestedDocsFilter != null) { nonNestedDocs = (FixedBitSet) nonNestedDocsFilter.getDocIdSet(atomicReaderContext, indexReader.getLiveDocs()); } Terms terms = indexReader.terms(UidFieldMapper.NAME); if (terms == null) { continue; } TermsEnum termsEnum = terms.iterator(null); if (!termsEnum.seekExact(Uid.createUidAsBytes(parentType, parentId))) { continue; } DocsEnum docsEnum = termsEnum.docs(indexReader.getLiveDocs(), null, DocsEnum.FLAG_NONE); int parentDocId = docsEnum.nextDoc(); if (nonNestedDocs != null && !nonNestedDocs.get(parentDocId)) { parentDocId = nonNestedDocs.nextSetBit(parentDocId); } if (parentDocId != DocsEnum.NO_MORE_DOCS) { // we found a match, add it and break Recycler.V<IntObjectOpenHashMap<ParentDoc>> readerParentDocs = parentDocsPerReader.v() 
.get(indexReader.getCoreCacheKey()); if (readerParentDocs == null) { readerParentDocs = cacheRecycler.intObjectMap(indexReader.maxDoc()); parentDocsPerReader.v().put(indexReader.getCoreCacheKey(), readerParentDocs); } ParentDoc parentDoc = readerParentDocs.v().get(parentDocId); if (parentDoc == null) { parentHitsResolved++; // we have a hit on a parent parentDoc = new ParentDoc(); parentDoc.docId = parentDocId; parentDoc.count = 1; parentDoc.maxScore = scoreDoc.score; parentDoc.sumScores = scoreDoc.score; readerParentDocs.v().put(parentDocId, parentDoc); } else { parentDoc.count++; parentDoc.sumScores += scoreDoc.score; if (scoreDoc.score > parentDoc.maxScore) { parentDoc.maxScore = scoreDoc.score; } } } } } boolean[] states = parentDocsPerReader.v().allocated; Object[] keys = parentDocsPerReader.v().keys; Object[] values = parentDocsPerReader.v().values; for (int i = 0; i < states.length; i++) { if (states[i]) { Recycler.V<IntObjectOpenHashMap<ParentDoc>> value = (Recycler.V<IntObjectOpenHashMap<ParentDoc>>) values[i]; ParentDoc[] _parentDocs = value.v().values().toArray(ParentDoc.class); Arrays.sort(_parentDocs, PARENT_DOC_COMP); parentDocs.v().put(keys[i], _parentDocs); Releasables.release(value); } } Releasables.release(parentDocsPerReader); return parentHitsResolved; }
From source file:org.elasticsearch.search.MultiValueModeTests.java
License:Apache License
/**
 * Checks {@code MultiValueMode.select} on long values against a brute-force computation.
 *
 * <p>For two missing values (0 and a random long) and each of MIN, MAX, SUM and AVG,
 * every root doc's selected value is compared with the value recomputed from the inner
 * docs lying between the previous root and this root. Roots without any inner value
 * expect the missing value; AVG over more than one value expects the rounded mean.
 *
 * @param values    per-document long values
 * @param maxDoc    upper bound (exclusive) used when advancing over root docs
 * @param rootDocs  bits marking root docs
 * @param innerDocs bits marking inner docs
 * @throws IOException declared by the original signature
 */
private void verify(SortedNumericDocValues values, int maxDoc, FixedBitSet rootDocs, FixedBitSet innerDocs)
        throws IOException {
    for (long missingValue : new long[] { 0, randomLong() }) {
        for (MultiValueMode mode : new MultiValueMode[] { MultiValueMode.MIN, MultiValueMode.MAX,
                MultiValueMode.SUM, MultiValueMode.AVG }) {
            final NumericDocValues selected = mode.select(values, missingValue, rootDocs,
                    new BitSetIterator(innerDocs, 0L), maxDoc);
            final boolean additive = mode == MultiValueMode.SUM || mode == MultiValueMode.AVG;

            int previousRoot = -1;
            int root = rootDocs.nextSetBit(0);
            while (root != -1) {
                final long actual = selected.get(root);

                // Seed with the identity element of the reduction for this mode.
                long expected;
                if (mode == MultiValueMode.MAX) {
                    expected = Long.MIN_VALUE;
                } else if (mode == MultiValueMode.MIN) {
                    expected = Long.MAX_VALUE;
                } else {
                    expected = 0;
                }

                int seen = 0;
                int child = innerDocs.nextSetBit(previousRoot + 1);
                while (child != -1 && child < root) {
                    values.setDocument(child);
                    for (int j = 0; j < values.count(); ++j) {
                        if (additive) {
                            expected += values.valueAt(j);
                        } else if (mode == MultiValueMode.MIN) {
                            expected = Math.min(expected, values.valueAt(j));
                        } else if (mode == MultiValueMode.MAX) {
                            expected = Math.max(expected, values.valueAt(j));
                        }
                        ++seen;
                    }
                    child = innerDocs.nextSetBit(child + 1);
                }

                if (seen == 0) {
                    expected = missingValue;
                } else if (mode == MultiValueMode.AVG && seen > 1) {
                    expected = Math.round((double) expected / (double) seen);
                }

                assertEquals(mode.toString() + " docId=" + root, expected, actual);

                previousRoot = root;
                // Never ask the bit set for an index at or beyond maxDoc.
                if (root + 1 < maxDoc) {
                    root = rootDocs.nextSetBit(root + 1);
                } else {
                    root = -1;
                }
            }
        }
    }
}
From source file:org.elasticsearch.search.MultiValueModeTests.java
License:Apache License
private void verify(SortedNumericDoubleValues values, int maxDoc, FixedBitSet rootDocs, FixedBitSet innerDocs) throws IOException { for (long missingValue : new long[] { 0, randomLong() }) { for (MultiValueMode mode : new MultiValueMode[] { MultiValueMode.MIN, MultiValueMode.MAX, MultiValueMode.SUM, MultiValueMode.AVG }) { final NumericDoubleValues selected = mode.select(values, missingValue, rootDocs, new BitSetIterator(innerDocs, 0L), maxDoc); int prevRoot = -1; for (int root = rootDocs.nextSetBit(0); root != -1; root = root + 1 < maxDoc ? rootDocs.nextSetBit(root + 1) : -1) {//from w w w . j a v a 2 s . c o m final double actual = selected.get(root); double expected = 0.0; if (mode == MultiValueMode.MAX) { expected = Long.MIN_VALUE; } else if (mode == MultiValueMode.MIN) { expected = Long.MAX_VALUE; } int numValues = 0; for (int child = innerDocs.nextSetBit(prevRoot + 1); child != -1 && child < root; child = innerDocs.nextSetBit(child + 1)) { values.setDocument(child); for (int j = 0; j < values.count(); ++j) { if (mode == MultiValueMode.SUM || mode == MultiValueMode.AVG) { expected += values.valueAt(j); } else if (mode == MultiValueMode.MIN) { expected = Math.min(expected, values.valueAt(j)); } else if (mode == MultiValueMode.MAX) { expected = Math.max(expected, values.valueAt(j)); } ++numValues; } } if (numValues == 0) { expected = missingValue; } else if (mode == MultiValueMode.AVG) { expected = expected / numValues; } assertEquals(mode.toString() + " docId=" + root, expected, actual, 0.1); prevRoot = root; } } } }
From source file:org.elasticsearch.search.MultiValueModeTests.java
License:Apache License
private void verify(SortedBinaryDocValues values, int maxDoc, FixedBitSet rootDocs, FixedBitSet innerDocs) throws IOException { for (BytesRef missingValue : new BytesRef[] { new BytesRef(), new BytesRef(RandomStrings.randomAsciiOfLength(getRandom(), 8)) }) { for (MultiValueMode mode : new MultiValueMode[] { MultiValueMode.MIN, MultiValueMode.MAX }) { final BinaryDocValues selected = mode.select(values, missingValue, rootDocs, new BitSetIterator(innerDocs, 0L), maxDoc); int prevRoot = -1; for (int root = rootDocs.nextSetBit(0); root != -1; root = root + 1 < maxDoc ? rootDocs.nextSetBit(root + 1) : -1) {//from www.j a v a2 s . c o m final BytesRef actual = selected.get(root); BytesRef expected = null; for (int child = innerDocs.nextSetBit(prevRoot + 1); child != -1 && child < root; child = innerDocs.nextSetBit(child + 1)) { values.setDocument(child); for (int j = 0; j < values.count(); ++j) { if (expected == null) { expected = BytesRef.deepCopyOf(values.valueAt(j)); } else { if (mode == MultiValueMode.MIN) { expected = expected.compareTo(values.valueAt(j)) <= 0 ? expected : BytesRef.deepCopyOf(values.valueAt(j)); } else if (mode == MultiValueMode.MAX) { expected = expected.compareTo(values.valueAt(j)) > 0 ? expected : BytesRef.deepCopyOf(values.valueAt(j)); } } } } if (expected == null) { expected = missingValue; } assertEquals(mode.toString() + " docId=" + root, expected, actual); prevRoot = root; } } } }
From source file:org.elasticsearch.search.MultiValueModeTests.java
License:Apache License
/**
 * Checks {@code MultiValueMode.select} on ordinals against a brute-force computation.
 *
 * <p>For each of MIN and MAX, every root doc's selected ordinal is compared with the
 * smallest/largest ordinal found in the inner docs lying between the previous root and
 * this root. Roots without any inner ordinal expect {@code -1}.
 *
 * @param values    per-document ordinals
 * @param maxDoc    upper bound (exclusive) used when advancing over root docs
 * @param rootDocs  bits marking root docs
 * @param innerDocs bits marking inner docs
 * @throws IOException declared by the original signature
 */
private void verify(RandomAccessOrds values, int maxDoc, FixedBitSet rootDocs, FixedBitSet innerDocs)
        throws IOException {
    for (MultiValueMode mode : new MultiValueMode[] { MultiValueMode.MIN, MultiValueMode.MAX }) {
        final SortedDocValues selected = mode.select(values, rootDocs, new BitSetIterator(innerDocs, 0L));

        int previousRoot = -1;
        int root = rootDocs.nextSetBit(0);
        while (root != -1) {
            final int actual = selected.getOrd(root);

            int expected = -1;
            int child = innerDocs.nextSetBit(previousRoot + 1);
            while (child != -1 && child < root) {
                values.setDocument(child);
                for (int j = 0; j < values.cardinality(); ++j) {
                    if (expected == -1) {
                        // No ordinal recorded yet for this root: take this one as the baseline.
                        expected = (int) values.ordAt(j);
                    } else if (mode == MultiValueMode.MIN) {
                        expected = Math.min(expected, (int) values.ordAt(j));
                    } else if (mode == MultiValueMode.MAX) {
                        expected = Math.max(expected, (int) values.ordAt(j));
                    }
                }
                child = innerDocs.nextSetBit(child + 1);
            }

            assertEquals(mode.toString() + " docId=" + root, expected, actual);

            previousRoot = root;
            // Never ask the bit set for an index at or beyond maxDoc.
            if (root + 1 < maxDoc) {
                root = rootDocs.nextSetBit(root + 1);
            } else {
                root = -1;
            }
        }
    }
}