List of usage examples for the org.apache.lucene.util.BitSetIterator constructor BitSetIterator
public BitSetIterator(BitSet bits, long cost)
From source file:com.floragunn.searchguard.configuration.DlsFlsFilterLeafReader.java
License:Open Source License
DlsFlsFilterLeafReader(final LeafReader delegate, final Set<String> includes, final Query dlsQuery) { super(delegate); flsEnabled = includes != null && !includes.isEmpty(); dlsEnabled = dlsQuery != null;/*from ww w. j a v a2s . c o m*/ if (flsEnabled) { this.includes = includes.toArray(new String[0]); final FieldInfos infos = delegate.getFieldInfos(); final List<FieldInfo> fi = new ArrayList<FieldInfo>(infos.size()); for (final FieldInfo info : infos) { final String fname = info.name; if ((!WildcardMatcher.containsWildcard(fname) && includes.contains(fname)) || WildcardMatcher.matchAny(this.includes, fname)) { fi.add(info); } } this.flsFieldInfos = new FieldInfos(fi.toArray(new FieldInfo[0])); } else { this.includes = null; this.flsFieldInfos = null; } if (dlsEnabled) { try { //borrowed from Apache Lucene (Copyright Apache Software Foundation (ASF)) final IndexSearcher searcher = new IndexSearcher(this); searcher.setQueryCache(null); final boolean needsScores = false; final Weight preserveWeight = searcher.createNormalizedWeight(dlsQuery, needsScores); final int maxDoc = in.maxDoc(); final FixedBitSet bits = new FixedBitSet(maxDoc); final Scorer preverveScorer = preserveWeight.scorer(this.getContext()); if (preverveScorer != null) { bits.or(preverveScorer.iterator()); } if (in.hasDeletions()) { final Bits oldLiveDocs = in.getLiveDocs(); assert oldLiveDocs != null; final DocIdSetIterator it = new BitSetIterator(bits, 0L); for (int i = it.nextDoc(); i != DocIdSetIterator.NO_MORE_DOCS; i = it.nextDoc()) { if (!oldLiveDocs.get(i)) { bits.clear(i); } } } this.liveDocs = bits; this.numDocs = bits.cardinality(); } catch (Exception e) { throw new RuntimeException(e); } } else { this.liveDocs = null; this.numDocs = -1; } }
From source file:org.apache.solr.handler.ExportWriter.java
License:Apache License
protected void writeDocs(SolrQueryRequest req, IteratorWriter.ItemWriter writer, Sort sort) throws IOException { //Write the data. List<LeafReaderContext> leaves = req.getSearcher().getTopReaderContext().leaves(); SortDoc sortDoc = getSortDoc(req.getSearcher(), sort.getSort()); int count = 0; int queueSize = 30000; SortQueue queue = new SortQueue(queueSize, sortDoc); SortDoc[] outDocs = new SortDoc[queueSize]; while (count < totalHits) { //long begin = System.nanoTime(); queue.reset();/* ww w .j a v a2 s .c o m*/ SortDoc top = queue.top(); for (int i = 0; i < leaves.size(); i++) { sortDoc.setNextReader(leaves.get(i)); DocIdSetIterator it = new BitSetIterator(sets[i], 0); // cost is not useful here int docId = -1; while ((docId = it.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) { sortDoc.setValues(docId); if (top.lessThan(sortDoc)) { top.setValues(sortDoc); top = queue.updateTop(); } } } int outDocsIndex = -1; for (int i = 0; i < queueSize; i++) { SortDoc s = queue.pop(); if (s.docId > -1) { outDocs[++outDocsIndex] = s; } } //long end = System.nanoTime(); count += (outDocsIndex + 1); try { for (int i = outDocsIndex; i >= 0; --i) { SortDoc s = outDocs[i]; writer.add((MapWriter) ew -> { writeDoc(s, leaves, ew); s.reset(); }); } } catch (Throwable e) { Throwable ex = e; e.printStackTrace(); while (ex != null) { String m = ex.getMessage(); if (m != null && m.contains("Broken pipe")) { throw new IgnoreException(); } ex = ex.getCause(); } if (e instanceof IOException) { throw ((IOException) e); } else { throw new IOException(e); } } } }
From source file:org.elasticsearch.index.shard.ShardSplittingQuery.java
License:Apache License
@Override public Weight createWeight(IndexSearcher searcher, boolean needsScores, float boost) { return new ConstantScoreWeight(this, boost) { @Override//from w ww . j a va 2 s . c om public String toString() { return "weight(delete docs query)"; } @Override public Scorer scorer(LeafReaderContext context) throws IOException { LeafReader leafReader = context.reader(); FixedBitSet bitSet = new FixedBitSet(leafReader.maxDoc()); Terms terms = leafReader.terms(RoutingFieldMapper.NAME); Predicate<BytesRef> includeInShard = ref -> { int targetShardId = OperationRouting.generateShardId(indexMetaData, Uid.decodeId(ref.bytes, ref.offset, ref.length), null); return shardId == targetShardId; }; if (terms == null) { // this is the common case - no partitioning and no _routing values // in this case we also don't do anything special with regards to nested docs since we basically delete // by ID and parent and nested all have the same id. assert indexMetaData.isRoutingPartitionedIndex() == false; findSplitDocs(IdFieldMapper.NAME, includeInShard, leafReader, bitSet::set); } else { final BitSet parentBitSet; if (nestedParentBitSetProducer == null) { parentBitSet = null; } else { parentBitSet = nestedParentBitSetProducer.getBitSet(context); if (parentBitSet == null) { return null; // no matches } } if (indexMetaData.isRoutingPartitionedIndex()) { // this is the heaviest invariant. Here we have to visit all docs stored fields do extract _id and _routing // this this index is routing partitioned. Visitor visitor = new Visitor(leafReader); TwoPhaseIterator twoPhaseIterator = parentBitSet == null ? new RoutingPartitionedDocIdSetIterator(visitor) : new NestedRoutingPartitionedDocIdSetIterator(visitor, parentBitSet); return new ConstantScoreScorer(this, score(), twoPhaseIterator); } else { // here we potentially guard the docID consumers with our parent bitset if we have one. 
// this ensures that we are only marking root documents in the nested case and if necessary // we do a second pass to mark the corresponding children in markChildDocs Function<IntConsumer, IntConsumer> maybeWrapConsumer = consumer -> { if (parentBitSet != null) { return docId -> { if (parentBitSet.get(docId)) { consumer.accept(docId); } }; } return consumer; }; // in the _routing case we first go and find all docs that have a routing value and mark the ones we have to delete findSplitDocs(RoutingFieldMapper.NAME, ref -> { int targetShardId = OperationRouting.generateShardId(indexMetaData, null, ref.utf8ToString()); return shardId == targetShardId; }, leafReader, maybeWrapConsumer.apply(bitSet::set)); // now if we have a mixed index where some docs have a _routing value and some don't we have to exclude the ones // with a routing value from the next iteration an delete / select based on the ID. if (terms.getDocCount() != leafReader.maxDoc()) { // this is a special case where some of the docs have no routing values this sucks but it's possible today FixedBitSet hasRoutingValue = new FixedBitSet(leafReader.maxDoc()); findSplitDocs(RoutingFieldMapper.NAME, ref -> false, leafReader, maybeWrapConsumer.apply(hasRoutingValue::set)); IntConsumer bitSetConsumer = maybeWrapConsumer.apply(bitSet::set); findSplitDocs(IdFieldMapper.NAME, includeInShard, leafReader, docId -> { if (hasRoutingValue.get(docId) == false) { bitSetConsumer.accept(docId); } }); } } if (parentBitSet != null) { // if nested docs are involved we also need to mark all child docs that belong to a matching parent doc. markChildDocs(parentBitSet, bitSet); } } return new ConstantScoreScorer(this, score(), new BitSetIterator(bitSet, bitSet.length())); } @Override public boolean isCacheable(LeafReaderContext ctx) { // This is not a regular query, let's not cache it. It wouldn't help // anyway. return false; } }; }
From source file:org.elasticsearch.percolator.PercolatorMatchedSlotSubFetchPhase.java
License:Apache License
/**
 * Lists the doc ids set in {@code rootDocs}, in iteration order.
 *
 * @param rootDocs bit set whose set bits are the root doc ids
 * @return an array of length {@code rootDocs.cardinality()} where index {@code i} holds the
 *         i-th doc id returned by the iterator
 */
static int[] buildRootDocsSlots(BitSet rootDocs) {
    final int[] slots = new int[rootDocs.cardinality()];
    final BitSetIterator rootIterator = new BitSetIterator(rootDocs, 0);

    int nextSlot = 0;
    int docId;
    while ((docId = rootIterator.nextDoc()) != NO_MORE_DOCS) {
        slots[nextSlot++] = docId;
    }
    return slots;
}
From source file:org.elasticsearch.search.MultiValueModeTests.java
License:Apache License
/**
 * Checks {@code MultiValueMode.select} over nested long values against a brute-force
 * computation, for MIN, MAX, SUM and AVG and for two missing values (0 and a random long).
 *
 * <p>For each root doc, the expected value is folded over the values of the inner docs
 * located strictly between the previous root and this root.
 */
private void verify(SortedNumericDocValues values, int maxDoc, FixedBitSet rootDocs, FixedBitSet innerDocs)
        throws IOException {
    final MultiValueMode[] modes = {
        MultiValueMode.MIN, MultiValueMode.MAX, MultiValueMode.SUM, MultiValueMode.AVG
    };

    for (long missingValue : new long[] { 0, randomLong() }) {
        for (MultiValueMode mode : modes) {
            final NumericDocValues selected =
                    mode.select(values, missingValue, rootDocs, new BitSetIterator(innerDocs, 0L), maxDoc);

            int prevRoot = -1;
            int root = rootDocs.nextSetBit(0);
            while (root != -1) {
                // Read the value under test before touching `values` again.
                final long actual = selected.get(root);

                // Identity element for the fold: extreme values for MIN/MAX, zero otherwise.
                long expected = mode == MultiValueMode.MAX
                        ? Long.MIN_VALUE
                        : (mode == MultiValueMode.MIN ? Long.MAX_VALUE : 0);

                int numValues = 0;
                int child = innerDocs.nextSetBit(prevRoot + 1);
                while (child != -1 && child < root) {
                    values.setDocument(child);
                    for (int j = 0; j < values.count(); ++j) {
                        final boolean additive = mode == MultiValueMode.SUM || mode == MultiValueMode.AVG;
                        if (additive) {
                            expected += values.valueAt(j);
                        } else if (mode == MultiValueMode.MIN) {
                            expected = Math.min(expected, values.valueAt(j));
                        } else if (mode == MultiValueMode.MAX) {
                            expected = Math.max(expected, values.valueAt(j));
                        }
                        ++numValues;
                    }
                    child = innerDocs.nextSetBit(child + 1);
                }

                if (numValues == 0) {
                    expected = missingValue;
                } else if (mode == MultiValueMode.AVG) {
                    expected = numValues > 1
                            ? Math.round((double) expected / (double) numValues)
                            : expected;
                }

                assertEquals(mode.toString() + " docId=" + root, expected, actual);

                prevRoot = root;
                root = root + 1 < maxDoc ? rootDocs.nextSetBit(root + 1) : -1;
            }
        }
    }
}
From source file:org.elasticsearch.search.MultiValueModeTests.java
License:Apache License
private void verify(SortedNumericDoubleValues values, int maxDoc, FixedBitSet rootDocs, FixedBitSet innerDocs) throws IOException { for (long missingValue : new long[] { 0, randomLong() }) { for (MultiValueMode mode : new MultiValueMode[] { MultiValueMode.MIN, MultiValueMode.MAX, MultiValueMode.SUM, MultiValueMode.AVG }) { final NumericDoubleValues selected = mode.select(values, missingValue, rootDocs, new BitSetIterator(innerDocs, 0L), maxDoc); int prevRoot = -1; for (int root = rootDocs.nextSetBit(0); root != -1; root = root + 1 < maxDoc ? rootDocs.nextSetBit(root + 1) : -1) {//w w w.j a va 2s . co m final double actual = selected.get(root); double expected = 0.0; if (mode == MultiValueMode.MAX) { expected = Long.MIN_VALUE; } else if (mode == MultiValueMode.MIN) { expected = Long.MAX_VALUE; } int numValues = 0; for (int child = innerDocs.nextSetBit(prevRoot + 1); child != -1 && child < root; child = innerDocs.nextSetBit(child + 1)) { values.setDocument(child); for (int j = 0; j < values.count(); ++j) { if (mode == MultiValueMode.SUM || mode == MultiValueMode.AVG) { expected += values.valueAt(j); } else if (mode == MultiValueMode.MIN) { expected = Math.min(expected, values.valueAt(j)); } else if (mode == MultiValueMode.MAX) { expected = Math.max(expected, values.valueAt(j)); } ++numValues; } } if (numValues == 0) { expected = missingValue; } else if (mode == MultiValueMode.AVG) { expected = expected / numValues; } assertEquals(mode.toString() + " docId=" + root, expected, actual, 0.1); prevRoot = root; } } } }
From source file:org.elasticsearch.search.MultiValueModeTests.java
License:Apache License
private void verify(SortedBinaryDocValues values, int maxDoc, FixedBitSet rootDocs, FixedBitSet innerDocs) throws IOException { for (BytesRef missingValue : new BytesRef[] { new BytesRef(), new BytesRef(RandomStrings.randomAsciiOfLength(getRandom(), 8)) }) { for (MultiValueMode mode : new MultiValueMode[] { MultiValueMode.MIN, MultiValueMode.MAX }) { final BinaryDocValues selected = mode.select(values, missingValue, rootDocs, new BitSetIterator(innerDocs, 0L), maxDoc); int prevRoot = -1; for (int root = rootDocs.nextSetBit(0); root != -1; root = root + 1 < maxDoc ? rootDocs.nextSetBit(root + 1) : -1) {//from w w w. j a v a 2 s. c o m final BytesRef actual = selected.get(root); BytesRef expected = null; for (int child = innerDocs.nextSetBit(prevRoot + 1); child != -1 && child < root; child = innerDocs.nextSetBit(child + 1)) { values.setDocument(child); for (int j = 0; j < values.count(); ++j) { if (expected == null) { expected = BytesRef.deepCopyOf(values.valueAt(j)); } else { if (mode == MultiValueMode.MIN) { expected = expected.compareTo(values.valueAt(j)) <= 0 ? expected : BytesRef.deepCopyOf(values.valueAt(j)); } else if (mode == MultiValueMode.MAX) { expected = expected.compareTo(values.valueAt(j)) > 0 ? expected : BytesRef.deepCopyOf(values.valueAt(j)); } } } } if (expected == null) { expected = missingValue; } assertEquals(mode.toString() + " docId=" + root, expected, actual); prevRoot = root; } } } }
From source file:org.elasticsearch.search.MultiValueModeTests.java
License:Apache License
/**
 * Checks {@code MultiValueMode.select} over nested ordinals against a brute-force
 * computation, for MIN and MAX.
 *
 * <p>For each root doc, the expected ordinal is picked among the ordinals of the inner docs
 * located strictly between the previous root and this root; it stays {@code -1} when there
 * are none.
 */
private void verify(RandomAccessOrds values, int maxDoc, FixedBitSet rootDocs, FixedBitSet innerDocs)
        throws IOException {
    for (MultiValueMode mode : new MultiValueMode[] { MultiValueMode.MIN, MultiValueMode.MAX }) {
        final SortedDocValues selected = mode.select(values, rootDocs, new BitSetIterator(innerDocs, 0L));

        int prevRoot = -1;
        int root = rootDocs.nextSetBit(0);
        while (root != -1) {
            // Read the value under test before touching `values` again.
            final int actual = selected.getOrd(root);

            int expected = -1;
            int child = innerDocs.nextSetBit(prevRoot + 1);
            while (child != -1 && child < root) {
                values.setDocument(child);
                for (int j = 0; j < values.cardinality(); ++j) {
                    if (expected == -1) {
                        expected = (int) values.ordAt(j);
                    } else if (mode == MultiValueMode.MIN) {
                        expected = Math.min(expected, (int) values.ordAt(j));
                    } else if (mode == MultiValueMode.MAX) {
                        expected = Math.max(expected, (int) values.ordAt(j));
                    }
                }
                child = innerDocs.nextSetBit(child + 1);
            }

            assertEquals(mode.toString() + " docId=" + root, expected, actual);

            prevRoot = root;
            root = root + 1 < maxDoc ? rootDocs.nextSetBit(root + 1) : -1;
        }
    }
}
From source file:org.elasticsearch.xpack.core.security.authz.accesscontrol.DocumentSubsetReader.java
License:Open Source License
/** * Compute the number of live documents. This method is SLOW. *//* ww w . j av a 2 s . com*/ private static int computeNumDocs(LeafReader reader, Query roleQuery, BitSet roleQueryBits) { final Bits liveDocs = reader.getLiveDocs(); if (roleQueryBits == null) { return 0; } else if (liveDocs == null) { // slow return roleQueryBits.cardinality(); } else { // very slow, but necessary in order to be correct int numDocs = 0; DocIdSetIterator it = new BitSetIterator(roleQueryBits, 0L); // we don't use the cost try { for (int doc = it.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = it.nextDoc()) { if (liveDocs.get(doc)) { numDocs++; } } return numDocs; } catch (IOException e) { throw new UncheckedIOException(e); } } }
From source file:org.elasticsearch.xpack.core.security.authz.accesscontrol.SecurityIndexSearcherWrapper.java
License:Open Source License
static void intersectScorerAndRoleBits(Scorer scorer, SparseFixedBitSet roleBits, LeafCollector collector, Bits acceptDocs) throws IOException { // ConjunctionDISI uses the DocIdSetIterator#cost() to order the iterators, so if roleBits has the lowest cardinality it should // be used first: DocIdSetIterator iterator = ConjunctionDISI.intersectIterators( Arrays.asList(new BitSetIterator(roleBits, roleBits.approximateCardinality()), scorer.iterator())); for (int docId = iterator.nextDoc(); docId < DocIdSetIterator.NO_MORE_DOCS; docId = iterator.nextDoc()) { if (acceptDocs == null || acceptDocs.get(docId)) { collector.collect(docId);//from w w w . ja va 2 s . co m } } }