Example usage for org.apache.lucene.util BitSetIterator BitSetIterator

List of usage examples for org.apache.lucene.util BitSetIterator BitSetIterator

Introduction

On this page you can find example usages of the org.apache.lucene.util.BitSetIterator constructor.

Prototype

public BitSetIterator(BitSet bits, long cost) 

Source Link

Document

Sole constructor.

Usage

From source file:com.floragunn.searchguard.configuration.DlsFlsFilterLeafReader.java

License:Open Source License

/**
 * Wraps {@code delegate} and precomputes the field and document restrictions for it.
 * <p>
 * When {@code includes} is non-empty, a reduced {@link FieldInfos} is built from the delegate's
 * fields whose names are listed in {@code includes} or match one of its wildcard patterns.
 * When {@code dlsQuery} is non-null, the query is run against this reader and the matching,
 * non-deleted documents become the live docs of this reader.
 *
 * @param delegate the leaf reader to wrap
 * @param includes field names / wildcard patterns to keep; {@code null} or empty disables field filtering
 * @param dlsQuery query selecting the visible documents; {@code null} disables document filtering
 */
DlsFlsFilterLeafReader(final LeafReader delegate, final Set<String> includes, final Query dlsQuery) {
    super(delegate);

    flsEnabled = includes != null && !includes.isEmpty();
    dlsEnabled = dlsQuery != null;

    if (!flsEnabled) {
        this.includes = null;
        this.flsFieldInfos = null;
    } else {
        this.includes = includes.toArray(new String[0]);

        final FieldInfos allFields = delegate.getFieldInfos();
        final List<FieldInfo> visibleFields = new ArrayList<FieldInfo>(allFields.size());
        for (final FieldInfo candidate : allFields) {
            final String fieldName = candidate.name;
            // A plain (wildcard-free) field name can be looked up directly in the include set.
            final boolean listedVerbatim = !WildcardMatcher.containsWildcard(fieldName)
                    && includes.contains(fieldName);
            if (listedVerbatim || WildcardMatcher.matchAny(this.includes, fieldName)) {
                visibleFields.add(candidate);
            }
        }

        this.flsFieldInfos = new FieldInfos(visibleFields.toArray(new FieldInfo[0]));
    }

    if (!dlsEnabled) {
        this.liveDocs = null;
        this.numDocs = -1;
    } else {
        try {
            //borrowed from Apache Lucene (Copyright Apache Software Foundation (ASF))
            final IndexSearcher dlsSearcher = new IndexSearcher(this);
            dlsSearcher.setQueryCache(null);
            // Scores are not needed: only the set of matching documents matters.
            final Weight dlsWeight = dlsSearcher.createNormalizedWeight(dlsQuery, false);

            final FixedBitSet visibleDocs = new FixedBitSet(in.maxDoc());
            final Scorer dlsScorer = dlsWeight.scorer(this.getContext());
            if (dlsScorer != null) {
                visibleDocs.or(dlsScorer.iterator());
            }

            if (in.hasDeletions()) {
                final Bits delegateLiveDocs = in.getLiveDocs();
                assert delegateLiveDocs != null;
                // Drop every matching document that the wrapped reader already considers deleted.
                final DocIdSetIterator matches = new BitSetIterator(visibleDocs, 0L);
                int doc;
                while ((doc = matches.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
                    if (!delegateLiveDocs.get(doc)) {
                        visibleDocs.clear(doc);
                    }
                }
            }

            this.liveDocs = visibleDocs;
            this.numDocs = visibleDocs.cardinality();
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
    }
}

From source file:org.apache.solr.handler.ExportWriter.java

License:Apache License

/**
 * Writes the matching documents to {@code writer} in batches of at most 30000.
 * <p>
 * Each pass resets the queue, offers every document from the per-leaf bit sets in {@code sets}
 * to it, pops the queue into a buffer and writes that buffer back-to-front. Every written
 * document is {@code reset()} afterwards (presumably so that it is not picked up by a later
 * pass; confirm against {@code SortDoc}).
 *
 * @param req    request supplying the searcher and its leaves
 * @param writer sink receiving one {@link MapWriter} per document
 * @param sort   sort specification used to build the {@code SortDoc}
 * @throws IOException if writing fails; a failure whose cause chain mentions "Broken pipe"
 *                     is reported as an {@code IgnoreException} instead
 */
protected void writeDocs(SolrQueryRequest req, IteratorWriter.ItemWriter writer, Sort sort) throws IOException {
    final int batchSize = 30000;
    final List<LeafReaderContext> leaves = req.getSearcher().getTopReaderContext().leaves();
    final SortDoc sortDoc = getSortDoc(req.getSearcher(), sort.getSort());
    final SortQueue queue = new SortQueue(batchSize, sortDoc);
    final SortDoc[] batch = new SortDoc[batchSize];
    int written = 0;

    while (written < totalHits) {
        queue.reset();
        SortDoc top = queue.top();

        // Offer every document of every leaf to the queue.
        for (int leafOrd = 0; leafOrd < leaves.size(); leafOrd++) {
            sortDoc.setNextReader(leaves.get(leafOrd));
            final DocIdSetIterator docs = new BitSetIterator(sets[leafOrd], 0); // cost is not useful here
            for (int doc = docs.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = docs.nextDoc()) {
                sortDoc.setValues(doc);
                if (top.lessThan(sortDoc)) {
                    top.setValues(sortDoc);
                    top = queue.updateTop();
                }
            }
        }

        // Drain the queue, keeping only the slots that actually hold a document.
        int filled = 0;
        for (int slot = 0; slot < batchSize; slot++) {
            final SortDoc popped = queue.pop();
            if (popped.docId > -1) {
                batch[filled++] = popped;
            }
        }

        written += filled;

        try {
            // The buffer is emitted in reverse pop order.
            for (int pos = filled - 1; pos >= 0; pos--) {
                final SortDoc doc = batch[pos];
                writer.add((MapWriter) ew -> {
                    writeDoc(doc, leaves, ew);
                    doc.reset();
                });
            }
        } catch (Throwable e) {
            e.printStackTrace();
            // A broken pipe anywhere in the cause chain is translated to IgnoreException.
            for (Throwable cause = e; cause != null; cause = cause.getCause()) {
                final String message = cause.getMessage();
                if (message != null && message.contains("Broken pipe")) {
                    throw new IgnoreException();
                }
            }

            if (e instanceof IOException) {
                throw (IOException) e;
            }
            throw new IOException(e);
        }
    }
}

From source file:org.elasticsearch.index.shard.ShardSplittingQuery.java

License:Apache License

/**
 * Builds a constant-score, non-cacheable {@link Weight} for this query.
 * <p>
 * Per segment, the scorer either collects doc IDs into a {@link FixedBitSet} through
 * {@code findSplitDocs} (plus {@code markChildDocs} when a nested parent bit set is present)
 * and iterates that bit set, or, for routing-partitioned indices, returns a scorer backed by
 * a two-phase iterator over a {@code Visitor}.
 *
 * @param searcher    not referenced by this implementation
 * @param needsScores not referenced by this implementation
 * @param boost       boost passed on to the {@link ConstantScoreWeight}
 * @return the weight described above
 */
@Override
public Weight createWeight(IndexSearcher searcher, boolean needsScores, float boost) {
    return new ConstantScoreWeight(this, boost) {
        @Override
        public String toString() {
            return "weight(delete docs query)";
        }

        /**
         * Creates the per-segment scorer.
         *
         * @return a constant-score scorer, or {@code null} when a nested parent bit set
         *         producer is configured but yields no bit set for this segment
         */
        @Override
        public Scorer scorer(LeafReaderContext context) throws IOException {
            LeafReader leafReader = context.reader();
            // Collects the doc IDs reported by findSplitDocs / markChildDocs for this segment.
            FixedBitSet bitSet = new FixedBitSet(leafReader.maxDoc());
            Terms terms = leafReader.terms(RoutingFieldMapper.NAME);
            // True when the shard ID computed from the decoded term bytes equals this query's shardId.
            Predicate<BytesRef> includeInShard = ref -> {
                int targetShardId = OperationRouting.generateShardId(indexMetaData,
                        Uid.decodeId(ref.bytes, ref.offset, ref.length), null);
                return shardId == targetShardId;
            };
            if (terms == null) {
                // this is the common case - no partitioning and no _routing values
                // in this case we also don't do anything special with regards to nested docs since we basically delete
                // by ID and parent and nested all have the same id.
                assert indexMetaData.isRoutingPartitionedIndex() == false;
                findSplitDocs(IdFieldMapper.NAME, includeInShard, leafReader, bitSet::set);
            } else {
                // null means "no nested handling"; otherwise it is the producer's bit set for this segment.
                final BitSet parentBitSet;
                if (nestedParentBitSetProducer == null) {
                    parentBitSet = null;
                } else {
                    parentBitSet = nestedParentBitSetProducer.getBitSet(context);
                    if (parentBitSet == null) {
                        return null; // no matches
                    }
                }
                if (indexMetaData.isRoutingPartitionedIndex()) {
                    // this is the heaviest variant. Here we have to visit all docs' stored fields to extract _id and _routing
                    // since this index is routing partitioned.
                    Visitor visitor = new Visitor(leafReader);
                    TwoPhaseIterator twoPhaseIterator = parentBitSet == null
                            ? new RoutingPartitionedDocIdSetIterator(visitor)
                            : new NestedRoutingPartitionedDocIdSetIterator(visitor, parentBitSet);
                    // Returns early: the bit set built above is not used on this path.
                    return new ConstantScoreScorer(this, score(), twoPhaseIterator);
                } else {
                    // here we potentially guard the docID consumers with our parent bitset if we have one.
                    // this ensures that we are only marking root documents in the nested case and if necessary
                    // we do a second pass to mark the corresponding children in markChildDocs
                    Function<IntConsumer, IntConsumer> maybeWrapConsumer = consumer -> {
                        if (parentBitSet != null) {
                            return docId -> {
                                if (parentBitSet.get(docId)) {
                                    consumer.accept(docId);
                                }
                            };
                        }
                        return consumer;
                    };
                    // in the _routing case we first go and find all docs that have a routing value and mark the ones we have to delete
                    findSplitDocs(RoutingFieldMapper.NAME, ref -> {
                        int targetShardId = OperationRouting.generateShardId(indexMetaData, null,
                                ref.utf8ToString());
                        return shardId == targetShardId;
                    }, leafReader, maybeWrapConsumer.apply(bitSet::set));

                    // now if we have a mixed index where some docs have a _routing value and some don't we have to exclude the ones
                    // with a routing value from the next iteration and delete / select based on the ID.
                    if (terms.getDocCount() != leafReader.maxDoc()) {
                        // this is a special case where some of the docs have no routing values; this sucks but it's possible today
                        FixedBitSet hasRoutingValue = new FixedBitSet(leafReader.maxDoc());
                        // NOTE(review): the constant-false predicate is presumably what makes findSplitDocs report
                        // every doc that has a routing term - confirm against findSplitDocs.
                        findSplitDocs(RoutingFieldMapper.NAME, ref -> false, leafReader,
                                maybeWrapConsumer.apply(hasRoutingValue::set));
                        IntConsumer bitSetConsumer = maybeWrapConsumer.apply(bitSet::set);
                        // Second pass by _id, skipping every doc recorded in hasRoutingValue.
                        findSplitDocs(IdFieldMapper.NAME, includeInShard, leafReader, docId -> {
                            if (hasRoutingValue.get(docId) == false) {
                                bitSetConsumer.accept(docId);
                            }
                        });
                    }
                }
                if (parentBitSet != null) {
                    // if nested docs are involved we also need to mark all child docs that belong to a matching parent doc.
                    markChildDocs(parentBitSet, bitSet);
                }
            }

            // The bit set's length is handed to BitSetIterator as its cost.
            return new ConstantScoreScorer(this, score(), new BitSetIterator(bitSet, bitSet.length()));
        }

        @Override
        public boolean isCacheable(LeafReaderContext ctx) {
            // This is not a regular query, let's not cache it. It wouldn't help
            // anyway.
            return false;
        }
    };
}

From source file:org.elasticsearch.percolator.PercolatorMatchedSlotSubFetchPhase.java

License:Apache License

/**
 * Lists the doc IDs set in {@code rootDocs} in iteration order, so that the array index
 * ("slot") of a root document is its position among all root documents.
 *
 * @param rootDocs bit set with one bit per root document
 * @return array of length {@code rootDocs.cardinality()} holding the set doc IDs
 */
static int[] buildRootDocsSlots(BitSet rootDocs) {
    final int[] docIdBySlot = new int[rootDocs.cardinality()];
    final BitSetIterator roots = new BitSetIterator(rootDocs, 0);
    int nextSlot = 0;
    int docId;
    while ((docId = roots.nextDoc()) != NO_MORE_DOCS) {
        docIdBySlot[nextSlot] = docId;
        nextSlot++;
    }
    return docIdBySlot;
}

From source file:org.elasticsearch.search.MultiValueModeTests.java

License:Apache License

/**
 * Checks the nested {@code select} of MIN, MAX, SUM and AVG on long values against a
 * brute-force computation.
 * <p>
 * For each root document the expected value is folded from the values of all inner documents
 * lying between the previous root (exclusive) and this root (exclusive); if there are none,
 * the missing value is expected.
 */
private void verify(SortedNumericDocValues values, int maxDoc, FixedBitSet rootDocs, FixedBitSet innerDocs)
        throws IOException {
    final long[] missingValues = { 0, randomLong() };
    final MultiValueMode[] modes = { MultiValueMode.MIN, MultiValueMode.MAX, MultiValueMode.SUM,
            MultiValueMode.AVG };

    for (long missingValue : missingValues) {
        for (MultiValueMode mode : modes) {
            final NumericDocValues selected = mode.select(values, missingValue, rootDocs,
                    new BitSetIterator(innerDocs, 0L), maxDoc);

            int previousRoot = -1;
            int root = rootDocs.nextSetBit(0);
            while (root != -1) {
                final long actual = selected.get(root);

                // Neutral starting value for the fold of the current mode.
                long expected;
                if (mode == MultiValueMode.MAX) {
                    expected = Long.MIN_VALUE;
                } else if (mode == MultiValueMode.MIN) {
                    expected = Long.MAX_VALUE;
                } else {
                    expected = 0;
                }

                int seen = 0;
                int child = innerDocs.nextSetBit(previousRoot + 1);
                while (child != -1 && child < root) {
                    values.setDocument(child);
                    for (int idx = 0; idx < values.count(); idx++) {
                        final long value = values.valueAt(idx);
                        if (mode == MultiValueMode.MIN) {
                            expected = Math.min(expected, value);
                        } else if (mode == MultiValueMode.MAX) {
                            expected = Math.max(expected, value);
                        } else if (mode == MultiValueMode.SUM || mode == MultiValueMode.AVG) {
                            expected += value;
                        }
                        seen++;
                    }
                    child = innerDocs.nextSetBit(child + 1);
                }

                if (seen == 0) {
                    expected = missingValue;
                } else if (mode == MultiValueMode.AVG && seen > 1) {
                    expected = Math.round((double) expected / (double) seen);
                }

                assertEquals(mode.toString() + " docId=" + root, expected, actual);

                previousRoot = root;
                // Never ask the bit set for an index at or beyond maxDoc.
                root = root + 1 < maxDoc ? rootDocs.nextSetBit(root + 1) : -1;
            }
        }
    }
}

From source file:org.elasticsearch.search.MultiValueModeTests.java

License:Apache License

/**
 * Checks the nested {@code select} of MIN, MAX, SUM and AVG on double values against a
 * brute-force computation, comparing with a tolerance of 0.1.
 * <p>
 * For each root document the expected value is folded from the values of all inner documents
 * lying between the previous root (exclusive) and this root (exclusive); if there are none,
 * the missing value is expected.
 */
private void verify(SortedNumericDoubleValues values, int maxDoc, FixedBitSet rootDocs, FixedBitSet innerDocs)
        throws IOException {
    final long[] missingValues = { 0, randomLong() };
    final MultiValueMode[] modes = { MultiValueMode.MIN, MultiValueMode.MAX, MultiValueMode.SUM,
            MultiValueMode.AVG };

    for (long missingValue : missingValues) {
        for (MultiValueMode mode : modes) {
            final NumericDoubleValues selected = mode.select(values, missingValue, rootDocs,
                    new BitSetIterator(innerDocs, 0L), maxDoc);

            int previousRoot = -1;
            int root = rootDocs.nextSetBit(0);
            while (root != -1) {
                final double actual = selected.get(root);

                // Neutral starting value for the fold of the current mode.
                double expected;
                if (mode == MultiValueMode.MAX) {
                    expected = Long.MIN_VALUE;
                } else if (mode == MultiValueMode.MIN) {
                    expected = Long.MAX_VALUE;
                } else {
                    expected = 0.0;
                }

                int seen = 0;
                int child = innerDocs.nextSetBit(previousRoot + 1);
                while (child != -1 && child < root) {
                    values.setDocument(child);
                    for (int idx = 0; idx < values.count(); idx++) {
                        final double value = values.valueAt(idx);
                        if (mode == MultiValueMode.MIN) {
                            expected = Math.min(expected, value);
                        } else if (mode == MultiValueMode.MAX) {
                            expected = Math.max(expected, value);
                        } else if (mode == MultiValueMode.SUM || mode == MultiValueMode.AVG) {
                            expected += value;
                        }
                        seen++;
                    }
                    child = innerDocs.nextSetBit(child + 1);
                }

                if (seen == 0) {
                    expected = missingValue;
                } else if (mode == MultiValueMode.AVG) {
                    expected = expected / seen;
                }

                assertEquals(mode.toString() + " docId=" + root, expected, actual, 0.1);

                previousRoot = root;
                // Never ask the bit set for an index at or beyond maxDoc.
                root = root + 1 < maxDoc ? rootDocs.nextSetBit(root + 1) : -1;
            }
        }
    }
}

From source file:org.elasticsearch.search.MultiValueModeTests.java

License:Apache License

/**
 * Checks the nested {@code select} of MIN and MAX on binary values against a brute-force
 * computation.
 * <p>
 * For each root document the expected value is the smallest (MIN) or largest (MAX) value among
 * the inner documents lying between the previous root (exclusive) and this root (exclusive);
 * if there are none, the missing value is expected.
 */
private void verify(SortedBinaryDocValues values, int maxDoc, FixedBitSet rootDocs, FixedBitSet innerDocs)
        throws IOException {
    final BytesRef[] missingValues = { new BytesRef(),
            new BytesRef(RandomStrings.randomAsciiOfLength(getRandom(), 8)) };
    final MultiValueMode[] modes = { MultiValueMode.MIN, MultiValueMode.MAX };

    for (BytesRef missingValue : missingValues) {
        for (MultiValueMode mode : modes) {
            final BinaryDocValues selected = mode.select(values, missingValue, rootDocs,
                    new BitSetIterator(innerDocs, 0L), maxDoc);

            int previousRoot = -1;
            int root = rootDocs.nextSetBit(0);
            while (root != -1) {
                final BytesRef actual = selected.get(root);

                BytesRef expected = null;
                int child = innerDocs.nextSetBit(previousRoot + 1);
                while (child != -1 && child < root) {
                    values.setDocument(child);
                    for (int idx = 0; idx < values.count(); idx++) {
                        final BytesRef candidate = values.valueAt(idx);
                        if (expected == null) {
                            expected = BytesRef.deepCopyOf(candidate);
                        } else if (mode == MultiValueMode.MIN) {
                            // Replace only when the candidate is strictly smaller.
                            if (expected.compareTo(candidate) > 0) {
                                expected = BytesRef.deepCopyOf(candidate);
                            }
                        } else if (mode == MultiValueMode.MAX) {
                            // Replace unless the current value is strictly larger.
                            if (expected.compareTo(candidate) <= 0) {
                                expected = BytesRef.deepCopyOf(candidate);
                            }
                        }
                    }
                    child = innerDocs.nextSetBit(child + 1);
                }

                if (expected == null) {
                    expected = missingValue;
                }

                assertEquals(mode.toString() + " docId=" + root, expected, actual);

                previousRoot = root;
                // Never ask the bit set for an index at or beyond maxDoc.
                root = root + 1 < maxDoc ? rootDocs.nextSetBit(root + 1) : -1;
            }
        }
    }
}

From source file:org.elasticsearch.search.MultiValueModeTests.java

License:Apache License

/**
 * Checks the nested {@code select} of MIN and MAX on ordinals against a brute-force
 * computation.
 * <p>
 * For each root document the expected ordinal is the smallest (MIN) or largest (MAX) ordinal
 * among the inner documents lying between the previous root (exclusive) and this root
 * (exclusive); {@code -1} is expected when there are none.
 */
private void verify(RandomAccessOrds values, int maxDoc, FixedBitSet rootDocs, FixedBitSet innerDocs)
        throws IOException {
    final MultiValueMode[] modes = { MultiValueMode.MIN, MultiValueMode.MAX };

    for (MultiValueMode mode : modes) {
        final SortedDocValues selected = mode.select(values, rootDocs, new BitSetIterator(innerDocs, 0L));

        int previousRoot = -1;
        int root = rootDocs.nextSetBit(0);
        while (root != -1) {
            final int actual = selected.getOrd(root);

            int expected = -1;
            int child = innerDocs.nextSetBit(previousRoot + 1);
            while (child != -1 && child < root) {
                values.setDocument(child);
                for (int idx = 0; idx < values.cardinality(); idx++) {
                    final int ord = (int) values.ordAt(idx);
                    if (expected == -1) {
                        expected = ord;
                    } else if (mode == MultiValueMode.MIN) {
                        expected = Math.min(expected, ord);
                    } else if (mode == MultiValueMode.MAX) {
                        expected = Math.max(expected, ord);
                    }
                }
                child = innerDocs.nextSetBit(child + 1);
            }

            assertEquals(mode.toString() + " docId=" + root, expected, actual);

            previousRoot = root;
            // Never ask the bit set for an index at or beyond maxDoc.
            root = root + 1 < maxDoc ? rootDocs.nextSetBit(root + 1) : -1;
        }
    }
}

From source file:org.elasticsearch.xpack.core.security.authz.accesscontrol.DocumentSubsetReader.java

License:Open Source License

/**
 * Counts the documents that are set in {@code roleQueryBits} and are also live in
 * {@code reader}. This method is SLOW.
 *
 * @param reader        reader whose live docs are consulted
 * @param roleQuery     not referenced by this method
 * @param roleQueryBits documents matching the role query; {@code null} yields {@code 0}
 * @return the number of live documents among {@code roleQueryBits}
 */
private static int computeNumDocs(LeafReader reader, Query roleQuery, BitSet roleQueryBits) {
    final Bits liveDocs = reader.getLiveDocs();
    if (roleQueryBits == null) {
        return 0;
    }
    if (liveDocs == null) {
        // slow: no deletions, so every set bit counts
        return roleQueryBits.cardinality();
    }

    // very slow, but necessary in order to be correct: each set bit is checked against the live docs
    final DocIdSetIterator matches = new BitSetIterator(roleQueryBits, 0L); // we don't use the cost
    int liveMatches = 0;
    try {
        int doc;
        while ((doc = matches.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
            if (liveDocs.get(doc)) {
                liveMatches++;
            }
        }
    } catch (IOException e) {
        throw new UncheckedIOException(e);
    }
    return liveMatches;
}

From source file:org.elasticsearch.xpack.core.security.authz.accesscontrol.SecurityIndexSearcherWrapper.java

License:Open Source License

/**
 * Passes to {@code collector} every document produced by both {@code scorer} and
 * {@code roleBits} that is also accepted by {@code acceptDocs}.
 *
 * @param scorer     scorer whose iterator supplies the query matches
 * @param roleBits   documents permitted by the role
 * @param collector  receives each document in the intersection
 * @param acceptDocs additional filter; {@code null} accepts every document
 * @throws IOException if iterating or collecting fails
 */
static void intersectScorerAndRoleBits(Scorer scorer, SparseFixedBitSet roleBits, LeafCollector collector,
        Bits acceptDocs) throws IOException {
    // ConjunctionDISI uses the DocIdSetIterator#cost() to order the iterators, so if roleBits has the lowest
    // cardinality it should be used first:
    final DocIdSetIterator roleDocs = new BitSetIterator(roleBits, roleBits.approximateCardinality());
    final DocIdSetIterator intersection = ConjunctionDISI
            .intersectIterators(Arrays.asList(roleDocs, scorer.iterator()));

    int docId = intersection.nextDoc();
    while (docId < DocIdSetIterator.NO_MORE_DOCS) {
        final boolean accepted = acceptDocs == null || acceptDocs.get(docId);
        if (accepted) {
            collector.collect(docId);
        }
        docId = intersection.nextDoc();
    }
}