Example usage for org.apache.lucene.util FixedBitSet length

List of usage examples for org.apache.lucene.util FixedBitSet length

Introduction

In this page you can find the example usage for org.apache.lucene.util FixedBitSet length.

Prototype

@Override
    public int length() 

Source Link

Usage

From source file:de.unihildesheim.iw.lucene.search.EmptyFieldFilter.java

License:Open Source License

/**
 * Builds a {@link DocIdSet} containing only documents that have at least one
 * indexed term in {@code this.field}, restricted to {@code acceptDocs} (or to
 * the segment's live docs when {@code acceptDocs} is null).
 *
 * @param context    segment to filter
 * @param acceptDocs optional pre-filter of acceptable documents; null means
 *                   "use the segment's live docs"
 * @return a BitDocIdSet of non-empty-field documents
 * @throws IOException on index access failure
 */
@Override
public DocIdSet getDocIdSet(@NotNull final LeafReaderContext context, @Nullable final Bits acceptDocs)
        throws IOException {
    FixedBitSet checkBits;
    final LeafReader reader = context.reader();
    final int maxDoc = reader.maxDoc();

    BitSet finalBits = new SparseFixedBitSet(maxDoc);
    if (acceptDocs == null) {
        checkBits = BitsUtils.bits2FixedBitSet(reader.getLiveDocs());
        if (checkBits == null) {
            // No deletions recorded: treat every document in the segment as live.
            checkBits = new FixedBitSet(maxDoc);
            checkBits.set(0, checkBits.length());
        }
    } else {
        checkBits = BitsUtils.bits2FixedBitSet(acceptDocs);
    }

    @Nullable
    final Terms terms = reader.terms(this.field);
    if (terms != null) {
        final int termsDocCount = terms.getDocCount();

        if (termsDocCount != 0) {
            if (termsDocCount == maxDoc) {
                // Every doc has the field — everything accepted so far matches.
                finalBits = checkBits;
            } else {
                // FIX: reuse the Terms instance fetched above instead of calling
                // reader.terms(this.field) a second time — it is already non-null
                // in this branch, so the extra lookup was pure overhead.
                PostingsEnum pe = null;
                final TermsEnum te = terms.iterator(null);
                int docId;
                while (te.next() != null) {
                    pe = te.postings(checkBits, pe, (int) PostingsEnum.NONE);
                    while ((docId = pe.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
                        // getAndClear ensures each doc is added at most once even
                        // when it matches several terms.
                        if (checkBits.getAndClear(docId)) {
                            finalBits.set(docId);
                        }
                    }
                }
            }
        }
    }
    return new BitDocIdSet(finalBits);
}

From source file:org.apache.solr.search.CitationLRUCache.java

License:Apache License

/**
 * Incrementally warms this cache from a previous generation: entries whose
 * documents survived the reopen are regenerated, while deleted/updated
 * documents are flagged in {@code toRefresh} for rebuilding.
 *
 * @param searcher the newly opened searcher to warm against
 * @param old      the previous cache generation to copy/regenerate from
 * @throws IOException on index access failure
 */
private void warmIncrementally(SolrIndexSearcher searcher, SolrCache<K, V> old) throws IOException {
    if (regenerator == null)
        return;

    Map<String, List<String>> fields = getFields(searcher, this.identifierFields);
    if (fields.get("textClasses").size() > 0 || fields.get("textClassesMV").size() > 0) {
        synchronized (map) {
            treatIdentifiersAsText = true;
        }
    }

    long warmingStartTime = System.currentTimeMillis();
    CitationLRUCache<K, V> other = (CitationLRUCache<K, V>) old;

    // Lucene doc ids of documents that need to be reloaded/regenerated during
    // this warmup run.
    FixedBitSet toRefresh = new FixedBitSet(searcher.getIndexReader().maxDoc());

    Bits liveDocs = searcher.getAtomicReader().getLiveDocs();

    if (liveDocs == null) { // everything is new: fresh index or merged/optimized index

        toRefresh.set(0, toRefresh.length());

        // Build the mapping from indexed values into lucene ids;
        // this must always be available, so we build it no matter what.
        // XXX: make it update only the necessary IDs (not the whole index)
        unInvertedTheDamnThing(searcher.getAtomicReader(), fields, liveDocs, new KVSetter() {
            @SuppressWarnings("unchecked")
            @Override
            public void set(int docbase, int docid, Object value) {
                put((K) value, (V) (Integer) (docbase + docid));
            }
        });

    } else { // FIX: was "else if (liveDocs != null)" — the re-check was redundant

        Integer luceneId;
        for (V v : other.map.values()) {
            luceneId = ((Integer) v);
            // FIX: was "luceneId <= liveDocs.length()", which permits an
            // out-of-bounds liveDocs.get(length()) call; valid indices are
            // 0 .. length()-1, so use strict '<'.
            if (luceneId < liveDocs.length() && !liveDocs.get(luceneId)) { // doc was either deleted or updated
                // TODO: retrieve all citations/references for this luceneId and
                // mark those docs to be refreshed (currently a no-op).
            }
        }

        // Every still-live doc is scheduled for refresh.
        for (int i = 0; i < toRefresh.length(); i++) {
            if (liveDocs.get(i)) {
                toRefresh.set(i);
            }
        }
    }

    // Warm (regenerate) the most recently used entries of the old cache.
    if (isAutowarmingOn()) {
        Object[] keys, vals = null;

        // Don't do the autowarming in the synchronized block, just pull out the keys and values.
        synchronized (other.map) {

            int sz = autowarm.getWarmCount(other.map.size());

            keys = new Object[sz];
            vals = new Object[sz];

            Iterator<Map.Entry<K, V>> iter = other.map.entrySet().iterator();

            // iteration goes from oldest (least recently used) to most recently used,
            // so we need to skip over the oldest entries.
            int skip = other.map.size() - sz;
            for (int i = 0; i < skip; i++)
                iter.next();

            for (int i = 0; i < sz; i++) {
                Map.Entry<K, V> entry = iter.next();
                keys[i] = entry.getKey();
                vals[i] = entry.getValue();
            }
        }

        // autowarm from the oldest to the newest entries so that the ordering will be
        // correct in the new cache.
        for (int i = 0; i < keys.length; i++) {
            try {
                boolean continueRegen = true;
                if (isModified(liveDocs, keys[i], vals[i])) {
                    // Stale entry: mark for refresh instead of regenerating.
                    toRefresh.set((Integer) keys[i]);
                } else {
                    continueRegen = regenerator.regenerateItem(searcher, this, old, keys[i], vals[i]);
                }
                if (!continueRegen)
                    break;
            } catch (Throwable e) {
                SolrException.log(log, "Error during auto-warming of key:" + keys[i], e);
            }
        }
    }

    warmupTime = System.currentTimeMillis() - warmingStartTime;
}

From source file:org.apache.solr.search.facet.UniqueSlotAcc.java

License:Apache License

/**
 * Clears all per-slot state: drops the cached counts and blanks every
 * allocated per-slot bit set (null slots are left untouched).
 */
@Override
public void reset() {
    counts = null;
    for (int slot = 0; slot < arr.length; slot++) {
        final FixedBitSet slotBits = arr[slot];
        if (slotBits != null) {
            slotBits.clear(0, slotBits.length());
        }
    }
}

From source file:org.apache.solr.search.facet.UniqueSlotAcc.java

License:Apache License

/**
 * Serializes facet slot {@code slot} as an HLL sketch: each set ordinal's
 * value is hashed (murmur3 x64 128-bit) and folded into the HLL so per-shard
 * cardinalities can be merged without shipping raw values.
 *
 * @param slot facet slot to serialize
 * @return a map {"hll": serialized bytes}, or HLLAgg.NO_VALUES when the slot
 *         has no ordinals
 * @throws IOException on ordinal lookup failure
 */
private Object getShardHLL(int slot) throws IOException {
    FixedBitSet ords = arr[slot];
    if (ords == null)
        return HLLAgg.NO_VALUES;

    HLL hll = factory.getHLL();
    long maxOrd = ords.length();
    Hash.LongPair hashResult = new Hash.LongPair();
    // Walk set bits: the pre-increment keeps ord < maxOrd before each probe,
    // so nextSetBit is never called past the last valid index.
    for (int ord = -1; ++ord < maxOrd;) {
        ord = ords.nextSetBit(ord);
        if (ord == DocIdSetIterator.NO_MORE_DOCS)
            break;
        BytesRef val = lookupOrd(ord);
        // way to avoid recomputing hash across slots?  Prob not worth space
        Hash.murmurhash3_x64_128(val.bytes, val.offset, val.length, 0, hashResult);
        // idea: if the set is small enough, just send the hashes?  We can add at the top
        // level or even just do a hash table at the top level.
        hll.addRaw(hashResult.val1);
    }

    SimpleOrderedMap map = new SimpleOrderedMap();
    map.add("hll", hll.toBytes());
    return map;
}

From source file:org.apache.solr.search.facet.UniqueSlotAcc.java

License:Apache License

/**
 * Builds the per-shard response for one facet slot: the unique-value count,
 * the term count, and (for small sets) the explicit values so the coordinator
 * can de-duplicate across shards.
 *
 * @param slot facet slot to serialize
 * @return a SimpleOrderedMap with "unique", "nTerms" and optionally "vals"
 * @throws IOException on ordinal lookup failure
 */
private Object getShardValue(int slot) throws IOException {
    if (factory != null)
        return getShardHLL(slot);
    FixedBitSet ords = arr[slot];
    int unique;
    if (counts != null) {
        unique = counts[slot];
    } else {
        unique = ords == null ? 0 : ords.cardinality();
    }

    SimpleOrderedMap map = new SimpleOrderedMap();
    map.add("unique", unique);
    map.add("nTerms", nTerms);

    int maxExplicit = 100;
    // TODO: make configurable
    // TODO: share values across buckets
    if (unique > 0) {

        List lst = new ArrayList(Math.min(unique, maxExplicit));

        // FIX: the original read ords.length() BEFORE this null check, which
        // throws NPE when counts[slot] > 0 but arr[slot] is null; the length
        // read now happens only inside the guard.
        if (ords != null && ords.length() > 0) {
            long maxOrd = ords.length();
            for (int ord = 0; lst.size() < maxExplicit;) {
                ord = ords.nextSetBit(ord);
                if (ord == DocIdSetIterator.NO_MORE_DOCS)
                    break;
                BytesRef val = lookupOrd(ord);
                Object o = field.getType().toObject(field, val);
                lst.add(o);
                if (++ord >= maxOrd)
                    break;
            }
        }

        map.add("vals", lst);
    }

    return map;
}

From source file:org.elasticsearch.index.cache.docset.simple.SimpleDocSetCache.java

License:Apache License

/**
 * Returns a {@code ContextDocIdSet} for the given segment, backed by a
 * per-core pool of reusable {@code FixedBitSet}s keyed on the reader's core
 * cache key.
 *
 * @param context the segment to obtain a doc id set for
 * @return a ContextDocIdSet whose bit set is either freshly allocated or a
 *         cleared, recycled instance from the pool
 */
@Override
public ContextDocIdSet obtain(AtomicReaderContext context) {
    Queue<FixedBitSet> docIdSets = cache.get(context.reader().getCoreCacheKey());
    if (docIdSets == null) {
        // First request for this core: register for cleanup when the core closes,
        // then install an (initially empty) pool for later releases.
        if (context.reader() instanceof SegmentReader) {
            ((SegmentReader) context.reader()).addCoreClosedListener(this);
        }
        // NOTE(review): the get/put pair is not atomic — two concurrent first
        // requests can each install a queue and one is silently replaced; confirm
        // callers tolerate losing pooled sets (they would just be GC'd).
        cache.put(context.reader().getCoreCacheKey(), ConcurrentCollections.<FixedBitSet>newQueue());
        return new ContextDocIdSet(context, new FixedBitSet(context.reader().maxDoc()));
    }
    // Reuse a pooled bit set when available (clearing it first); otherwise allocate.
    FixedBitSet docIdSet = docIdSets.poll();
    if (docIdSet == null) {
        docIdSet = new FixedBitSet(context.reader().maxDoc());
    } else {
        docIdSet.clear(0, docIdSet.length());
    }
    return new ContextDocIdSet(context, docIdSet);
}

From source file:org.elasticsearch.index.fielddata.AbstractStringFieldDataTestCase.java

License:Apache License

/**
 * Indexes randomly-sized parent/child document blocks, then runs a
 * ToParentBlockJoinQuery sorted by the nested "text" field and verifies the
 * parents come back in non-decreasing order of the child value selected by
 * {@code sortMode}.
 *
 * @param sortMode how multiple child values collapse into one sort key
 * @throws IOException on index access failure
 */
public void testNestedSorting(MultiValueMode sortMode) throws IOException {
    final String[] values = new String[randomIntBetween(2, 20)];
    for (int i = 0; i < values.length; ++i) {
        values[i] = TestUtil.randomSimpleString(getRandom());
    }
    final int numParents = scaledRandomIntBetween(10, 3072);
    List<Document> docs = new ArrayList<>();
    // Marks the lucene doc id of each parent; in block-join layout the children
    // of a parent occupy the doc ids immediately preceding it.
    FixedBitSet parents = new FixedBitSet(64);
    for (int i = 0; i < numParents; ++i) {
        docs.clear();
        final int numChildren = randomInt(4);
        for (int j = 0; j < numChildren; ++j) {
            final Document child = new Document();
            final int numValues = randomInt(3);
            for (int k = 0; k < numValues; ++k) {
                final String value = RandomPicks.randomFrom(getRandom(), values);
                addField(child, "text", value);
            }
            docs.add(child);
        }
        final Document parent = new Document();
        parent.add(new StringField("type", "parent", Store.YES));
        final String value = RandomPicks.randomFrom(getRandom(), values);
        if (value != null) {
            addField(parent, "text", value);
        }
        docs.add(parent);
        // Next parent doc id = previous parent's id + size of this block.
        int bit = parents.prevSetBit(parents.length() - 1) + docs.size();
        parents = FixedBitSet.ensureCapacity(parents, bit);
        parents.set(bit);
        writer.addDocuments(docs);
        if (randomInt(10) == 0) {
            writer.commit();
        }
    }
    DirectoryReader directoryReader = DirectoryReader.open(writer, true);
    directoryReader = ElasticsearchDirectoryReader.wrap(directoryReader, new ShardId(new Index("test"), 0));
    IndexSearcher searcher = new IndexSearcher(directoryReader);
    IndexFieldData<?> fieldData = getForField("text");
    // Randomly exercise all missing-value modes: _first, _last, an existing
    // value, or a fresh random value.
    final Object missingValue;
    switch (randomInt(4)) {
    case 0:
        missingValue = "_first";
        break;
    case 1:
        missingValue = "_last";
        break;
    case 2:
        missingValue = new BytesRef(RandomPicks.randomFrom(getRandom(), values));
        break;
    default:
        missingValue = new BytesRef(TestUtil.randomSimpleString(getRandom()));
        break;
    }
    Query parentFilter = new TermQuery(new Term("type", "parent"));
    Query childFilter = Queries.not(parentFilter);
    Nested nested = createNested(searcher, parentFilter, childFilter);
    BytesRefFieldComparatorSource nestedComparatorSource = new BytesRefFieldComparatorSource(fieldData,
            missingValue, sortMode, nested);
    ToParentBlockJoinQuery query = new ToParentBlockJoinQuery(new ConstantScoreQuery(childFilter),
            new QueryBitSetProducer(parentFilter), ScoreMode.None);
    Sort sort = new Sort(new SortField("text", nestedComparatorSource));
    TopFieldDocs topDocs = searcher.search(query, randomIntBetween(1, numParents), sort);
    assertTrue(topDocs.scoreDocs.length > 0);
    BytesRef previous = null;
    for (int i = 0; i < topDocs.scoreDocs.length; ++i) {
        final int docID = topDocs.scoreDocs[i].doc;
        assertTrue("expected " + docID + " to be a parent", parents.get(docID));
        // Recompute the expected sort key for this parent by scanning its child
        // docs (the ids between the previous parent and this one).
        BytesRef cmpValue = null;
        for (int child = parents.prevSetBit(docID - 1) + 1; child < docID; ++child) {
            String[] sVals = searcher.doc(child).getValues("text");
            final BytesRef[] vals;
            if (sVals.length == 0) {
                vals = new BytesRef[0];
            } else {
                vals = new BytesRef[sVals.length];
                for (int j = 0; j < vals.length; ++j) {
                    vals[j] = new BytesRef(sVals[j]);
                }
            }
            for (BytesRef value : vals) {
                if (cmpValue == null) {
                    cmpValue = value;
                } else if (sortMode == MultiValueMode.MIN && value.compareTo(cmpValue) < 0) {
                    cmpValue = value;
                } else if (sortMode == MultiValueMode.MAX && value.compareTo(cmpValue) > 0) {
                    cmpValue = value;
                }
            }
        }
        // No child values: substitute the configured missing value (empty ref for
        // "_first"; "_last" is left null so it sorts after everything).
        if (cmpValue == null) {
            if ("_first".equals(missingValue)) {
                cmpValue = new BytesRef();
            } else if ("_last".equals(missingValue) == false) {
                cmpValue = (BytesRef) missingValue;
            }
        }
        if (previous != null && cmpValue != null) {
            assertTrue(previous.utf8ToString() + "   /   " + cmpValue.utf8ToString(),
                    previous.compareTo(cmpValue) <= 0);
        }
        previous = cmpValue;
    }
    searcher.getIndexReader().close();
}

From source file:org.elasticsearch.index.seqno.CountedBitSetTests.java

License:Apache License

/**
 * Mirrors every random mutation into both a FixedBitSet (reference) and a
 * CountedBitSet (candidate), checking after each step that cardinality and
 * length stay in lock-step, then compares the two bit-by-bit.
 */
public void testCompareToFixedBitset() {
    final int numBits = (short) randomIntBetween(8, 4096);
    final FixedBitSet reference = new FixedBitSet(numBits);
    final CountedBitSet candidate = new CountedBitSet((short) numBits);

    for (int bit = 0; bit < numBits; bit++) {
        if (randomBoolean()) {
            reference.set(bit);
            candidate.set(bit);
        }
        assertThat(candidate.cardinality(), equalTo(reference.cardinality()));
        assertThat(candidate.length(), equalTo(reference.length()));
    }

    // Final bit-by-bit comparison of the two implementations.
    for (int bit = 0; bit < numBits; bit++) {
        assertThat(candidate.get(bit), equalTo(reference.get(bit)));
    }
}

From source file:org.elasticsearch.index.shard.ShardSplittingQuery.java

License:Apache License

/**
 * Builds a constant-score weight whose scorer selects the documents that do
 * NOT belong to this shard after a split (i.e. the docs to delete), deciding
 * by _routing when present and by _id otherwise, with special handling for
 * routing-partitioned indices and nested documents.
 */
@Override
public Weight createWeight(IndexSearcher searcher, boolean needsScores, float boost) {
    return new ConstantScoreWeight(this, boost) {
        @Override
        public String toString() {
            return "weight(delete docs query)";
        }

        @Override
        public Scorer scorer(LeafReaderContext context) throws IOException {
            LeafReader leafReader = context.reader();
            FixedBitSet bitSet = new FixedBitSet(leafReader.maxDoc());
            Terms terms = leafReader.terms(RoutingFieldMapper.NAME);
            // True when the doc's id hashes to this shard under the split routing.
            Predicate<BytesRef> includeInShard = ref -> {
                int targetShardId = OperationRouting.generateShardId(indexMetaData,
                        Uid.decodeId(ref.bytes, ref.offset, ref.length), null);
                return shardId == targetShardId;
            };
            if (terms == null) {
                // this is the common case - no partitioning and no _routing values
                // in this case we also don't do anything special with regards to nested docs since we basically delete
                // by ID and parent and nested all have the same id.
                assert indexMetaData.isRoutingPartitionedIndex() == false;
                findSplitDocs(IdFieldMapper.NAME, includeInShard, leafReader, bitSet::set);
            } else {
                final BitSet parentBitSet;
                if (nestedParentBitSetProducer == null) {
                    parentBitSet = null;
                } else {
                    parentBitSet = nestedParentBitSetProducer.getBitSet(context);
                    if (parentBitSet == null) {
                        return null; // no matches
                    }
                }
                if (indexMetaData.isRoutingPartitionedIndex()) {
                    // this is the heaviest invariant. Here we have to visit all docs' stored fields to extract _id and _routing
                    // since this index is routing partitioned.
                    Visitor visitor = new Visitor(leafReader);
                    TwoPhaseIterator twoPhaseIterator = parentBitSet == null
                            ? new RoutingPartitionedDocIdSetIterator(visitor)
                            : new NestedRoutingPartitionedDocIdSetIterator(visitor, parentBitSet);
                    return new ConstantScoreScorer(this, score(), twoPhaseIterator);
                } else {
                    // here we potentially guard the docID consumers with our parent bitset if we have one.
                    // this ensures that we are only marking root documents in the nested case and if necessary
                    // we do a second pass to mark the corresponding children in markChildDocs
                    Function<IntConsumer, IntConsumer> maybeWrapConsumer = consumer -> {
                        if (parentBitSet != null) {
                            return docId -> {
                                if (parentBitSet.get(docId)) {
                                    consumer.accept(docId);
                                }
                            };
                        }
                        return consumer;
                    };
                    // in the _routing case we first go and find all docs that have a routing value and mark the ones we have to delete
                    findSplitDocs(RoutingFieldMapper.NAME, ref -> {
                        int targetShardId = OperationRouting.generateShardId(indexMetaData, null,
                                ref.utf8ToString());
                        return shardId == targetShardId;
                    }, leafReader, maybeWrapConsumer.apply(bitSet::set));

                    // now if we have a mixed index where some docs have a _routing value and some don't we have to exclude the ones
                    // with a routing value from the next iteration and delete / select based on the ID.
                    if (terms.getDocCount() != leafReader.maxDoc()) {
                        // this is a special case where some of the docs have no routing values; this sucks but it's possible today
                        FixedBitSet hasRoutingValue = new FixedBitSet(leafReader.maxDoc());
                        // predicate is always false: we only want the side effect of
                        // marking every doc that has a _routing value.
                        findSplitDocs(RoutingFieldMapper.NAME, ref -> false, leafReader,
                                maybeWrapConsumer.apply(hasRoutingValue::set));
                        IntConsumer bitSetConsumer = maybeWrapConsumer.apply(bitSet::set);
                        findSplitDocs(IdFieldMapper.NAME, includeInShard, leafReader, docId -> {
                            if (hasRoutingValue.get(docId) == false) {
                                bitSetConsumer.accept(docId);
                            }
                        });
                    }
                }
                if (parentBitSet != null) {
                    // if nested docs are involved we also need to mark all child docs that belong to a matching parent doc.
                    markChildDocs(parentBitSet, bitSet);
                }
            }

            return new ConstantScoreScorer(this, score(), new BitSetIterator(bitSet, bitSet.length()));
        }

        @Override
        public boolean isCacheable(LeafReaderContext ctx) {
            // This is not a regular query, let's not cache it. It wouldn't help
            // anyway.
            return false;
        }
    };
}

From source file:org.elasticsearch.search.MultiValueModeTests.java

License:Apache License

/**
 * Builds a bit set of the same size as {@code rootDocs} in which each
 * position that is NOT a root document is flagged as an inner document with
 * probability 1/2.
 *
 * @param rootDocs bit set marking root (parent) documents
 * @return a random inner-document bit set disjoint from {@code rootDocs}
 */
private static FixedBitSet randomInnerDocs(FixedBitSet rootDocs) {
    final FixedBitSet innerDocs = new FixedBitSet(rootDocs.length());
    for (int doc = 0; doc < innerDocs.length(); doc++) {
        final boolean isRoot = rootDocs.get(doc);
        if (isRoot == false && randomBoolean()) {
            innerDocs.set(doc);
        }
    }
    return innerDocs;
}