List of usage examples for `org.apache.lucene.util.FixedBitSet#length()`.
@Override
public int length()
From source file:de.unihildesheim.iw.lucene.search.EmptyFieldFilter.java
License:Open Source License
@Override public DocIdSet getDocIdSet(@NotNull final LeafReaderContext context, @Nullable final Bits acceptDocs) throws IOException { FixedBitSet checkBits; final LeafReader reader = context.reader(); final int maxDoc = reader.maxDoc(); BitSet finalBits = new SparseFixedBitSet(maxDoc); if (acceptDocs == null) { checkBits = BitsUtils.bits2FixedBitSet(reader.getLiveDocs()); if (checkBits == null) { // all live checkBits = new FixedBitSet(maxDoc); checkBits.set(0, checkBits.length()); }//from w w w. j a v a 2 s.c om } else { checkBits = BitsUtils.bits2FixedBitSet(acceptDocs); } @Nullable final Terms terms = reader.terms(this.field); if (terms != null) { final int termsDocCount = terms.getDocCount(); if (termsDocCount != 0) { if (termsDocCount == maxDoc) { // all matching finalBits = checkBits; } else { @Nullable final Terms t = reader.terms(this.field); if (t != null) { PostingsEnum pe = null; final TermsEnum te = t.iterator(null); int docId; while (te.next() != null) { pe = te.postings(checkBits, pe, (int) PostingsEnum.NONE); while ((docId = pe.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) { if (checkBits.getAndClear(docId)) { finalBits.set(docId); } } } } } } } return new BitDocIdSet(finalBits); }
From source file:org.apache.solr.search.CitationLRUCache.java
License:Apache License
private void warmIncrementally(SolrIndexSearcher searcher, SolrCache<K, V> old) throws IOException { if (regenerator == null) return;//from w ww . j a v a2 s. c om //System.out.println("regenerator: " + regenerator); Map<String, List<String>> fields = getFields(searcher, this.identifierFields); if (fields.get("textClasses").size() > 0 || fields.get("textClassesMV").size() > 0) { synchronized (map) { treatIdentifiersAsText = true; } } long warmingStartTime = System.currentTimeMillis(); CitationLRUCache<K, V> other = (CitationLRUCache<K, V>) old; // collect ids of documents that need to be reloaded/regenerated during this // warmup run //System.out.println("searcher: " + searcher.toString()); //System.out.println("maxDoc: " + searcher.getIndexReader().maxDoc()); FixedBitSet toRefresh = new FixedBitSet(searcher.getIndexReader().maxDoc()); //System.out.println("version=" + searcher.getIndexReader().getVersion()); //try { //System.out.println("commit=" + searcher.getIndexReader().getIndexCommit()); //} catch (IOException e2) { // TODO Auto-generated catch block //e2.printStackTrace(); //} // for (IndexReaderContext c : searcher.getTopReaderContext().children()) { // //System.out.println("context=" + c.reader().getCombinedCoreAndDeletesKey()); // } // for (IndexReaderContext l : searcher.getIndexReader().leaves()) { // //System.out.println(l); // } Bits liveDocs = searcher.getAtomicReader().getLiveDocs(); //System.out.println(liveDocs == null ? "liveDocs=" + null : "liveDocs=" + liveDocs.length()); //System.out.println("numDeletes=" + searcher.getAtomicReader().numDeletedDocs()); if (liveDocs == null) { // everything is new, this could be fresh index or merged/optimized index too //searcher.getAtomicReader().getContext().children().size() //other.map.clear(); // force regeneration toRefresh.set(0, toRefresh.length()); // Build the mapping from indexed values into lucene ids // this must always be available, so we build it no matter what... 
// XXX: make it update only the necessary IDs (not the whole index) unInvertedTheDamnThing(searcher.getAtomicReader(), fields, liveDocs, new KVSetter() { @SuppressWarnings("unchecked") @Override public void set(int docbase, int docid, Object value) { put((K) value, (V) (Integer) (docbase + docid)); } }); } else if (liveDocs != null) { Integer luceneId; for (V v : other.map.values()) { luceneId = ((Integer) v); if (luceneId <= liveDocs.length() && !liveDocs.get(luceneId)) { // doc was either deleted or updated //System.out.println("Found deleted: " + luceneId); // retrieve all citations/references for this luceneId and mark these docs to be refreshed } } for (int i = 0; i < toRefresh.length(); i++) { if (liveDocs.get(i)) { toRefresh.set(i); } } } // warm entries if (isAutowarmingOn()) { Object[] keys, vals = null; // Don't do the autowarming in the synchronized block, just pull out the keys and values. synchronized (other.map) { int sz = autowarm.getWarmCount(other.map.size()); keys = new Object[sz]; vals = new Object[sz]; Iterator<Map.Entry<K, V>> iter = other.map.entrySet().iterator(); // iteration goes from oldest (least recently used) to most recently used, // so we need to skip over the oldest entries. int skip = other.map.size() - sz; for (int i = 0; i < skip; i++) iter.next(); for (int i = 0; i < sz; i++) { Map.Entry<K, V> entry = iter.next(); keys[i] = entry.getKey(); vals[i] = entry.getValue(); } } // autowarm from the oldest to the newest entries so that the ordering will be // correct in the new cache. for (int i = 0; i < keys.length; i++) { try { boolean continueRegen = true; if (isModified(liveDocs, keys[i], vals[i])) { toRefresh.set((Integer) keys[i]); } else { continueRegen = regenerator.regenerateItem(searcher, this, old, keys[i], vals[i]); } if (!continueRegen) break; } catch (Throwable e) { SolrException.log(log, "Error during auto-warming of key:" + keys[i], e); } } } warmupTime = System.currentTimeMillis() - warmingStartTime; }
From source file:org.apache.solr.search.facet.UniqueSlotAcc.java
License:Apache License
/**
 * Resets the accumulator: drops the per-slot count cache and clears every
 * allocated per-slot ordinal bit set (null slots are left untouched).
 */
@Override
public void reset() {
    counts = null;
    for (final FixedBitSet slotOrds : arr) {
        if (slotOrds != null) {
            slotOrds.clear(0, slotOrds.length());
        }
    }
}
From source file:org.apache.solr.search.facet.UniqueSlotAcc.java
License:Apache License
private Object getShardHLL(int slot) throws IOException { FixedBitSet ords = arr[slot]; if (ords == null) return HLLAgg.NO_VALUES; HLL hll = factory.getHLL();/*from w ww . jav a2 s.c o m*/ long maxOrd = ords.length(); Hash.LongPair hashResult = new Hash.LongPair(); for (int ord = -1; ++ord < maxOrd;) { ord = ords.nextSetBit(ord); if (ord == DocIdSetIterator.NO_MORE_DOCS) break; BytesRef val = lookupOrd(ord); // way to avoid recomputing hash across slots? Prob not worth space Hash.murmurhash3_x64_128(val.bytes, val.offset, val.length, 0, hashResult); // idea: if the set is small enough, just send the hashes? We can add at the top // level or even just do a hash table at the top level. hll.addRaw(hashResult.val1); } SimpleOrderedMap map = new SimpleOrderedMap(); map.add("hll", hll.toBytes()); return map; }
From source file:org.apache.solr.search.facet.UniqueSlotAcc.java
License:Apache License
private Object getShardValue(int slot) throws IOException { if (factory != null) return getShardHLL(slot); FixedBitSet ords = arr[slot]; int unique;/*from www. j av a2s . c o m*/ if (counts != null) { unique = counts[slot]; } else { unique = ords == null ? 0 : ords.cardinality(); } SimpleOrderedMap map = new SimpleOrderedMap(); map.add("unique", unique); map.add("nTerms", nTerms); int maxExplicit = 100; // TODO: make configurable // TODO: share values across buckets if (unique > 0) { List lst = new ArrayList(Math.min(unique, maxExplicit)); long maxOrd = ords.length(); if (ords != null && ords.length() > 0) { for (int ord = 0; lst.size() < maxExplicit;) { ord = ords.nextSetBit(ord); if (ord == DocIdSetIterator.NO_MORE_DOCS) break; BytesRef val = lookupOrd(ord); Object o = field.getType().toObject(field, val); lst.add(o); if (++ord >= maxOrd) break; } } map.add("vals", lst); } return map; }
From source file:org.elasticsearch.index.cache.docset.simple.SimpleDocSetCache.java
License:Apache License
@Override public ContextDocIdSet obtain(AtomicReaderContext context) { Queue<FixedBitSet> docIdSets = cache.get(context.reader().getCoreCacheKey()); if (docIdSets == null) { if (context.reader() instanceof SegmentReader) { ((SegmentReader) context.reader()).addCoreClosedListener(this); }//from w w w. ja v a 2 s . co m cache.put(context.reader().getCoreCacheKey(), ConcurrentCollections.<FixedBitSet>newQueue()); return new ContextDocIdSet(context, new FixedBitSet(context.reader().maxDoc())); } FixedBitSet docIdSet = docIdSets.poll(); if (docIdSet == null) { docIdSet = new FixedBitSet(context.reader().maxDoc()); } else { docIdSet.clear(0, docIdSet.length()); } return new ContextDocIdSet(context, docIdSet); }
From source file:org.elasticsearch.index.fielddata.AbstractStringFieldDataTestCase.java
License:Apache License
/**
 * Randomized test: sorts parent documents by a "text" field stored on their
 * nested child documents, aggregated with the given {@code sortMode} (the
 * brute-force check below handles MIN and MAX explicitly), and verifies the
 * search results come back in non-decreasing order of the aggregated value.
 *
 * @param sortMode how multiple child values collapse to one sort key
 * @throws IOException on index access failure
 */
public void testNestedSorting(MultiValueMode sortMode) throws IOException {
    // Shared random pool of candidate field values for parents and children.
    final String[] values = new String[randomIntBetween(2, 20)];
    for (int i = 0; i < values.length; ++i) {
        values[i] = TestUtil.randomSimpleString(getRandom());
    }
    final int numParents = scaledRandomIntBetween(10, 3072);
    List<Document> docs = new ArrayList<>();
    // One bit per parent docID; a parent's children occupy the docIDs just
    // before it (Lucene block-join layout).
    FixedBitSet parents = new FixedBitSet(64);
    for (int i = 0; i < numParents; ++i) {
        docs.clear();
        final int numChildren = randomInt(4);
        for (int j = 0; j < numChildren; ++j) {
            final Document child = new Document();
            final int numValues = randomInt(3);
            for (int k = 0; k < numValues; ++k) {
                final String value = RandomPicks.randomFrom(getRandom(), values);
                addField(child, "text", value);
            }
            docs.add(child);
        }
        final Document parent = new Document();
        parent.add(new StringField("type", "parent", Store.YES));
        final String value = RandomPicks.randomFrom(getRandom(), values);
        if (value != null) {
            addField(parent, "text", value);
        }
        docs.add(parent);
        // This parent's docID = previous parent's docID + size of this block.
        int bit = parents.prevSetBit(parents.length() - 1) + docs.size();
        parents = FixedBitSet.ensureCapacity(parents, bit);
        parents.set(bit);
        writer.addDocuments(docs);
        if (randomInt(10) == 0) {
            // Occasionally commit so the test exercises multiple segments.
            writer.commit();
        }
    }
    DirectoryReader directoryReader = DirectoryReader.open(writer, true);
    directoryReader = ElasticsearchDirectoryReader.wrap(directoryReader, new ShardId(new Index("test"), 0));
    IndexSearcher searcher = new IndexSearcher(directoryReader);
    IndexFieldData<?> fieldData = getForField("text");
    // Randomly choose how docs with no child values should sort:
    // first, last, or as a concrete BytesRef value.
    final Object missingValue;
    switch (randomInt(4)) {
    case 0:
        missingValue = "_first";
        break;
    case 1:
        missingValue = "_last";
        break;
    case 2:
        missingValue = new BytesRef(RandomPicks.randomFrom(getRandom(), values));
        break;
    default:
        missingValue = new BytesRef(TestUtil.randomSimpleString(getRandom()));
        break;
    }
    Query parentFilter = new TermQuery(new Term("type", "parent"));
    Query childFilter = Queries.not(parentFilter);
    Nested nested = createNested(searcher, parentFilter, childFilter);
    BytesRefFieldComparatorSource nestedComparatorSource = new BytesRefFieldComparatorSource(fieldData,
            missingValue, sortMode, nested);
    ToParentBlockJoinQuery query = new ToParentBlockJoinQuery(new ConstantScoreQuery(childFilter),
            new QueryBitSetProducer(parentFilter), ScoreMode.None);
    Sort sort = new Sort(new SortField("text", nestedComparatorSource));
    TopFieldDocs topDocs = searcher.search(query, randomIntBetween(1, numParents), sort);
    assertTrue(topDocs.scoreDocs.length > 0);
    // Verify every hit is a parent and that hits arrive in non-decreasing
    // order of the aggregated child value, recomputed here by brute force.
    BytesRef previous = null;
    for (int i = 0; i < topDocs.scoreDocs.length; ++i) {
        final int docID = topDocs.scoreDocs[i].doc;
        assertTrue("expected " + docID + " to be a parent", parents.get(docID));
        BytesRef cmpValue = null;
        // Children of this parent are the docIDs between the previous parent
        // (exclusive) and this parent (exclusive).
        for (int child = parents.prevSetBit(docID - 1) + 1; child < docID; ++child) {
            String[] sVals = searcher.doc(child).getValues("text");
            final BytesRef[] vals;
            if (sVals.length == 0) {
                vals = new BytesRef[0];
            } else {
                vals = new BytesRef[sVals.length];
                for (int j = 0; j < vals.length; ++j) {
                    vals[j] = new BytesRef(sVals[j]);
                }
            }
            for (BytesRef value : vals) {
                if (cmpValue == null) {
                    cmpValue = value;
                } else if (sortMode == MultiValueMode.MIN && value.compareTo(cmpValue) < 0) {
                    cmpValue = value;
                } else if (sortMode == MultiValueMode.MAX && value.compareTo(cmpValue) > 0) {
                    cmpValue = value;
                }
            }
        }
        if (cmpValue == null) {
            // No child values: substitute the configured missing-value key.
            if ("_first".equals(missingValue)) {
                cmpValue = new BytesRef();
            } else if ("_last".equals(missingValue) == false) {
                cmpValue = (BytesRef) missingValue;
            }
        }
        if (previous != null && cmpValue != null) {
            assertTrue(previous.utf8ToString() + " / " + cmpValue.utf8ToString(),
                    previous.compareTo(cmpValue) <= 0);
        }
        previous = cmpValue;
    }
    searcher.getIndexReader().close();
}
From source file:org.elasticsearch.index.seqno.CountedBitSetTests.java
License:Apache License
/**
 * Randomized parity test: a CountedBitSet must mirror a plain FixedBitSet of
 * the same size bit-for-bit, reporting identical cardinality and length after
 * every mutation.
 */
public void testCompareToFixedBitset() {
    final int numBits = (short) randomIntBetween(8, 4096);
    final FixedBitSet expected = new FixedBitSet(numBits);
    final CountedBitSet actual = new CountedBitSet((short) numBits);

    for (int bit = 0; bit < numBits; bit++) {
        if (randomBoolean()) {
            expected.set(bit);
            actual.set(bit);
        }
        // The invariants must hold after each step, not only at the end.
        assertThat(actual.cardinality(), equalTo(expected.cardinality()));
        assertThat(actual.length(), equalTo(expected.length()));
    }

    for (int bit = 0; bit < numBits; bit++) {
        assertThat(actual.get(bit), equalTo(expected.get(bit)));
    }
}
From source file:org.elasticsearch.index.shard.ShardSplittingQuery.java
License:Apache License
/**
 * Creates a constant-score {@link Weight} whose scorer marks the documents
 * affected by a shard split — per its own toString it is a "delete docs query",
 * selecting docs whose routing/ID resolves to {@code shardId}. Handles three
 * segment shapes: no _routing terms at all (select purely by _id), a
 * routing-partitioned index (stored-fields visitor via a two-phase iterator),
 * and a mixed index where only some docs carry a _routing value. When a nested
 * parent bitset producer is configured, only root documents are marked first
 * and matching children are filled in afterwards via markChildDocs.
 */
@Override
public Weight createWeight(IndexSearcher searcher, boolean needsScores, float boost) {
    return new ConstantScoreWeight(this, boost) {
        @Override
        public String toString() {
            return "weight(delete docs query)";
        }

        @Override
        public Scorer scorer(LeafReaderContext context) throws IOException {
            LeafReader leafReader = context.reader();
            FixedBitSet bitSet = new FixedBitSet(leafReader.maxDoc());
            Terms terms = leafReader.terms(RoutingFieldMapper.NAME);
            // True when the (decoded) _id routes to the target shard.
            Predicate<BytesRef> includeInShard = ref -> {
                int targetShardId = OperationRouting.generateShardId(indexMetaData,
                        Uid.decodeId(ref.bytes, ref.offset, ref.length), null);
                return shardId == targetShardId;
            };
            if (terms == null) {
                // this is the common case - no partitioning and no _routing values
                // in this case we also don't do anything special with regards to nested docs since we basically delete
                // by ID and parent and nested all have the same id.
                assert indexMetaData.isRoutingPartitionedIndex() == false;
                findSplitDocs(IdFieldMapper.NAME, includeInShard, leafReader, bitSet::set);
            } else {
                final BitSet parentBitSet;
                if (nestedParentBitSetProducer == null) {
                    parentBitSet = null;
                } else {
                    parentBitSet = nestedParentBitSetProducer.getBitSet(context);
                    if (parentBitSet == null) {
                        return null; // no matches
                    }
                }
                if (indexMetaData.isRoutingPartitionedIndex()) {
                    // this is the heaviest invariant. Here we have to visit all docs stored fields do extract _id and _routing
                    // this this index is routing partitioned.
                    Visitor visitor = new Visitor(leafReader);
                    TwoPhaseIterator twoPhaseIterator = parentBitSet == null
                            ? new RoutingPartitionedDocIdSetIterator(visitor)
                            : new NestedRoutingPartitionedDocIdSetIterator(visitor, parentBitSet);
                    return new ConstantScoreScorer(this, score(), twoPhaseIterator);
                } else {
                    // here we potentially guard the docID consumers with our parent bitset if we have one.
                    // this ensures that we are only marking root documents in the nested case and if necessary
                    // we do a second pass to mark the corresponding children in markChildDocs
                    Function<IntConsumer, IntConsumer> maybeWrapConsumer = consumer -> {
                        if (parentBitSet != null) {
                            return docId -> {
                                if (parentBitSet.get(docId)) {
                                    consumer.accept(docId);
                                }
                            };
                        }
                        return consumer;
                    };
                    // in the _routing case we first go and find all docs that have a routing value and mark the ones we have to delete
                    findSplitDocs(RoutingFieldMapper.NAME, ref -> {
                        int targetShardId = OperationRouting.generateShardId(indexMetaData, null, ref.utf8ToString());
                        return shardId == targetShardId;
                    }, leafReader, maybeWrapConsumer.apply(bitSet::set));
                    // now if we have a mixed index where some docs have a _routing value and some don't we have to exclude the ones
                    // with a routing value from the next iteration an delete / select based on the ID.
                    if (terms.getDocCount() != leafReader.maxDoc()) {
                        // this is a special case where some of the docs have no routing values this sucks but it's possible today
                        FixedBitSet hasRoutingValue = new FixedBitSet(leafReader.maxDoc());
                        findSplitDocs(RoutingFieldMapper.NAME, ref -> false, leafReader,
                                maybeWrapConsumer.apply(hasRoutingValue::set));
                        IntConsumer bitSetConsumer = maybeWrapConsumer.apply(bitSet::set);
                        findSplitDocs(IdFieldMapper.NAME, includeInShard, leafReader, docId -> {
                            if (hasRoutingValue.get(docId) == false) {
                                bitSetConsumer.accept(docId);
                            }
                        });
                    }
                }
                if (parentBitSet != null) {
                    // if nested docs are involved we also need to mark all child docs that belong to a matching parent doc.
                    markChildDocs(parentBitSet, bitSet);
                }
            }
            return new ConstantScoreScorer(this, score(), new BitSetIterator(bitSet, bitSet.length()));
        }

        @Override
        public boolean isCacheable(LeafReaderContext ctx) {
            // This is not a regular query, let's not cache it. It wouldn't help
            // anyway.
            return false;
        }
    };
}
From source file:org.elasticsearch.search.MultiValueModeTests.java
License:Apache License
private static FixedBitSet randomInnerDocs(FixedBitSet rootDocs) { FixedBitSet innerDocs = new FixedBitSet(rootDocs.length()); for (int i = 0; i < innerDocs.length(); ++i) { if (!rootDocs.get(i) && randomBoolean()) { innerDocs.set(i);// ww w . j a v a 2 s . c o m } } return innerDocs; }