List of usage examples for the org.apache.lucene.util.FixedBitSet constructor FixedBitSet(int numBits)
public FixedBitSet(int numBits)
From source file:DocIdSetBenchmark.java
License:Apache License
protected static FixedBitSet randomSet(int numBits, int numBitsSet) { assert numBitsSet <= numBits; final FixedBitSet set = new FixedBitSet(numBits); if (numBitsSet == numBits) { set.set(0, numBits);//from ww w.j a v a 2 s . co m } else { for (int i = 0; i < numBitsSet; ++i) { while (true) { final int o = RANDOM.nextInt(numBits); if (!set.get(o)) { set.set(o); break; } } } } return set; }
From source file:DocIdSetBenchmark.java
License:Apache License
public static long scoreBuildFixedBitSet(DocIdSet set, int maxDoc) throws IOException { final long start = System.nanoTime(); int dummy = 0; long score = 0; while (System.nanoTime() - start < SECOND) { final FixedBitSet copy = new FixedBitSet(maxDoc); DocIdSetIterator iterator = set.iterator(); if (iterator != null) { copy.or(iterator);// www . ja v a 2 s . com } dummy += copy.hashCode(); ++score; } DUMMY += dummy; return score; }
From source file:com.b2international.index.lucene.DocIdCollector.java
License:Apache License
/**
 * Constructs a document ID collector backed by a bit set of the given size.
 *
 * @param size the size of the backing bit set. Preferably the maximum document ID.
 * @see DocIdCollector#create(int)
 */
public DocIdCollector(final int size) {
    this.docIds = new FixedBitSet(size);
}
From source file:com.floragunn.searchguard.configuration.DlsFlsFilterLeafReader.java
License:Open Source License
DlsFlsFilterLeafReader(final LeafReader delegate, final Set<String> includes, final Query dlsQuery) { super(delegate); flsEnabled = includes != null && !includes.isEmpty(); dlsEnabled = dlsQuery != null;//from w w w . j a v a 2s .c om if (flsEnabled) { this.includes = includes.toArray(new String[0]); final FieldInfos infos = delegate.getFieldInfos(); final List<FieldInfo> fi = new ArrayList<FieldInfo>(infos.size()); for (final FieldInfo info : infos) { final String fname = info.name; if ((!WildcardMatcher.containsWildcard(fname) && includes.contains(fname)) || WildcardMatcher.matchAny(this.includes, fname)) { fi.add(info); } } this.flsFieldInfos = new FieldInfos(fi.toArray(new FieldInfo[0])); } else { this.includes = null; this.flsFieldInfos = null; } if (dlsEnabled) { try { //borrowed from Apache Lucene (Copyright Apache Software Foundation (ASF)) final IndexSearcher searcher = new IndexSearcher(this); searcher.setQueryCache(null); final boolean needsScores = false; final Weight preserveWeight = searcher.createNormalizedWeight(dlsQuery, needsScores); final int maxDoc = in.maxDoc(); final FixedBitSet bits = new FixedBitSet(maxDoc); final Scorer preverveScorer = preserveWeight.scorer(this.getContext()); if (preverveScorer != null) { bits.or(preverveScorer.iterator()); } if (in.hasDeletions()) { final Bits oldLiveDocs = in.getLiveDocs(); assert oldLiveDocs != null; final DocIdSetIterator it = new BitSetIterator(bits, 0L); for (int i = it.nextDoc(); i != DocIdSetIterator.NO_MORE_DOCS; i = it.nextDoc()) { if (!oldLiveDocs.get(i)) { bits.clear(i); } } } this.liveDocs = bits; this.numDocs = bits.cardinality(); } catch (Exception e) { throw new RuntimeException(e); } } else { this.liveDocs = null; this.numDocs = -1; } }
From source file:com.greplin.lucene.filter.PhraseFilter.java
License:Apache License
/**
 * Computes the set of documents matched by this phrase filter.
 *
 * <p>The reader is split into sub-readers. For each sub-reader the terms are
 * visited in the iteration order of a sorted set of {@code TermWithFrequency}
 * (presumably ascending document frequency, as the variable name suggests;
 * confirm against {@code TermWithFrequency}'s ordering). The first term seeds
 * a match list with offset-adjusted positions; every later term is
 * intersected with that list. Per-reader doc ids are shifted by the summed
 * {@code maxDoc()} of the preceding sub-readers when the result is assembled.
 *
 * @param reader the reader to filter
 * @return a {@link FixedBitSet} when the match count exceeds
 *         {@code reader.maxDoc() >> 5}, {@code DocIdSets.EMPTY} when nothing
 *         matched, otherwise a {@code SortedIntArrayDocIdSet}
 * @throws IOException if reading term positions fails
 */
@Override
public DocIdSet getDocIdSet(final IndexReader reader) throws IOException {
    List<IndexReader> subReaders = IndexReaders.gatherSubReaders(reader);
    // One slot per sub-reader; a slot stays null when that reader produced no match list.
    PhraseFilterMatchList[] results = new PhraseFilterMatchList[subReaders.size()];
    int matchCount = 0;
    int readerNumber = 0;
    for (IndexReader subReader : subReaders) {
        // Pair each term with its document frequency in this sub-reader and its
        // index in the phrase (used below as the position offset).
        SortedSet<TermWithFrequency> termsOrderedByFrequency = Sets.newTreeSet();
        for (int i = 0; i < this.terms.length; i++) {
            Term t = this.terms[i];
            termsOrderedByFrequency.add(new TermWithFrequency(t, subReader.docFreq(t), i));
        }
        PhraseFilterMatchList matches = null;
        TermPositions termPositions = subReader.termPositions();
        try {
            for (TermWithFrequency term : termsOrderedByFrequency) {
                // A term that occurs in no document of this sub-reader ends the scan.
                if (term.docFreq == 0) {
                    break;
                }
                termPositions.seek(term.term);
                if (matches == null) {
                    // If this is the first term, collect all matches that intersect
                    // with the provided initial document set.
                    // NOTE(review): the intersection is obtained for the top-level
                    // reader, not for subReader; confirm this is intended.
                    Intersection intersection = this.intersectionProvider.get(reader);
                    matches = new PhraseFilterMatchList(term.docFreq);
                    while (intersection.advanceToNextIntersection(termPositions)) {
                        int freq = termPositions.freq();
                        PhraseFilterIntList list = new PhraseFilterIntList(freq);
                        for (int i = 0; i < freq; i++) {
                            // Store positions minus the term's phrase offset.
                            list.add(termPositions.nextPosition() - term.offset);
                        }
                        matches.add(termPositions.doc(), list);
                    }
                } else {
                    // Otherwise, intersect with the existing matches.
                    matches.intersect(termPositions, term.offset);
                }
                // Nothing left to intersect with; remaining terms cannot add matches.
                if (matches.getCount() == 0) {
                    break;
                }
            }
        } finally {
            termPositions.close();
        }
        if (matches != null) {
            results[readerNumber] = matches;
            matchCount += matches.getCount();
        }
        readerNumber++;
    }
    final int bitsPerIntPowerLogTwo = 5; // 2^5 = 32
    // More than maxDoc / 32 matches: return a bit set instead of an int array.
    if (matchCount > reader.maxDoc() >> bitsPerIntPowerLogTwo) {
        FixedBitSet result = new FixedBitSet(reader.maxDoc());
        int readerOffset = 0;
        for (int readerIndex = 0; readerIndex < results.length; readerIndex++) {
            PhraseFilterMatchList matches = results[readerIndex];
            if (matches != null) {
                int count = matches.getCount();
                int[] docIds = matches.getDocIds();
                for (int i = 0; i < count; i++) {
                    result.set(docIds[i] + readerOffset);
                }
            }
            // Advance the offset even for readers without matches.
            readerOffset += subReaders.get(readerIndex).maxDoc();
        }
        return result;
    } else if (matchCount == 0) {
        return DocIdSets.EMPTY;
    } else {
        // Sparse result: copy the offset-adjusted doc ids into one int array.
        int[] result = new int[matchCount];
        int base = 0;
        int readerOffset = 0;
        for (int readerIndex = 0; readerIndex < results.length; readerIndex++) {
            PhraseFilterMatchList matches = results[readerIndex];
            if (matches != null) {
                int count = matches.getCount();
                int[] docIds = matches.getDocIds();
                for (int i = 0; i < count; i++) {
                    result[base + i] = docIds[i] + readerOffset;
                }
                base += count;
            }
            readerOffset += subReaders.get(readerIndex).maxDoc();
        }
        return new SortedIntArrayDocIdSet(result);
    }
}
From source file:com.greplin.lucene.filter.TermsFilter.java
License:Apache License
/**
 * Returns a bit set marking every document that contains at least one of this
 * filter's terms.
 *
 * @param reader the reader whose postings are scanned
 * @return a {@link FixedBitSet} of {@code reader.maxDoc()} bits with one bit
 *         set per document returned by the term docs enumeration
 * @throws IOException if reading the postings fails
 */
@Override
public DocIdSet getDocIdSet(final IndexReader reader) throws IOException {
    final FixedBitSet matching = new FixedBitSet(reader.maxDoc());
    final TermDocs postings = reader.termDocs();
    try {
        for (Term wanted : this.terms) {
            postings.seek(wanted);
            for (boolean more = postings.next(); more; more = postings.next()) {
                matching.set(postings.doc());
            }
        }
    } finally {
        // Always release the postings enumeration, even if a seek or read fails.
        postings.close();
    }
    return matching;
}
From source file:com.kmwllc.search.graph.GraphTermsCollector.java
/**
 * Creates a collector for the given field.
 *
 * <p>Stores the arguments, creates an empty {@code BytesRefHash} for the
 * collected terms and, if {@code bits} has not been set yet, allocates a
 * {@link FixedBitSet} of {@code maxDoc} bits.
 *
 * @param field         the field this collector works on
 * @param maxDoc        size used for the backing bit set
 * @param currentResult bits stored as the current result
 * @param leafNodes     doc set stored as the leaf nodes
 */
GraphTermsCollector(String field, int maxDoc, Bits currentResult, DocSet leafNodes) {
    this.field = field;
    this.maxDoc = maxDoc;
    this.currentResult = currentResult;
    this.leafNodes = leafNodes;
    this.collectorTerms = new BytesRefHash();
    // TODO: consider creating an edge cache for the compiled automaton
    // for the query out of a given doc.
    if (bits == null) {
        bits = new FixedBitSet(maxDoc);
    }
}
From source file:com.kmwllc.search.graph.GraphTermsCollector.java
public DocSet getDocSet() { if (bits == null) { bits = new FixedBitSet(maxDoc); }//from w ww . java 2 s .c om // System.out.println("BIT SET POSITION :" + pos); return new BitDocSet(bits, numHits); }
From source file:com.tcdi.zombodb.query.VisibilityQueryHelper.java
License:Apache License
/**
 * Builds, per index reader ordinal, a bit set of document ids derived from the
 * transaction visibility rules below.
 *
 * <p>Documents whose {@code field} value is one of {@code updatedCtids} are
 * collected and grouped by that value. Each group is sorted by descending xid,
 * then descending sequence. Walking a group in that order, the first entry
 * that passes the visibility test is skipped; every other entry has its bit
 * set in the bit set of its reader.
 *
 * <p>NOTE(review): bits are therefore set for the entries that are
 * <em>not</em> the chosen visible version, which is the opposite of what the
 * inline comment below states; confirm how callers interpret the bit sets.
 * The {@code query} and {@code xmin} parameters are not read in this method.
 *
 * @param query        not used in this method
 * @param field        name of the field holding the previous ctid
 * @param myXid        transaction id exempt from the committed check
 * @param xmin         not used in this method
 * @param xmax         entries with a larger xid are treated as not visible
 * @param activeXids   entries with one of these xids are treated as not visible
 * @param searcher     searcher used to collect documents and to look up the
 *                     {@code _zdb_committed_xid} terms
 * @param updatedCtids values of {@code field} to look up; an empty list
 *                     yields an empty map
 * @return map from reader ordinal to bit set; empty when there is nothing to
 *         look up or nothing was collected
 * @throws IOException if searching or reading doc values fails
 */
static Map<Integer, FixedBitSet> determineVisibility(final Query query, final String field, final long myXid,
        final long xmin, final long xmax, final Set<Long> activeXids, IndexSearcher searcher,
        List<BytesRef> updatedCtids) throws IOException {
    final Map<Integer, FixedBitSet> visibilityBitSets = new HashMap<>();

    if (updatedCtids.size() == 0)
        return visibilityBitSets;

    //
    // build a map of {@link VisibilityInfo} objects by each _prev_ctid
    //
    // We use XConstantScoreQuery here so that we exclude deleted docs
    //
    final Map<BytesRef, List<VisibilityInfo>> map = new HashMap<>();
    searcher.search(
            new XConstantScoreQuery(
                    SearchContext.current().filterCache().cache(new TermsFilter(field, updatedCtids))),
            new ZomboDBTermsCollector(field) {
                // Per-segment state, refreshed in setNextReader().
                private SortedDocValues prevCtids;
                private SortedNumericDocValues xids;
                private SortedNumericDocValues sequence;
                private int ord;
                private int maxdoc;

                @Override
                public void collect(int doc) throws IOException {
                    xids.setDocument(doc);
                    sequence.setDocument(doc);

                    // Only the first value of each numeric field is read.
                    long xid = xids.valueAt(0);
                    long seq = sequence.valueAt(0);
                    BytesRef prevCtid = prevCtids.get(doc);

                    List<VisibilityInfo> matchingDocs = map.get(prevCtid);

                    // The map key is a deep copy of the looked-up BytesRef.
                    if (matchingDocs == null)
                        map.put(BytesRef.deepCopyOf(prevCtid), matchingDocs = new ArrayList<>());
                    matchingDocs.add(new VisibilityInfo(ord, maxdoc, doc, xid, seq));
                }

                @Override
                public void setNextReader(AtomicReaderContext context) throws IOException {
                    prevCtids = FieldCache.DEFAULT.getTermsIndex(context.reader(), field);
                    xids = context.reader().getSortedNumericDocValues("_xid");
                    sequence = context.reader().getSortedNumericDocValues("_zdb_seq");
                    ord = context.ord;
                    maxdoc = context.reader().maxDoc();
                }
            });

    if (map.isEmpty())
        return visibilityBitSets;

    //
    // pick out the first VisibilityInfo for each document that is visible & committed
    // and build a FixedBitSet for each reader 'ord' that contains visible
    // documents. A map of these (key'd on reader ord) is what we return.
    //
    // NOTE(review): the loop below sets bits for every entry EXCEPT the first
    // visible one, so the bit sets hold the non-visible entries.
    //
    BytesRefBuilder bytesRefBuilder = new BytesRefBuilder() {
        /* overloaded to avoid making a copy of the byte array */
        @Override
        public BytesRef toBytesRef() {
            return new BytesRef(this.bytes(), 0, this.length());
        }
    };

    Terms committedXidsTerms = MultiFields.getFields(searcher.getIndexReader()).terms("_zdb_committed_xid");
    // Stays null when the index has no _zdb_committed_xid terms.
    TermsEnum committedXidsEnum = committedXidsTerms == null ? null : committedXidsTerms.iterator(null);
    for (List<VisibilityInfo> visibility : map.values()) {
        // Sort by descending xid, ties broken by descending sequence.
        CollectionUtil.introSort(visibility, new Comparator<VisibilityInfo>() {
            @Override
            public int compare(VisibilityInfo o1, VisibilityInfo o2) {
                int cmp = Long.compare(o2.xid, o1.xid);
                return cmp == 0 ? Long.compare(o2.sequence, o1.sequence) : cmp;
            }
        });

        boolean foundVisible = false;
        for (VisibilityInfo mapping : visibility) {

            // An entry is marked when a visible one was already found, its xid is
            // above xmax, its xid is active, or its xid is neither ours nor committed.
            if (foundVisible || mapping.xid > xmax || activeXids.contains(mapping.xid)
                    || (mapping.xid != myXid && !isCommitted(committedXidsEnum, mapping.xid, bytesRefBuilder))) {
                // document is not visible to us
                FixedBitSet visibilityBitset = visibilityBitSets.get(mapping.readerOrd);
                // Bit sets are created lazily, one per reader ordinal, sized to that reader's maxdoc.
                if (visibilityBitset == null)
                    visibilityBitSets.put(mapping.readerOrd, visibilityBitset = new FixedBitSet(mapping.maxdoc));
                visibilityBitset.set(mapping.docid);
            } else {
                foundVisible = true;
            }
        }
    }

    return visibilityBitSets;
}
From source file:de.jetsli.lumeo.util.TermFilter.java
License:Apache License
/**
 * Returns a bit set marking the documents of this segment that contain the
 * filter's term.
 *
 * @param context    the segment to filter
 * @param acceptDocs bits passed through to the postings lookup
 * @return a {@link FixedBitSet} of {@code maxDoc()} bits; empty when the
 *         reader returns no postings for the term
 * @throws IOException if reading the postings fails
 */
@Override
public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException {
    final AtomicReader segment = context.reader();
    final FixedBitSet matching = new FixedBitSet(segment.maxDoc());
    final DocsEnum postings = segment.termDocsEnum(acceptDocs, fieldName, bytes, false);
    if (postings != null) {
        for (int doc = postings.nextDoc();
                doc != DocIdSetIterator.NO_MORE_DOCS;
                doc = postings.nextDoc()) {
            matching.set(doc);
        }
    }
    return matching;
}