List of usage examples for org.apache.lucene.index.DocValues#unwrapSingleton
public static NumericDocValues unwrapSingleton(SortedNumericDocValues dv)
From source file:org.apache.solr.request.DocValuesStats.java
License:Apache License
/**
 * Computes stats (and optional per-field facet stats) for {@code fieldName} over the documents
 * in {@code docs}, using sorted (set) doc values for term lookup and per-segment accumulation.
 *
 * @param searcher     searcher providing the schema and atomic reader
 * @param fieldName    field to compute stats for
 * @param docs         domain of documents to accumulate over (already excludes deleted docs)
 * @param calcDistinct whether distinct-value counts should be computed
 * @param facet        single-valued fields to facet the stats on; empty array for none
 * @return accumulated stats, including missing-value and facet breakdowns
 * @throws IOException on index access failure
 * @throws SolrException (BAD_REQUEST) if any facet field is multi-valued
 */
public static StatsValues getCounts(SolrIndexSearcher searcher, String fieldName, DocSet docs,
        boolean calcDistinct, String[] facet) throws IOException {
    SchemaField schemaField = searcher.getSchema().getField(fieldName);
    FieldType ft = schemaField.getType();
    StatsValues res = StatsValuesFactory.createStatsValues(schemaField, calcDistinct);
    // Initialize facet stats, if facets have been passed in.
    final FieldFacetStats[] facetStats = new FieldFacetStats[facet.length];
    int upto = 0;
    for (String facetField : facet) {
        SchemaField fsf = searcher.getSchema().getField(facetField);
        if (fsf.multiValued()) {
            // stats faceting requires exactly one value per doc per facet field
            throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
                    "Stats can only facet on single-valued fields, not: " + facetField);
        }
        SchemaField facetSchemaField = searcher.getSchema().getField(facetField);
        facetStats[upto++] = new FieldFacetStats(searcher, facetField, schemaField, facetSchemaField,
                calcDistinct);
    }
    // TODO: remove multiValuedFieldCache(), check dv type / uninversion type?
    final boolean multiValued = schemaField.multiValued() || ft.multiValuedFieldCache();
    SortedSetDocValues si; // for term lookups only
    OrdinalMap ordinalMap = null; // for mapping per-segment ords to global ones
    if (multiValued) {
        si = searcher.getAtomicReader().getSortedSetDocValues(fieldName);
        if (si instanceof MultiSortedSetDocValues) {
            ordinalMap = ((MultiSortedSetDocValues) si).mapping;
        }
    } else {
        // single-valued field: wrap as a singleton SORTED_SET so both paths share lookupOrd below
        SortedDocValues single = searcher.getAtomicReader().getSortedDocValues(fieldName);
        si = single == null ? null : DocValues.singleton(single);
        if (single instanceof MultiSortedDocValues) {
            ordinalMap = ((MultiSortedDocValues) single).mapping;
        }
    }
    if (si == null) {
        si = DocValues.emptySortedSet();
    }
    if (si.getValueCount() >= Integer.MAX_VALUE) {
        // counts[] below is indexed by (int) ord, so the ord space must fit in an int
        throw new UnsupportedOperationException(
                "Currently this stats method is limited to " + Integer.MAX_VALUE + " unique terms");
    }
    int missingDocCountTotal = 0;
    final int nTerms = (int) si.getValueCount();
    // count collection array only needs to be as big as the number of terms we are
    // going to collect counts for.
    final int[] counts = new int[nTerms];
    Filter filter = docs.getTopFilter();
    List<AtomicReaderContext> leaves = searcher.getTopReaderContext().leaves();
    for (int subIndex = 0; subIndex < leaves.size(); subIndex++) {
        AtomicReaderContext leaf = leaves.get(subIndex);
        DocIdSet dis = filter.getDocIdSet(leaf, null); // solr docsets already exclude any deleted docs
        DocIdSetIterator disi = null;
        if (dis != null) {
            disi = dis.iterator();
        }
        if (disi != null) {
            int docBase = leaf.docBase;
            if (multiValued) {
                SortedSetDocValues sub = leaf.reader().getSortedSetDocValues(fieldName);
                if (sub == null) {
                    sub = DocValues.emptySortedSet();
                }
                final SortedDocValues singleton = DocValues.unwrapSingleton(sub);
                if (singleton != null) {
                    // some codecs may optimize SORTED_SET storage for single-valued fields
                    missingDocCountTotal += accumSingle(counts, docBase, facetStats, singleton, disi,
                            subIndex, ordinalMap);
                } else {
                    missingDocCountTotal += accumMulti(counts, docBase, facetStats, sub, disi,
                            subIndex, ordinalMap);
                }
            } else {
                SortedDocValues sub = leaf.reader().getSortedDocValues(fieldName);
                if (sub == null) {
                    sub = DocValues.emptySorted();
                }
                missingDocCountTotal += accumSingle(counts, docBase, facetStats, sub, disi, subIndex,
                        ordinalMap);
            }
        }
    }
    // add results in index order
    for (int ord = 0; ord < counts.length; ord++) {
        int count = counts[ord];
        if (count > 0) {
            final BytesRef value = si.lookupOrd(ord);
            res.accumulate(value, count);
            for (FieldFacetStats f : facetStats) {
                f.accumulateTermNum(ord, value);
            }
        }
    }
    res.addMissing(missingDocCountTotal);
    if (facetStats.length > 0) {
        for (FieldFacetStats f : facetStats) {
            Map<String, StatsValues> facetStatsValues = f.facetStatsValues;
            f.accumulateMissing();
            res.addFacet(f.name, facetStatsValues);
        }
    }
    return res;
}
From source file:org.apache.solr.request.IntervalFacets.java
License:Apache License
/**
 * Accumulates interval counts for a string (SORTED / SORTED_SET) field, walking each leaf
 * segment of the searcher's index and dispatching to the single- or multi-valued accumulator.
 * Reads {@code docs}, {@code searcher} and {@code schemaField} from the enclosing instance.
 *
 * @throws IOException on index access failure
 */
private void getCountString() throws IOException {
    Filter filter = docs.getTopFilter();
    List<AtomicReaderContext> leaves = searcher.getTopReaderContext().leaves();
    for (int subIndex = 0; subIndex < leaves.size(); subIndex++) {
        AtomicReaderContext leaf = leaves.get(subIndex);
        DocIdSet dis = filter.getDocIdSet(leaf, null); // solr docsets already exclude any deleted docs
        if (dis == null) {
            // no matching docs in this segment
            continue;
        }
        DocIdSetIterator disi = dis.iterator();
        if (disi != null) {
            if (schemaField.multiValued()) {
                SortedSetDocValues sub = leaf.reader().getSortedSetDocValues(schemaField.getName());
                if (sub == null) {
                    // segment has no doc values for this field; nothing to count here
                    continue;
                }
                final SortedDocValues singleton = DocValues.unwrapSingleton(sub);
                if (singleton != null) {
                    // some codecs may optimize SORTED_SET storage for single-valued fields
                    accumIntervalsSingle(singleton, disi, dis.bits());
                } else {
                    accumIntervalsMulti(sub, disi, dis.bits());
                }
            } else {
                SortedDocValues sub = leaf.reader().getSortedDocValues(schemaField.getName());
                if (sub == null) {
                    continue;
                }
                accumIntervalsSingle(sub, disi, dis.bits());
            }
        }
    }
}
From source file:org.apache.solr.search.facet.FacetFieldProcessorByArrayDV.java
License:Apache License
/**
 * Collects facet counts over the base domain, choosing between per-segment counting
 * (mapped to global ords at the end) and direct global-ord collection based on a
 * hits-per-bucket heuristic. Skips work entirely when there are no terms or the
 * domain is too small to reach {@code effectiveMincount}.
 *
 * @throws IOException on index access failure
 */
@Override
protected void collectDocs() throws IOException {
    int domainSize = fcontext.base.size();
    if (nTerms <= 0 || domainSize < effectiveMincount) { // TODO: what about allBuckets? missing bucket?
        return;
    }
    // TODO: refactor some of this logic into a base class
    boolean countOnly = collectAcc == null && allBucketsAcc == null;
    boolean fullRange = startTermIndex == 0 && endTermIndex == si.getValueCount();
    // Are we expecting many hits per bucket?
    // FUTURE: pro-rate for nTerms?
    // FUTURE: better take into account number of values in multi-valued fields. This info is available for indexed fields.
    // FUTURE: take into account that bigger ord maps are more expensive than smaller ones
    // One test: 5M doc index, faceting on a single-valued field with almost 1M unique values,
    // crossover point where global counting was slower than per-segment counting was a domain
    // of 658k docs. At that point, top 10 buckets had 6-7 matches each.
    // this was for heap docvalues produced by UninvertingReader
    // Since these values were randomly distributed, lets round our domain multiplier up to
    // account for less random real world data.
    long domainMultiplier = multiValuedField ? 4L : 2L;
    boolean manyHitsPerBucket = domainSize * domainMultiplier > (si.getValueCount() + 3); // +3 to increase test coverage with small tests
    // If we're only calculating counts, we're not prefixing, and we expect to collect many
    // documents per unique value, then collect per-segment before mapping to global ords at the
    // end. This will save redundant seg->global ord mappings.
    // FUTURE: there are probably some other non "countOnly" cases where we can use this as well
    // (i.e. those where the docid is not used)
    boolean canDoPerSeg = countOnly && fullRange;
    boolean accumSeg = manyHitsPerBucket && canDoPerSeg;
    if (freq.perSeg != null)
        accumSeg = canDoPerSeg && freq.perSeg; // internal - override perSeg heuristic
    final List<LeafReaderContext> leaves = fcontext.searcher.getIndexReader().leaves();
    Filter filter = fcontext.base.getTopFilter();
    for (int subIdx = 0; subIdx < leaves.size(); subIdx++) {
        LeafReaderContext subCtx = leaves.get(subIdx);
        setNextReaderFirstPhase(subCtx);
        DocIdSet dis = filter.getDocIdSet(subCtx, null); // solr docsets already exclude any deleted docs
        DocIdSetIterator disi = dis.iterator();
        SortedDocValues singleDv = null;
        SortedSetDocValues multiDv = null;
        if (multiValuedField) {
            // TODO: get sub from multi?
            multiDv = subCtx.reader().getSortedSetDocValues(sf.getName());
            if (multiDv == null) {
                multiDv = DocValues.emptySortedSet();
            }
            // some codecs may optimize SortedSet storage for single-valued fields
            // this will be null if this is not a wrapped single valued docvalues.
            if (unwrap_singleValued_multiDv) {
                singleDv = DocValues.unwrapSingleton(multiDv);
            }
        } else {
            singleDv = subCtx.reader().getSortedDocValues(sf.getName());
            if (singleDv == null) {
                singleDv = DocValues.emptySorted();
            }
        }
        LongValues toGlobal = ordinalMap == null ? null : ordinalMap.getGlobalOrds(subIdx);
        if (singleDv != null) {
            if (accumSeg) {
                collectPerSeg(singleDv, disi, toGlobal);
            } else {
                if (canDoPerSeg && toGlobal != null) {
                    collectCounts(singleDv, disi, toGlobal);
                } else {
                    collectDocs(singleDv, disi, toGlobal);
                }
            }
        } else {
            if (accumSeg) {
                collectPerSeg(multiDv, disi, toGlobal);
            } else {
                if (canDoPerSeg && toGlobal != null) {
                    collectCounts(multiDv, disi, toGlobal);
                } else {
                    collectDocs(multiDv, disi, toGlobal);
                }
            }
        }
    }
    reuse = null; // better GC
}
From source file:org.apache.solr.search.join.BlockJoinFieldFacetAccumulator.java
License:Apache License
private boolean initSegmentData(String fieldName, LeafReaderContext leaf) throws IOException { segmentSSDV = DocValues.getSortedSet(leaf.reader(), fieldName); segmentAccums = ArrayUtil.grow(segmentAccums, (int) segmentSSDV.getValueCount() + 1);//+1 // zero counts, -1 parent Arrays.fill(segmentAccums, 0, (int) segmentSSDV.getValueCount() + 1, 0x00000000ffffffffL); segmentSDV = DocValues.unwrapSingleton(segmentSSDV); return segmentSSDV.getValueCount() != 0;// perhaps we need to count "missings"?? }
From source file:org.apache.solr.uninverting.TestDocTermOrds.java
License:Apache License
/**
 * Verifies that uninverting a field which only ever has one (distinct) value per document
 * yields a SORTED_SET that {@link DocValues#unwrapSingleton} can unwrap, and that ords,
 * doc iteration and term lookup behave as expected (including a doc with a duplicated
 * value and a doc with no value).
 */
public void testActuallySingleValued() throws IOException {
    Directory dir = newDirectory();
    IndexWriterConfig iwconfig = newIndexWriterConfig(null);
    iwconfig.setMergePolicy(newLogMergePolicy());
    IndexWriter iw = new IndexWriter(dir, iwconfig);
    // doc 0: "bar"
    Document doc = new Document();
    doc.add(new StringField("foo", "bar", Field.Store.NO));
    iw.addDocument(doc);
    // doc 1: "baz"
    doc = new Document();
    doc.add(new StringField("foo", "baz", Field.Store.NO));
    iw.addDocument(doc);
    // doc 2: no value for "foo"
    doc = new Document();
    iw.addDocument(doc);
    // doc 3: "baz" twice — duplicates collapse to a single ord
    doc = new Document();
    doc.add(new StringField("foo", "baz", Field.Store.NO));
    doc.add(new StringField("foo", "baz", Field.Store.NO));
    iw.addDocument(doc);
    iw.forceMerge(1);
    iw.close();
    DirectoryReader ir = DirectoryReader.open(dir);
    LeafReader ar = getOnlyLeafReader(ir);
    SortedSetDocValues v = FieldCache.DEFAULT.getDocTermOrds(ar, "foo", null);
    assertNotNull(DocValues.unwrapSingleton(v)); // actually a single-valued field
    assertEquals(2, v.getValueCount()); // "bar" and "baz"
    assertEquals(0, v.nextDoc());
    assertEquals(0, v.nextOrd()); // ord 0 = "bar"
    assertEquals(SortedSetDocValues.NO_MORE_ORDS, v.nextOrd());
    assertEquals(1, v.nextDoc());
    assertEquals(1, v.nextOrd()); // ord 1 = "baz"
    assertEquals(SortedSetDocValues.NO_MORE_ORDS, v.nextOrd());
    // doc 2 is skipped (no value); doc 3's duplicate value yields a single ord
    assertEquals(3, v.nextDoc());
    assertEquals(1, v.nextOrd());
    assertEquals(SortedSetDocValues.NO_MORE_ORDS, v.nextOrd());
    BytesRef value = v.lookupOrd(0);
    assertEquals("bar", value.utf8ToString());
    value = v.lookupOrd(1);
    assertEquals("baz", value.utf8ToString());
    ir.close();
    dir.close();
}
From source file:org.codelibs.elasticsearch.index.fielddata.FieldData.java
License:Apache License
/** * Given a {SortedNumericDocValues}, return a {SortedNumericDoubleValues} * instance that will translate long values to doubles using * {NumericUtils#sortableLongToDouble(long)}. *///from ww w . j a va2s. c o m public static SortedNumericDoubleValues sortableLongBitsToDoubles(SortedNumericDocValues values) { final NumericDocValues singleton = DocValues.unwrapSingleton(values); if (singleton != null) { final NumericDoubleValues doubles; if (singleton instanceof SortableLongBitsNumericDocValues) { doubles = ((SortableLongBitsNumericDocValues) singleton).getDoubleValues(); } else { doubles = new SortableLongBitsToNumericDoubleValues(singleton); } final Bits docsWithField = DocValues.unwrapSingletonBits(values); return singleton(doubles, docsWithField); } else { if (values instanceof SortableLongBitsSortedNumericDocValues) { return ((SortableLongBitsSortedNumericDocValues) values).getDoubleValues(); } else { return new SortableLongBitsToSortedNumericDoubleValues(values); } } }
From source file:org.codelibs.elasticsearch.index.fielddata.FieldData.java
License:Apache License
/** * Wrap the provided {SortedNumericDocValues} instance to cast all values to doubles. *///from w w w. ja v a2 s. c om public static SortedNumericDoubleValues castToDouble(final SortedNumericDocValues values) { final NumericDocValues singleton = DocValues.unwrapSingleton(values); if (singleton != null) { final Bits docsWithField = DocValues.unwrapSingletonBits(values); return singleton(new DoubleCastedValues(singleton), docsWithField); } else { return new SortedDoubleCastedValues(values); } }
From source file:org.codelibs.elasticsearch.index.fielddata.FieldData.java
License:Apache License
/**
 * Returns whether the provided values *might* be multi-valued. There is no
 * guarantee that this method will return {@code false} in the single-valued case.
 */
public static boolean isMultiValued(SortedSetDocValues values) {
    // a non-null unwrap means the codec stored a wrapped single-valued field
    return null == DocValues.unwrapSingleton(values);
}
From source file:org.codelibs.elasticsearch.index.fielddata.FieldData.java
License:Apache License
/**
 * Returns whether the provided values *might* be multi-valued. There is no
 * guarantee that this method will return {@code false} in the single-valued case.
 */
public static boolean isMultiValued(SortedNumericDocValues values) {
    // a non-null unwrap means the codec stored a wrapped single-valued field
    return null == DocValues.unwrapSingleton(values);
}
From source file:org.codelibs.elasticsearch.index.fielddata.FieldData.java
License:Apache License
/**
 * If {@code dv} is an instance of {@link RandomAccessOrds}, then return it, otherwise
 * wrap it into a slow wrapper that implements random access by buffering each document's
 * ords into an array on {@code setDocument}.
 */
public static RandomAccessOrds maybeSlowRandomAccessOrds(final SortedSetDocValues dv) {
    if (dv instanceof RandomAccessOrds) {
        return (RandomAccessOrds) dv;
    }
    assert DocValues
            .unwrapSingleton(dv) == null : "this method expect singleton to return random-access ords";
    return new RandomAccessOrds() {
        // number of ords buffered for the current document
        int count;
        // buffered ords for the current document, grown on demand
        long[] buffer = new long[0];
        // cursor for nextOrd() iteration
        int upto;

        @Override
        public void setDocument(int docID) {
            count = 0;
            dv.setDocument(docID);
            long next;
            while ((next = dv.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) {
                buffer = ArrayUtil.grow(buffer, count + 1);
                buffer[count++] = next;
            }
            upto = 0;
        }

        @Override
        public long nextOrd() {
            return buffer[upto++];
        }

        @Override
        public BytesRef lookupOrd(long ord) {
            // term lookup is delegated; only ord access is buffered
            return dv.lookupOrd(ord);
        }

        @Override
        public long getValueCount() {
            return dv.getValueCount();
        }

        @Override
        public long ordAt(int index) {
            return buffer[index];
        }

        @Override
        public int cardinality() {
            return count;
        }
    };
}