List of usage examples for the method org.apache.lucene.index.DocValues.emptySorted()
public static final SortedDocValues emptySorted()
From source file:org.apache.solr.index.SlowCompositeReaderWrapper.java
License:Apache License
@Override public SortedDocValues getSortedDocValues(String field) throws IOException { ensureOpen();//from w ww . j a v a2 s . c o m OrdinalMap map = null; synchronized (cachedOrdMaps) { map = cachedOrdMaps.get(field); if (map == null) { // uncached, or not a multi dv SortedDocValues dv = MultiDocValues.getSortedValues(in, field); if (dv instanceof MultiSortedDocValues) { map = ((MultiSortedDocValues) dv).mapping; if (map.owner == getCoreCacheKey() && merging == false) { cachedOrdMaps.put(field, map); } } return dv; } } int size = in.leaves().size(); final SortedDocValues[] values = new SortedDocValues[size]; final int[] starts = new int[size + 1]; long totalCost = 0; for (int i = 0; i < size; i++) { LeafReaderContext context = in.leaves().get(i); final LeafReader reader = context.reader(); final FieldInfo fieldInfo = reader.getFieldInfos().fieldInfo(field); if (fieldInfo != null && fieldInfo.getDocValuesType() != DocValuesType.SORTED) { return null; } SortedDocValues v = reader.getSortedDocValues(field); if (v == null) { v = DocValues.emptySorted(); } totalCost += v.cost(); values[i] = v; starts[i] = context.docBase; } starts[size] = maxDoc(); return new MultiSortedDocValues(values, starts, map, totalCost); }
From source file:org.apache.solr.request.DocValuesStats.java
License:Apache License
public static StatsValues getCounts(SolrIndexSearcher searcher, String fieldName, DocSet docs, boolean calcDistinct, String[] facet) throws IOException { SchemaField schemaField = searcher.getSchema().getField(fieldName); FieldType ft = schemaField.getType(); StatsValues res = StatsValuesFactory.createStatsValues(schemaField, calcDistinct); //Initialize facetstats, if facets have been passed in final FieldFacetStats[] facetStats = new FieldFacetStats[facet.length]; int upto = 0; for (String facetField : facet) { SchemaField fsf = searcher.getSchema().getField(facetField); if (fsf.multiValued()) { throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Stats can only facet on single-valued fields, not: " + facetField); }//from w ww . j av a2s.c om SchemaField facetSchemaField = searcher.getSchema().getField(facetField); facetStats[upto++] = new FieldFacetStats(searcher, facetField, schemaField, facetSchemaField, calcDistinct); } // TODO: remove multiValuedFieldCache(), check dv type / uninversion type? final boolean multiValued = schemaField.multiValued() || ft.multiValuedFieldCache(); SortedSetDocValues si; // for term lookups only OrdinalMap ordinalMap = null; // for mapping per-segment ords to global ones if (multiValued) { si = searcher.getAtomicReader().getSortedSetDocValues(fieldName); if (si instanceof MultiSortedSetDocValues) { ordinalMap = ((MultiSortedSetDocValues) si).mapping; } } else { SortedDocValues single = searcher.getAtomicReader().getSortedDocValues(fieldName); si = single == null ? 
null : DocValues.singleton(single); if (single instanceof MultiSortedDocValues) { ordinalMap = ((MultiSortedDocValues) single).mapping; } } if (si == null) { si = DocValues.emptySortedSet(); } if (si.getValueCount() >= Integer.MAX_VALUE) { throw new UnsupportedOperationException( "Currently this stats method is limited to " + Integer.MAX_VALUE + " unique terms"); } int missingDocCountTotal = 0; final int nTerms = (int) si.getValueCount(); // count collection array only needs to be as big as the number of terms we are // going to collect counts for. final int[] counts = new int[nTerms]; Filter filter = docs.getTopFilter(); List<AtomicReaderContext> leaves = searcher.getTopReaderContext().leaves(); for (int subIndex = 0; subIndex < leaves.size(); subIndex++) { AtomicReaderContext leaf = leaves.get(subIndex); DocIdSet dis = filter.getDocIdSet(leaf, null); // solr docsets already exclude any deleted docs DocIdSetIterator disi = null; if (dis != null) { disi = dis.iterator(); } if (disi != null) { int docBase = leaf.docBase; if (multiValued) { SortedSetDocValues sub = leaf.reader().getSortedSetDocValues(fieldName); if (sub == null) { sub = DocValues.emptySortedSet(); } final SortedDocValues singleton = DocValues.unwrapSingleton(sub); if (singleton != null) { // some codecs may optimize SORTED_SET storage for single-valued fields missingDocCountTotal += accumSingle(counts, docBase, facetStats, singleton, disi, subIndex, ordinalMap); } else { missingDocCountTotal += accumMulti(counts, docBase, facetStats, sub, disi, subIndex, ordinalMap); } } else { SortedDocValues sub = leaf.reader().getSortedDocValues(fieldName); if (sub == null) { sub = DocValues.emptySorted(); } missingDocCountTotal += accumSingle(counts, docBase, facetStats, sub, disi, subIndex, ordinalMap); } } } // add results in index order for (int ord = 0; ord < counts.length; ord++) { int count = counts[ord]; if (count > 0) { final BytesRef value = si.lookupOrd(ord); res.accumulate(value, count); for 
(FieldFacetStats f : facetStats) { f.accumulateTermNum(ord, value); } } } res.addMissing(missingDocCountTotal); if (facetStats.length > 0) { for (FieldFacetStats f : facetStats) { Map<String, StatsValues> facetStatsValues = f.facetStatsValues; f.accumulateMissing(); res.addFacet(f.name, facetStatsValues); } } return res; }
From source file:org.apache.solr.search.facet.FacetFieldProcessorByArrayDV.java
License:Apache License
@Override protected void collectDocs() throws IOException { int domainSize = fcontext.base.size(); if (nTerms <= 0 || domainSize < effectiveMincount) { // TODO: what about allBuckets? missing bucket? return;/*w w w . j av a2 s . c o m*/ } // TODO: refactor some of this logic into a base class boolean countOnly = collectAcc == null && allBucketsAcc == null; boolean fullRange = startTermIndex == 0 && endTermIndex == si.getValueCount(); // Are we expecting many hits per bucket? // FUTURE: pro-rate for nTerms? // FUTURE: better take into account number of values in multi-valued fields. This info is available for indexed fields. // FUTURE: take into account that bigger ord maps are more expensive than smaller ones // One test: 5M doc index, faceting on a single-valued field with almost 1M unique values, crossover point where global counting was slower // than per-segment counting was a domain of 658k docs. At that point, top 10 buckets had 6-7 matches each. // this was for heap docvalues produced by UninvertingReader // Since these values were randomly distributed, lets round our domain multiplier up to account for less random real world data. long domainMultiplier = multiValuedField ? 4L : 2L; boolean manyHitsPerBucket = domainSize * domainMultiplier > (si.getValueCount() + 3); // +3 to increase test coverage with small tests // If we're only calculating counts, we're not prefixing, and we expect to collect many documents per unique value, // then collect per-segment before mapping to global ords at the end. This will save redundant seg->global ord mappings. // FUTURE: there are probably some other non "countOnly" cases where we can use this as well (i.e. 
those where // the docid is not used) boolean canDoPerSeg = countOnly && fullRange; boolean accumSeg = manyHitsPerBucket && canDoPerSeg; if (freq.perSeg != null) accumSeg = canDoPerSeg && freq.perSeg; // internal - override perSeg heuristic final List<LeafReaderContext> leaves = fcontext.searcher.getIndexReader().leaves(); Filter filter = fcontext.base.getTopFilter(); for (int subIdx = 0; subIdx < leaves.size(); subIdx++) { LeafReaderContext subCtx = leaves.get(subIdx); setNextReaderFirstPhase(subCtx); DocIdSet dis = filter.getDocIdSet(subCtx, null); // solr docsets already exclude any deleted docs DocIdSetIterator disi = dis.iterator(); SortedDocValues singleDv = null; SortedSetDocValues multiDv = null; if (multiValuedField) { // TODO: get sub from multi? multiDv = subCtx.reader().getSortedSetDocValues(sf.getName()); if (multiDv == null) { multiDv = DocValues.emptySortedSet(); } // some codecs may optimize SortedSet storage for single-valued fields // this will be null if this is not a wrapped single valued docvalues. if (unwrap_singleValued_multiDv) { singleDv = DocValues.unwrapSingleton(multiDv); } } else { singleDv = subCtx.reader().getSortedDocValues(sf.getName()); if (singleDv == null) { singleDv = DocValues.emptySorted(); } } LongValues toGlobal = ordinalMap == null ? null : ordinalMap.getGlobalOrds(subIdx); if (singleDv != null) { if (accumSeg) { collectPerSeg(singleDv, disi, toGlobal); } else { if (canDoPerSeg && toGlobal != null) { collectCounts(singleDv, disi, toGlobal); } else { collectDocs(singleDv, disi, toGlobal); } } } else { if (accumSeg) { collectPerSeg(multiDv, disi, toGlobal); } else { if (canDoPerSeg && toGlobal != null) { collectCounts(multiDv, disi, toGlobal); } else { collectDocs(multiDv, disi, toGlobal); } } } } reuse = null; // better GC }
From source file:org.apache.solr.search.facet.FieldUtil.java
License:Apache License
public static SortedDocValues getSortedDocValues(QueryContext context, SchemaField field, QParser qparser) throws IOException { SortedDocValues si = context.searcher().getSlowAtomicReader().getSortedDocValues(field.getName()); // if (!field.hasDocValues() && (field.getType() instanceof StrField || field.getType() instanceof TextField)) { // }//www .j a va 2 s . co m return si == null ? DocValues.emptySorted() : si; }
From source file:org.apache.solr.uninverting.FieldCacheImpl.java
License:Apache License
public SortedDocValues getTermsIndex(LeafReader reader, String field, float acceptableOverheadRatio) throws IOException { SortedDocValues valuesIn = reader.getSortedDocValues(field); if (valuesIn != null) { // Not cached here by FieldCacheImpl (cached instead // per-thread by SegmentReader): return valuesIn; } else {//from w w w. j av a2s . c o m final FieldInfo info = reader.getFieldInfos().fieldInfo(field); if (info == null) { return DocValues.emptySorted(); } else if (info.getDocValuesType() != DocValuesType.NONE) { // we don't try to build a sorted instance from numeric/binary doc // values because dedup can be very costly throw new IllegalStateException( "Type mismatch: " + field + " was indexed as " + info.getDocValuesType()); } else if (info.getIndexOptions() == IndexOptions.NONE) { return DocValues.emptySorted(); } SortedDocValuesImpl impl = (SortedDocValuesImpl) caches.get(SortedDocValues.class).get(reader, new CacheKey(field, acceptableOverheadRatio)); return impl.iterator(); } }
From source file:org.apache.tika.eval.tools.SlowCompositeReaderWrapper.java
License:Apache License
@Override public SortedDocValues getSortedDocValues(String field) throws IOException { ensureOpen();// w ww. ja va2s . c om OrdinalMap map = null; synchronized (cachedOrdMaps) { map = cachedOrdMaps.get(field); if (map == null) { // uncached, or not a multi dv SortedDocValues dv = MultiDocValues.getSortedValues(in, field); if (dv instanceof MultiSortedDocValues) { map = ((MultiSortedDocValues) dv).mapping; IndexReader.CacheHelper cacheHelper = getReaderCacheHelper(); if (cacheHelper != null && map.owner == cacheHelper.getKey()) { cachedOrdMaps.put(field, map); } } return dv; } } int size = in.leaves().size(); final SortedDocValues[] values = new SortedDocValues[size]; final int[] starts = new int[size + 1]; long totalCost = 0; for (int i = 0; i < size; i++) { LeafReaderContext context = in.leaves().get(i); final LeafReader reader = context.reader(); final FieldInfo fieldInfo = reader.getFieldInfos().fieldInfo(field); if (fieldInfo != null && fieldInfo.getDocValuesType() != DocValuesType.SORTED) { return null; } SortedDocValues v = reader.getSortedDocValues(field); if (v == null) { v = DocValues.emptySorted(); } totalCost += v.cost(); values[i] = v; starts[i] = context.docBase; } starts[size] = maxDoc(); return new MultiSortedDocValues(values, starts, map, totalCost); }
From source file:org.elasticsearch.index.fielddata.plain.AbstractAtomicParentChildFieldData.java
License:Apache License
/**
 * Creates a field-data instance that holds nothing: it reports no types, zero RAM usage and no
 * child resources, hands out empty sorted doc values for every type, and does nothing on close.
 *
 * @return a new empty {@code AtomicParentChildFieldData}
 */
public static AtomicParentChildFieldData empty() {
  final AtomicParentChildFieldData nothing = new AbstractAtomicParentChildFieldData() {

    @Override
    public Set<String> types() {
      return ImmutableSet.of();
    }

    @Override
    public SortedDocValues getOrdinalsValues(String type) {
      return DocValues.emptySorted();
    }

    @Override
    public long ramBytesUsed() {
      return 0;
    }

    @Override
    public Collection<Accountable> getChildResources() {
      return Collections.emptyList();
    }

    @Override
    public void close() {
      // nothing to release
    }
  };
  return nothing;
}
From source file:org.neo4j.kernel.api.impl.index.IndexReaderStub.java
License:Open Source License
/**
 * Stub implementation: always hands back empty sorted doc values, whatever the field.
 *
 * @param field ignored
 * @return {@code DocValues.emptySorted()}
 * @throws IOException declared by the overridden method; not thrown by this body directly
 */
@Override
public SortedDocValues getSortedDocValues(String field) throws IOException {
  final SortedDocValues noValues = DocValues.emptySorted();
  return noValues;
}