Example usage for org.apache.lucene.index DocValues emptySorted

List of usage examples for org.apache.lucene.index DocValues emptySorted

Introduction

On this page you can find example usages of org.apache.lucene.index DocValues emptySorted.

Prototype

public static final SortedDocValues emptySorted() 

Source Link

Document

An empty SortedDocValues which returns BytesRef#EMPTY_BYTES for every document

Usage

From source file:org.apache.solr.index.SlowCompositeReaderWrapper.java

License:Apache License

/**
 * Returns the SORTED doc values of {@code field} as seen across all leaves of the wrapped reader.
 *
 * <p>When no ordinal map is cached for the field, the result of
 * {@code MultiDocValues.getSortedValues} is returned directly; if that result is a
 * {@code MultiSortedDocValues}, its ordinal map is cached provided the map's owner is this
 * reader's core cache key and {@code merging} is false. When a cached map exists, a new
 * {@code MultiSortedDocValues} is assembled from the per-leaf values around that map.
 *
 * @param field name of the field to read
 * @return the doc values, or {@code null} if (on the cached path) some leaf declares the field
 *         with a doc-values type other than SORTED
 * @throws IOException if reading the underlying doc values fails
 */
@Override
public SortedDocValues getSortedDocValues(String field) throws IOException {
    ensureOpen();
    final OrdinalMap cachedMap;
    synchronized (cachedOrdMaps) {
        cachedMap = cachedOrdMaps.get(field);
        if (cachedMap == null) {
            // Cache miss: the map was never stored, or the field is not a multi doc-values field.
            final SortedDocValues merged = MultiDocValues.getSortedValues(in, field);
            if (merged instanceof MultiSortedDocValues) {
                final OrdinalMap freshMap = ((MultiSortedDocValues) merged).mapping;
                if (freshMap.owner == getCoreCacheKey() && !merging) {
                    cachedOrdMaps.put(field, freshMap);
                }
            }
            return merged;
        }
    }

    // Cache hit: gather the per-leaf values and doc bases around the cached ordinal map.
    final int leafCount = in.leaves().size();
    final SortedDocValues[] perLeaf = new SortedDocValues[leafCount];
    final int[] docStarts = new int[leafCount + 1];
    long costSum = 0;
    int slot = 0;
    for (LeafReaderContext leafCtx : in.leaves()) {
        final LeafReader leaf = leafCtx.reader();
        final FieldInfo info = leaf.getFieldInfos().fieldInfo(field);
        final boolean notSorted = info != null && info.getDocValuesType() != DocValuesType.SORTED;
        if (notSorted) {
            return null;
        }
        final SortedDocValues leafValues = leaf.getSortedDocValues(field);
        // Leaves without values for the field contribute an empty instance.
        perLeaf[slot] = leafValues != null ? leafValues : DocValues.emptySorted();
        costSum += perLeaf[slot].cost();
        docStarts[slot] = leafCtx.docBase;
        slot++;
    }
    docStarts[leafCount] = maxDoc();
    return new MultiSortedDocValues(perLeaf, docStarts, cachedMap, costSum);
}

From source file:org.apache.solr.request.DocValuesStats.java

License:Apache License

/**
 * Accumulates statistics for {@code fieldName} over the documents in {@code docs}, using the
 * field's sorted (or sorted-set) doc values, and optionally broken down by facet fields.
 *
 * <p>Per-segment ordinals are counted into a single array sized by the field's value count;
 * the counts are then fed to the {@link StatsValues} in ordinal order, followed by the number
 * of documents that were reported as missing by the accumulation helpers.
 *
 * @param searcher     searcher supplying the schema, the readers and the leaf contexts
 * @param fieldName    name of the field to compute statistics for
 * @param docs         the set of documents to include
 * @param calcDistinct passed through to the created {@link StatsValues} and facet stats
 * @param facet        names of the fields to facet the statistics on; its length is read
 *                     unconditionally, so it must not be {@code null}
 * @return the accumulated statistics, including one facet entry per element of {@code facet}
 * @throws SolrException                 (BAD_REQUEST) if a facet field is multi-valued
 * @throws UnsupportedOperationException if the field has {@code Integer.MAX_VALUE} or more unique terms
 * @throws IOException                   if reading doc values or doc id sets fails
 */
public static StatsValues getCounts(SolrIndexSearcher searcher, String fieldName, DocSet docs,
        boolean calcDistinct, String[] facet) throws IOException {
    SchemaField schemaField = searcher.getSchema().getField(fieldName);
    FieldType ft = schemaField.getType();
    StatsValues res = StatsValuesFactory.createStatsValues(schemaField, calcDistinct);

    //Initialize facetstats, if facets have been passed in
    final FieldFacetStats[] facetStats = new FieldFacetStats[facet.length];
    int upto = 0;

    for (String facetField : facet) {
        // A single schema lookup serves both the single-valued check and the facet stats.
        SchemaField facetSchemaField = searcher.getSchema().getField(facetField);
        if (facetSchemaField.multiValued()) {
            throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
                    "Stats can only facet on single-valued fields, not: " + facetField);
        }

        facetStats[upto++] = new FieldFacetStats(searcher, facetField, schemaField, facetSchemaField,
                calcDistinct);
    }
    // TODO: remove multiValuedFieldCache(), check dv type / uninversion type?
    final boolean multiValued = schemaField.multiValued() || ft.multiValuedFieldCache();

    SortedSetDocValues si; // for term lookups only
    OrdinalMap ordinalMap = null; // for mapping per-segment ords to global ones
    if (multiValued) {
        si = searcher.getAtomicReader().getSortedSetDocValues(fieldName);

        if (si instanceof MultiSortedSetDocValues) {
            ordinalMap = ((MultiSortedSetDocValues) si).mapping;
        }
    } else {
        SortedDocValues single = searcher.getAtomicReader().getSortedDocValues(fieldName);
        si = single == null ? null : DocValues.singleton(single);
        if (single instanceof MultiSortedDocValues) {
            ordinalMap = ((MultiSortedDocValues) single).mapping;
        }
    }
    if (si == null) {
        si = DocValues.emptySortedSet();
    }
    // The counts array below is int-indexed, so the number of unique terms must fit in an int.
    if (si.getValueCount() >= Integer.MAX_VALUE) {
        throw new UnsupportedOperationException(
                "Currently this stats method is limited to " + Integer.MAX_VALUE + " unique terms");
    }

    int missingDocCountTotal = 0;
    final int nTerms = (int) si.getValueCount();
    // count collection array only needs to be as big as the number of terms we are
    // going to collect counts for.
    final int[] counts = new int[nTerms];

    Filter filter = docs.getTopFilter();
    List<AtomicReaderContext> leaves = searcher.getTopReaderContext().leaves();

    for (int subIndex = 0; subIndex < leaves.size(); subIndex++) {
        AtomicReaderContext leaf = leaves.get(subIndex);
        DocIdSet dis = filter.getDocIdSet(leaf, null); // solr docsets already exclude any deleted docs
        DocIdSetIterator disi = null;

        if (dis != null) {
            disi = dis.iterator();
        }
        // A null set or iterator means there is nothing to accumulate for this segment.
        if (disi != null) {
            int docBase = leaf.docBase;

            if (multiValued) {
                SortedSetDocValues sub = leaf.reader().getSortedSetDocValues(fieldName);
                if (sub == null) {
                    sub = DocValues.emptySortedSet();
                }
                final SortedDocValues singleton = DocValues.unwrapSingleton(sub);
                if (singleton != null) {
                    // some codecs may optimize SORTED_SET storage for single-valued fields
                    missingDocCountTotal += accumSingle(counts, docBase, facetStats, singleton, disi, subIndex,
                            ordinalMap);
                } else {
                    missingDocCountTotal += accumMulti(counts, docBase, facetStats, sub, disi, subIndex,
                            ordinalMap);
                }
            } else {
                SortedDocValues sub = leaf.reader().getSortedDocValues(fieldName);
                if (sub == null) {
                    sub = DocValues.emptySorted();
                }
                missingDocCountTotal += accumSingle(counts, docBase, facetStats, sub, disi, subIndex,
                        ordinalMap);
            }
        }
    }
    // add results in index order
    for (int ord = 0; ord < counts.length; ord++) {
        int count = counts[ord];

        if (count > 0) {
            final BytesRef value = si.lookupOrd(ord);
            res.accumulate(value, count);
            for (FieldFacetStats f : facetStats) {
                f.accumulateTermNum(ord, value);
            }
        }
    }
    res.addMissing(missingDocCountTotal);

    // Iterating an empty array is a no-op, so no length guard is needed here.
    for (FieldFacetStats f : facetStats) {
        Map<String, StatsValues> facetStatsValues = f.facetStatsValues;
        f.accumulateMissing();
        res.addFacet(f.name, facetStatsValues);
    }

    return res;
}

From source file:org.apache.solr.search.facet.FacetFieldProcessorByArrayDV.java

License:Apache License

/**
 * Walks every leaf of the searcher's index reader and collects the documents of the facet
 * domain ({@code fcontext.base}) from the field's sorted or sorted-set doc values.
 *
 * <p>Returns immediately when there are no terms or the domain is smaller than the effective
 * mincount. For each leaf, one of three collection strategies is chosen: per-segment counting
 * ({@code collectPerSeg}), count-only collection through the global ordinal mapping
 * ({@code collectCounts}), or the general {@code collectDocs} path.
 *
 * <p>A leaf for which the domain filter yields a {@code null} doc id set or a {@code null}
 * iterator is treated as having no matching documents.
 *
 * @throws IOException if reading doc values or doc id sets fails
 */
@Override
protected void collectDocs() throws IOException {
    int domainSize = fcontext.base.size();

    if (nTerms <= 0 || domainSize < effectiveMincount) { // TODO: what about allBuckets? missing bucket?
        return;
    }

    // TODO: refactor some of this logic into a base class
    boolean countOnly = collectAcc == null && allBucketsAcc == null;
    boolean fullRange = startTermIndex == 0 && endTermIndex == si.getValueCount();

    // Are we expecting many hits per bucket?
    // FUTURE: pro-rate for nTerms?
    // FUTURE: better take into account number of values in multi-valued fields.  This info is available for indexed fields.
    // FUTURE: take into account that bigger ord maps are more expensive than smaller ones
    // One test: 5M doc index, faceting on a single-valued field with almost 1M unique values, crossover point where global counting was slower
    // than per-segment counting was a domain of 658k docs.  At that point, top 10 buckets had 6-7 matches each.
    // this was for heap docvalues produced by UninvertingReader
    // Since these values were randomly distributed, lets round our domain multiplier up to account for less random real world data.
    long domainMultiplier = multiValuedField ? 4L : 2L;
    boolean manyHitsPerBucket = domainSize * domainMultiplier > (si.getValueCount() + 3); // +3 to increase test coverage with small tests

    // If we're only calculating counts, we're not prefixing, and we expect to collect many documents per unique value,
    // then collect per-segment before mapping to global ords at the end.  This will save redundant seg->global ord mappings.
    // FUTURE: there are probably some other non "countOnly" cases where we can use this as well (i.e. those where
    // the docid is not used)
    boolean canDoPerSeg = countOnly && fullRange;
    boolean accumSeg = manyHitsPerBucket && canDoPerSeg;

    if (freq.perSeg != null) {
        accumSeg = canDoPerSeg && freq.perSeg; // internal - override perSeg heuristic
    }

    final List<LeafReaderContext> leaves = fcontext.searcher.getIndexReader().leaves();
    Filter filter = fcontext.base.getTopFilter();

    for (int subIdx = 0; subIdx < leaves.size(); subIdx++) {
        LeafReaderContext subCtx = leaves.get(subIdx);

        setNextReaderFirstPhase(subCtx);

        DocIdSet dis = filter.getDocIdSet(subCtx, null); // solr docsets already exclude any deleted docs
        // Both the doc id set and its iterator may be null when nothing in this segment matches;
        // fall back to an empty iterator rather than failing with a NullPointerException.
        DocIdSetIterator disi = dis == null ? null : dis.iterator();
        if (disi == null) {
            disi = DocIdSetIterator.empty();
        }

        SortedDocValues singleDv = null;
        SortedSetDocValues multiDv = null;
        if (multiValuedField) {
            // TODO: get sub from multi?
            multiDv = subCtx.reader().getSortedSetDocValues(sf.getName());
            if (multiDv == null) {
                multiDv = DocValues.emptySortedSet();
            }
            // some codecs may optimize SortedSet storage for single-valued fields
            // this will be null if this is not a wrapped single valued docvalues.
            if (unwrap_singleValued_multiDv) {
                singleDv = DocValues.unwrapSingleton(multiDv);
            }
        } else {
            singleDv = subCtx.reader().getSortedDocValues(sf.getName());
            if (singleDv == null) {
                singleDv = DocValues.emptySorted();
            }
        }

        LongValues toGlobal = ordinalMap == null ? null : ordinalMap.getGlobalOrds(subIdx);

        if (singleDv != null) {
            if (accumSeg) {
                collectPerSeg(singleDv, disi, toGlobal);
            } else {
                if (canDoPerSeg && toGlobal != null) {
                    collectCounts(singleDv, disi, toGlobal);
                } else {
                    collectDocs(singleDv, disi, toGlobal);
                }
            }
        } else {
            if (accumSeg) {
                collectPerSeg(multiDv, disi, toGlobal);
            } else {
                if (canDoPerSeg && toGlobal != null) {
                    collectCounts(multiDv, disi, toGlobal);
                } else {
                    collectDocs(multiDv, disi, toGlobal);
                }
            }
        }
    }

    reuse = null; // better GC
}

From source file:org.apache.solr.search.facet.FieldUtil.java

License:Apache License

/**
 * Looks up the sorted doc values of {@code field} on the searcher's slow atomic reader.
 *
 * @param context query context whose searcher is consulted
 * @param field   schema field whose name identifies the doc values to read
 * @param qparser not used by this method
 * @return the reader's doc values for the field, or {@link DocValues#emptySorted()} when the
 *         reader returns {@code null}
 * @throws IOException if reading the doc values fails
 */
public static SortedDocValues getSortedDocValues(QueryContext context, SchemaField field, QParser qparser)
        throws IOException {
    final SortedDocValues values = context.searcher().getSlowAtomicReader()
            .getSortedDocValues(field.getName());
    if (values != null) {
        return values;
    }
    return DocValues.emptySorted();
}

From source file:org.apache.solr.uninverting.FieldCacheImpl.java

License:Apache License

/**
 * Returns sorted doc values for {@code field}, preferring the reader's own doc values and
 * otherwise falling back to the entry held in {@code caches}.
 *
 * @param reader                  leaf reader to read from
 * @param field                   name of the field
 * @param acceptableOverheadRatio becomes part of the cache key together with {@code field}
 * @return the reader's sorted doc values when present; an empty instance when the field is
 *         unknown to the reader or has no index options; otherwise an iterator over the
 *         cached implementation
 * @throws IllegalStateException if the field has doc values of some type but the reader
 *                               returned no sorted doc values for it
 * @throws IOException           if reading from the reader or the cache fails
 */
public SortedDocValues getTermsIndex(LeafReader reader, String field, float acceptableOverheadRatio)
        throws IOException {
    final SortedDocValues nativeValues = reader.getSortedDocValues(field);
    if (nativeValues != null) {
        // Not cached here by FieldCacheImpl (cached instead
        // per-thread by SegmentReader):
        return nativeValues;
    }

    final FieldInfo fieldInfo = reader.getFieldInfos().fieldInfo(field);
    if (fieldInfo == null) {
        return DocValues.emptySorted();
    }
    if (fieldInfo.getDocValuesType() != DocValuesType.NONE) {
        // we don't try to build a sorted instance from numeric/binary doc
        // values because dedup can be very costly
        throw new IllegalStateException(
                "Type mismatch: " + field + " was indexed as " + fieldInfo.getDocValuesType());
    }
    if (fieldInfo.getIndexOptions() == IndexOptions.NONE) {
        return DocValues.emptySorted();
    }

    final SortedDocValuesImpl cached = (SortedDocValuesImpl) caches.get(SortedDocValues.class).get(reader,
            new CacheKey(field, acceptableOverheadRatio));
    return cached.iterator();
}

From source file:org.apache.tika.eval.tools.SlowCompositeReaderWrapper.java

License:Apache License

/**
 * Returns the SORTED doc values of {@code field} as seen across all leaves of the wrapped reader.
 *
 * <p>When no ordinal map is cached for the field, the result of
 * {@code MultiDocValues.getSortedValues} is returned directly; if that result is a
 * {@code MultiSortedDocValues}, its ordinal map is cached provided a reader cache helper is
 * available and the map's owner is that helper's key. When a cached map exists, a new
 * {@code MultiSortedDocValues} is assembled from the per-leaf values around that map.
 *
 * @param field name of the field to read
 * @return the doc values, or {@code null} if (on the cached path) some leaf declares the field
 *         with a doc-values type other than SORTED
 * @throws IOException if reading the underlying doc values fails
 */
@Override
public SortedDocValues getSortedDocValues(String field) throws IOException {
    ensureOpen();
    final OrdinalMap cachedMap;
    synchronized (cachedOrdMaps) {
        cachedMap = cachedOrdMaps.get(field);
        if (cachedMap == null) {
            // Cache miss: the map was never stored, or the field is not a multi doc-values field.
            final SortedDocValues merged = MultiDocValues.getSortedValues(in, field);
            if (merged instanceof MultiSortedDocValues) {
                final OrdinalMap freshMap = ((MultiSortedDocValues) merged).mapping;
                final IndexReader.CacheHelper helper = getReaderCacheHelper();
                final boolean ownedByThisReader = helper != null && freshMap.owner == helper.getKey();
                if (ownedByThisReader) {
                    cachedOrdMaps.put(field, freshMap);
                }
            }
            return merged;
        }
    }

    // Cache hit: gather the per-leaf values and doc bases around the cached ordinal map.
    final int leafCount = in.leaves().size();
    final SortedDocValues[] perLeaf = new SortedDocValues[leafCount];
    final int[] docStarts = new int[leafCount + 1];
    long costSum = 0;
    int slot = 0;
    for (LeafReaderContext leafCtx : in.leaves()) {
        final LeafReader leaf = leafCtx.reader();
        final FieldInfo info = leaf.getFieldInfos().fieldInfo(field);
        final boolean notSorted = info != null && info.getDocValuesType() != DocValuesType.SORTED;
        if (notSorted) {
            return null;
        }
        final SortedDocValues leafValues = leaf.getSortedDocValues(field);
        // Leaves without values for the field contribute an empty instance.
        perLeaf[slot] = leafValues != null ? leafValues : DocValues.emptySorted();
        costSum += perLeaf[slot].cost();
        docStarts[slot] = leafCtx.docBase;
        slot++;
    }
    docStarts[leafCount] = maxDoc();
    return new MultiSortedDocValues(perLeaf, docStarts, cachedMap, costSum);
}

From source file:org.elasticsearch.index.fielddata.plain.AbstractAtomicParentChildFieldData.java

License:Apache License

/**
 * Creates a parent/child field data instance that holds nothing: it reports no types, zero RAM
 * usage, no child resources, and answers every ordinals request with
 * {@link DocValues#emptySorted()}.
 *
 * @return a new, empty field data instance
 */
public static AtomicParentChildFieldData empty() {
    final AtomicParentChildFieldData noData = new AbstractAtomicParentChildFieldData() {

        @Override
        public Set<String> types() {
            return ImmutableSet.of();
        }

        @Override
        public SortedDocValues getOrdinalsValues(String type) {
            // The requested type is irrelevant: there are never any values.
            return DocValues.emptySorted();
        }

        @Override
        public long ramBytesUsed() {
            return 0;
        }

        @Override
        public Collection<Accountable> getChildResources() {
            return Collections.emptyList();
        }

        @Override
        public void close() {
            // Nothing to release.
        }
    };
    return noData;
}

From source file:org.neo4j.kernel.api.impl.index.IndexReaderStub.java

License:Open Source License

/**
 * Stub implementation that ignores {@code field} and always answers with
 * {@link DocValues#emptySorted()}.
 *
 * @param field name of the requested field; not consulted
 * @return an empty sorted doc values instance
 */
@Override
public SortedDocValues getSortedDocValues(String field) throws IOException {
    final SortedDocValues noValues = DocValues.emptySorted();
    return noValues;
}