Example usage for org.apache.lucene.index DocValues emptySortedSet

List of usage examples for org.apache.lucene.index DocValues emptySortedSet

Introduction

In this page you can find the example usage for org.apache.lucene.index DocValues emptySortedSet.

Prototype

public static final SortedSetDocValues emptySortedSet() 

Source Link

Document

An empty SortedDocValues which returns BytesRef#EMPTY_BYTES for every document

Usage

From source file:io.crate.expression.reference.doc.lucene.IpColumnReference.java

License:Apache License

@Override
public void setNextReader(LeafReaderContext context) throws IOException {
    values = context.reader().getSortedSetDocValues(columnName);
    if (values == null) {
        values = DocValues.emptySortedSet();
    }//from www  .  j  ava2s  .co  m
}

From source file:org.apache.solr.index.SlowCompositeReaderWrapper.java

License:Apache License

@Override
public SortedSetDocValues getSortedSetDocValues(String field) throws IOException {
    ensureOpen();//w ww.ja  v a2 s.c  o m
    OrdinalMap map = null;
    synchronized (cachedOrdMaps) {
        map = cachedOrdMaps.get(field);
        if (map == null) {
            // uncached, or not a multi dv
            SortedSetDocValues dv = MultiDocValues.getSortedSetValues(in, field);
            if (dv instanceof MultiDocValues.MultiSortedSetDocValues) {
                map = ((MultiDocValues.MultiSortedSetDocValues) dv).mapping;
                if (map.owner == getCoreCacheKey() && merging == false) {
                    cachedOrdMaps.put(field, map);
                }
            }
            return dv;
        }
    }

    assert map != null;
    int size = in.leaves().size();
    final SortedSetDocValues[] values = new SortedSetDocValues[size];
    final int[] starts = new int[size + 1];
    long cost = 0;
    for (int i = 0; i < size; i++) {
        LeafReaderContext context = in.leaves().get(i);
        final LeafReader reader = context.reader();
        final FieldInfo fieldInfo = reader.getFieldInfos().fieldInfo(field);
        if (fieldInfo != null && fieldInfo.getDocValuesType() != DocValuesType.SORTED_SET) {
            return null;
        }
        SortedSetDocValues v = reader.getSortedSetDocValues(field);
        if (v == null) {
            v = DocValues.emptySortedSet();
        }
        values[i] = v;
        starts[i] = context.docBase;
        cost += v.cost();
    }
    starts[size] = maxDoc();
    return new MultiDocValues.MultiSortedSetDocValues(values, starts, map, cost);
}

From source file:org.apache.solr.request.DocValuesStats.java

License:Apache License

public static StatsValues getCounts(SolrIndexSearcher searcher, String fieldName, DocSet docs,
        boolean calcDistinct, String[] facet) throws IOException {
    SchemaField schemaField = searcher.getSchema().getField(fieldName);
    FieldType ft = schemaField.getType();
    StatsValues res = StatsValuesFactory.createStatsValues(schemaField, calcDistinct);

    //Initialize facetstats, if facets have been passed in
    final FieldFacetStats[] facetStats = new FieldFacetStats[facet.length];
    int upto = 0;

    for (String facetField : facet) {
        SchemaField fsf = searcher.getSchema().getField(facetField);
        if (fsf.multiValued()) {
            throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
                    "Stats can only facet on single-valued fields, not: " + facetField);
        }//from w  ww  .  j  a va2  s.co m

        SchemaField facetSchemaField = searcher.getSchema().getField(facetField);
        facetStats[upto++] = new FieldFacetStats(searcher, facetField, schemaField, facetSchemaField,
                calcDistinct);
    }
    // TODO: remove multiValuedFieldCache(), check dv type / uninversion type?
    final boolean multiValued = schemaField.multiValued() || ft.multiValuedFieldCache();

    SortedSetDocValues si; // for term lookups only
    OrdinalMap ordinalMap = null; // for mapping per-segment ords to global ones
    if (multiValued) {
        si = searcher.getAtomicReader().getSortedSetDocValues(fieldName);

        if (si instanceof MultiSortedSetDocValues) {
            ordinalMap = ((MultiSortedSetDocValues) si).mapping;
        }
    } else {
        SortedDocValues single = searcher.getAtomicReader().getSortedDocValues(fieldName);
        si = single == null ? null : DocValues.singleton(single);
        if (single instanceof MultiSortedDocValues) {
            ordinalMap = ((MultiSortedDocValues) single).mapping;
        }
    }
    if (si == null) {
        si = DocValues.emptySortedSet();
    }
    if (si.getValueCount() >= Integer.MAX_VALUE) {
        throw new UnsupportedOperationException(
                "Currently this stats method is limited to " + Integer.MAX_VALUE + " unique terms");
    }

    int missingDocCountTotal = 0;
    final int nTerms = (int) si.getValueCount();
    // count collection array only needs to be as big as the number of terms we are
    // going to collect counts for.
    final int[] counts = new int[nTerms];

    Filter filter = docs.getTopFilter();
    List<AtomicReaderContext> leaves = searcher.getTopReaderContext().leaves();

    for (int subIndex = 0; subIndex < leaves.size(); subIndex++) {
        AtomicReaderContext leaf = leaves.get(subIndex);
        DocIdSet dis = filter.getDocIdSet(leaf, null); // solr docsets already exclude any deleted docs
        DocIdSetIterator disi = null;

        if (dis != null) {
            disi = dis.iterator();
        }
        if (disi != null) {
            int docBase = leaf.docBase;

            if (multiValued) {
                SortedSetDocValues sub = leaf.reader().getSortedSetDocValues(fieldName);
                if (sub == null) {
                    sub = DocValues.emptySortedSet();
                }
                final SortedDocValues singleton = DocValues.unwrapSingleton(sub);
                if (singleton != null) {
                    // some codecs may optimize SORTED_SET storage for single-valued fields
                    missingDocCountTotal += accumSingle(counts, docBase, facetStats, singleton, disi, subIndex,
                            ordinalMap);
                } else {
                    missingDocCountTotal += accumMulti(counts, docBase, facetStats, sub, disi, subIndex,
                            ordinalMap);
                }
            } else {
                SortedDocValues sub = leaf.reader().getSortedDocValues(fieldName);
                if (sub == null) {
                    sub = DocValues.emptySorted();
                }
                missingDocCountTotal += accumSingle(counts, docBase, facetStats, sub, disi, subIndex,
                        ordinalMap);
            }
        }
    }
    // add results in index order
    for (int ord = 0; ord < counts.length; ord++) {
        int count = counts[ord];

        if (count > 0) {
            final BytesRef value = si.lookupOrd(ord);
            res.accumulate(value, count);
            for (FieldFacetStats f : facetStats) {
                f.accumulateTermNum(ord, value);
            }
        }
    }
    res.addMissing(missingDocCountTotal);

    if (facetStats.length > 0) {
        for (FieldFacetStats f : facetStats) {
            Map<String, StatsValues> facetStatsValues = f.facetStatsValues;
            f.accumulateMissing();
            res.addFacet(f.name, facetStatsValues);
        }
    }

    return res;
}

From source file:org.apache.solr.search.facet.FacetFieldProcessorByArrayDV.java

License:Apache License

@Override
protected void collectDocs() throws IOException {
    int domainSize = fcontext.base.size();

    if (nTerms <= 0 || domainSize < effectiveMincount) { // TODO: what about allBuckets? missing bucket?
        return;//from  w ww .  j  a  v  a  2 s . c  o m
    }

    // TODO: refactor some of this logic into a base class
    boolean countOnly = collectAcc == null && allBucketsAcc == null;
    boolean fullRange = startTermIndex == 0 && endTermIndex == si.getValueCount();

    // Are we expecting many hits per bucket?
    // FUTURE: pro-rate for nTerms?
    // FUTURE: better take into account number of values in multi-valued fields.  This info is available for indexed fields.
    // FUTURE: take into account that bigger ord maps are more expensive than smaller ones
    // One test: 5M doc index, faceting on a single-valued field with almost 1M unique values, crossover point where global counting was slower
    // than per-segment counting was a domain of 658k docs.  At that point, top 10 buckets had 6-7 matches each.
    // this was for heap docvalues produced by UninvertingReader
    // Since these values were randomly distributed, lets round our domain multiplier up to account for less random real world data.
    long domainMultiplier = multiValuedField ? 4L : 2L;
    boolean manyHitsPerBucket = domainSize * domainMultiplier > (si.getValueCount() + 3); // +3 to increase test coverage with small tests

    // If we're only calculating counts, we're not prefixing, and we expect to collect many documents per unique value,
    // then collect per-segment before mapping to global ords at the end.  This will save redundant seg->global ord mappings.
    // FUTURE: there are probably some other non "countOnly" cases where we can use this as well (i.e. those where
    // the docid is not used)
    boolean canDoPerSeg = countOnly && fullRange;
    boolean accumSeg = manyHitsPerBucket && canDoPerSeg;

    if (freq.perSeg != null)
        accumSeg = canDoPerSeg && freq.perSeg; // internal - override perSeg heuristic

    final List<LeafReaderContext> leaves = fcontext.searcher.getIndexReader().leaves();
    Filter filter = fcontext.base.getTopFilter();

    for (int subIdx = 0; subIdx < leaves.size(); subIdx++) {
        LeafReaderContext subCtx = leaves.get(subIdx);

        setNextReaderFirstPhase(subCtx);

        DocIdSet dis = filter.getDocIdSet(subCtx, null); // solr docsets already exclude any deleted docs
        DocIdSetIterator disi = dis.iterator();

        SortedDocValues singleDv = null;
        SortedSetDocValues multiDv = null;
        if (multiValuedField) {
            // TODO: get sub from multi?
            multiDv = subCtx.reader().getSortedSetDocValues(sf.getName());
            if (multiDv == null) {
                multiDv = DocValues.emptySortedSet();
            }
            // some codecs may optimize SortedSet storage for single-valued fields
            // this will be null if this is not a wrapped single valued docvalues.
            if (unwrap_singleValued_multiDv) {
                singleDv = DocValues.unwrapSingleton(multiDv);
            }
        } else {
            singleDv = subCtx.reader().getSortedDocValues(sf.getName());
            if (singleDv == null) {
                singleDv = DocValues.emptySorted();
            }
        }

        LongValues toGlobal = ordinalMap == null ? null : ordinalMap.getGlobalOrds(subIdx);

        if (singleDv != null) {
            if (accumSeg) {
                collectPerSeg(singleDv, disi, toGlobal);
            } else {
                if (canDoPerSeg && toGlobal != null) {
                    collectCounts(singleDv, disi, toGlobal);
                } else {
                    collectDocs(singleDv, disi, toGlobal);
                }
            }
        } else {
            if (accumSeg) {
                collectPerSeg(multiDv, disi, toGlobal);
            } else {
                if (canDoPerSeg && toGlobal != null) {
                    collectCounts(multiDv, disi, toGlobal);
                } else {
                    collectDocs(multiDv, disi, toGlobal);
                }
            }
        }
    }

    reuse = null; // better GC
}

From source file:org.apache.solr.search.facet.FieldUtil.java

License:Apache License

public static SortedSetDocValues getSortedSetDocValues(QueryContext context, SchemaField field, QParser qparser)
        throws IOException {
    SortedSetDocValues si = context.searcher().getSlowAtomicReader().getSortedSetDocValues(field.getName());
    return si == null ? DocValues.emptySortedSet() : si;
}

From source file:org.apache.solr.uninverting.DocTermOrds.java

License:Apache License

/** Returns a SortedSetDocValues view of this instance */
public SortedSetDocValues iterator(LeafReader reader) throws IOException {
    if (isEmpty()) {
        return DocValues.emptySortedSet();
    } else {//from ww  w.ja  va 2  s  . c o m
        return new LegacySortedSetDocValuesWrapper(new Iterator(reader), reader.maxDoc());
    }
}

From source file:org.apache.solr.uninverting.FieldCacheImpl.java

License:Apache License

public SortedSetDocValues getDocTermOrds(LeafReader reader, String field, BytesRef prefix) throws IOException {
    // not a general purpose filtering mechanism...
    assert prefix == null || prefix == INT32_TERM_PREFIX || prefix == INT64_TERM_PREFIX;

    SortedSetDocValues dv = reader.getSortedSetDocValues(field);
    if (dv != null) {
        return dv;
    }//from   w  w  w.j a va  2s . c  o m

    SortedDocValues sdv = reader.getSortedDocValues(field);
    if (sdv != null) {
        return DocValues.singleton(sdv);
    }

    final FieldInfo info = reader.getFieldInfos().fieldInfo(field);
    if (info == null) {
        return DocValues.emptySortedSet();
    } else if (info.getDocValuesType() != DocValuesType.NONE) {
        throw new IllegalStateException(
                "Type mismatch: " + field + " was indexed as " + info.getDocValuesType());
    } else if (info.getIndexOptions() == IndexOptions.NONE) {
        return DocValues.emptySortedSet();
    }

    // ok we need to uninvert. check if we can optimize a bit.

    Terms terms = reader.terms(field);
    if (terms == null) {
        return DocValues.emptySortedSet();
    } else {
        // if #postings = #docswithfield we know that the field is "single valued enough".
        // it's possible the same term might appear twice in the same document, but SORTED_SET discards frequency.
        // it's still ok with filtering (which we limit to numerics), it just means precisionStep = Inf
        long numPostings = terms.getSumDocFreq();
        if (numPostings != -1 && numPostings == terms.getDocCount()) {
            return DocValues.singleton(getTermsIndex(reader, field));
        }
    }

    DocTermOrds dto = (DocTermOrds) caches.get(DocTermOrds.class).get(reader, new CacheKey(field, prefix));
    return dto.iterator(reader);
}

From source file:org.apache.tika.eval.tools.SlowCompositeReaderWrapper.java

License:Apache License

@Override
public SortedSetDocValues getSortedSetDocValues(String field) throws IOException {
    ensureOpen();//from   www. j  av a 2  s .co  m
    OrdinalMap map = null;
    synchronized (cachedOrdMaps) {
        map = cachedOrdMaps.get(field);
        if (map == null) {
            // uncached, or not a multi dv
            SortedSetDocValues dv = MultiDocValues.getSortedSetValues(in, field);
            if (dv instanceof MultiDocValues.MultiSortedSetDocValues) {
                map = ((MultiDocValues.MultiSortedSetDocValues) dv).mapping;
                IndexReader.CacheHelper cacheHelper = getReaderCacheHelper();
                if (cacheHelper != null && map.owner == cacheHelper.getKey()) {
                    cachedOrdMaps.put(field, map);
                }
            }
            return dv;
        }
    }

    assert map != null;
    int size = in.leaves().size();
    final SortedSetDocValues[] values = new SortedSetDocValues[size];
    final int[] starts = new int[size + 1];
    long cost = 0;
    for (int i = 0; i < size; i++) {
        LeafReaderContext context = in.leaves().get(i);
        final LeafReader reader = context.reader();
        final FieldInfo fieldInfo = reader.getFieldInfos().fieldInfo(field);
        if (fieldInfo != null && fieldInfo.getDocValuesType() != DocValuesType.SORTED_SET) {
            return null;
        }
        SortedSetDocValues v = reader.getSortedSetDocValues(field);
        if (v == null) {
            v = DocValues.emptySortedSet();
        }
        values[i] = v;
        starts[i] = context.docBase;
        cost += v.cost();
    }
    starts[size] = maxDoc();
    return new MultiDocValues.MultiSortedSetDocValues(values, starts, map, cost);
}

From source file:org.codelibs.elasticsearch.search.MultiValueMode.java

License:Apache License

/**
 * Return a {SortedDocValues} instance that can be used to sort root documents
 * with this mode, the provided values and filters for root/inner documents.
 *
 * For every root document, the values of its inner documents will be aggregated.
 *
 * Allowed Modes: MIN, MAX//  w  ww  .  ja va2  s. c  o  m
 *
 * NOTE: Calling the returned instance on docs that are not root docs is illegal
 *       The returned instance can only be evaluate the current and upcoming docs
 */
public SortedDocValues select(final RandomAccessOrds values, final BitSet rootDocs,
        final DocIdSetIterator innerDocs) throws IOException {
    if (rootDocs == null || innerDocs == null) {
        return select(DocValues.emptySortedSet());
    }
    final SortedDocValues selectedValues = select(values);

    return new SortedDocValues() {

        int lastSeenRootDoc = 0;
        int lastEmittedOrd = -1;

        @Override
        public BytesRef lookupOrd(int ord) {
            return selectedValues.lookupOrd(ord);
        }

        @Override
        public int getValueCount() {
            return selectedValues.getValueCount();
        }

        @Override
        public int getOrd(int rootDoc) {
            assert rootDocs.get(rootDoc) : "can only sort root documents";
            assert rootDoc >= lastSeenRootDoc : "can only evaluate current and upcoming root docs";
            if (rootDoc == lastSeenRootDoc) {
                return lastEmittedOrd;
            }

            try {
                final int prevRootDoc = rootDocs.prevSetBit(rootDoc - 1);
                final int firstNestedDoc;
                if (innerDocs.docID() > prevRootDoc) {
                    firstNestedDoc = innerDocs.docID();
                } else {
                    firstNestedDoc = innerDocs.advance(prevRootDoc + 1);
                }

                lastSeenRootDoc = rootDoc;
                return lastEmittedOrd = pick(selectedValues, innerDocs, firstNestedDoc, rootDoc);
            } catch (IOException e) {
                throw new RuntimeException(e);
            }
        }
    };
}

From source file:org.elasticsearch.index.fielddata.ordinals.GlobalOrdinalsBuilder.java

License:Apache License

public static IndexOrdinalsFieldData buildEmpty(Index index, Settings settings, final IndexReader indexReader,
        IndexOrdinalsFieldData indexFieldData) throws IOException {
    assert indexReader.leaves().size() > 1;

    final AtomicOrdinalsFieldData[] atomicFD = new AtomicOrdinalsFieldData[indexReader.leaves().size()];
    final RandomAccessOrds[] subs = new RandomAccessOrds[indexReader.leaves().size()];
    for (int i = 0; i < indexReader.leaves().size(); ++i) {
        atomicFD[i] = new AbstractAtomicOrdinalsFieldData() {
            @Override//w w  w  .  j  av  a2 s  .c  o  m
            public RandomAccessOrds getOrdinalsValues() {
                return DocValues.emptySortedSet();
            }

            @Override
            public long ramBytesUsed() {
                return 0;
            }

            @Override
            public Collection<Accountable> getChildResources() {
                return Collections.emptyList();
            }

            @Override
            public void close() {
            }
        };
        subs[i] = atomicFD[i].getOrdinalsValues();
    }
    final OrdinalMap ordinalMap = OrdinalMap.build(null, subs, PackedInts.DEFAULT);
    return new InternalGlobalOrdinalsIndexFieldData(index, settings, indexFieldData.getFieldNames(),
            indexFieldData.getFieldDataType(), atomicFD, ordinalMap, 0);
}