Example usage for org.apache.lucene.index DocValues unwrapSingleton

List of usage examples for org.apache.lucene.index DocValues unwrapSingleton

Introduction

On this page you can find example usages of org.apache.lucene.index.DocValues.unwrapSingleton.

Prototype

public static NumericDocValues unwrapSingleton(SortedNumericDocValues dv) 

Source Link

Document

Returns a single-valued view of the SortedNumericDocValues if it was previously wrapped with DocValues#singleton(NumericDocValues), or null otherwise.

Usage

From source file:org.apache.solr.request.DocValuesStats.java

License:Apache License

/**
 * Builds a {@link StatsValues} for {@code fieldName} from per-term counts collected over the
 * documents in {@code docs}, and attaches one facet breakdown per entry of {@code facet}.
 *
 * <p>The field is read through sorted-set docvalues when it is multi-valued, and through sorted
 * docvalues (wrapped as a singleton set for term lookups) otherwise. Segments are visited one by
 * one; the per-segment work is delegated to {@code accumSingle} / {@code accumMulti}, whose
 * return values are summed and reported through {@code addMissing}.
 *
 * @param searcher     searcher supplying the schema, the top-level reader and its leaves
 * @param fieldName    field whose terms are counted
 * @param docs         documents to consider
 * @param calcDistinct forwarded to {@code StatsValuesFactory} and {@code FieldFacetStats}
 * @param facet        facet field names; must be non-null and every field must be single-valued
 * @return the populated stats object
 * @throws SolrException                 (BAD_REQUEST) if a facet field is multi-valued
 * @throws UnsupportedOperationException if the field has {@code Integer.MAX_VALUE} or more terms
 * @throws IOException                   if reading the index fails
 */
public static StatsValues getCounts(SolrIndexSearcher searcher, String fieldName, DocSet docs,
        boolean calcDistinct, String[] facet) throws IOException {
    final SchemaField schemaField = searcher.getSchema().getField(fieldName);
    final FieldType fieldType = schemaField.getType();
    final StatsValues stats = StatsValuesFactory.createStatsValues(schemaField, calcDistinct);

    // One FieldFacetStats per requested facet field, in the order the fields were passed in.
    final FieldFacetStats[] facetStats = new FieldFacetStats[facet.length];
    for (int i = 0; i < facet.length; i++) {
        final String facetField = facet[i];
        final SchemaField facetSchemaField = searcher.getSchema().getField(facetField);
        if (facetSchemaField.multiValued()) {
            throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
                    "Stats can only facet on single-valued fields, not: " + facetField);
        }
        facetStats[i] = new FieldFacetStats(searcher, facetField, schemaField, facetSchemaField,
                calcDistinct);
    }

    // TODO: remove multiValuedFieldCache(), check dv type / uninversion type?
    final boolean multiValued = schemaField.multiValued() || fieldType.multiValuedFieldCache();

    // Top-level view: used only to resolve ordinals back to terms when reporting results.
    SortedSetDocValues topLevel;
    // Maps per-segment ordinals to global ones; stays null when the top-level view is not a Multi* view.
    OrdinalMap ordinalMap = null;
    if (!multiValued) {
        final SortedDocValues topLevelSingle = searcher.getAtomicReader().getSortedDocValues(fieldName);
        topLevel = (topLevelSingle != null) ? DocValues.singleton(topLevelSingle) : null;
        if (topLevelSingle instanceof MultiSortedDocValues) {
            ordinalMap = ((MultiSortedDocValues) topLevelSingle).mapping;
        }
    } else {
        topLevel = searcher.getAtomicReader().getSortedSetDocValues(fieldName);
        if (topLevel instanceof MultiSortedSetDocValues) {
            ordinalMap = ((MultiSortedSetDocValues) topLevel).mapping;
        }
    }
    if (topLevel == null) {
        topLevel = DocValues.emptySortedSet();
    }

    final long valueCount = topLevel.getValueCount();
    if (valueCount >= Integer.MAX_VALUE) {
        throw new UnsupportedOperationException(
                "Currently this stats method is limited to " + Integer.MAX_VALUE + " unique terms");
    }

    // The count array only needs one slot per term we are going to collect counts for.
    final int[] counts = new int[(int) valueCount];
    int missingTotal = 0;

    final Filter filter = docs.getTopFilter();
    final List<AtomicReaderContext> leaves = searcher.getTopReaderContext().leaves();

    for (int segment = 0; segment < leaves.size(); segment++) {
        final AtomicReaderContext leaf = leaves.get(segment);
        // solr docsets already exclude any deleted docs
        final DocIdSet matching = filter.getDocIdSet(leaf, null);
        final DocIdSetIterator matchingDocs = (matching == null) ? null : matching.iterator();
        if (matchingDocs == null) {
            continue; // nothing from the doc set lives in this segment
        }
        final int docBase = leaf.docBase;

        if (!multiValued) {
            SortedDocValues segmentValues = leaf.reader().getSortedDocValues(fieldName);
            if (segmentValues == null) {
                segmentValues = DocValues.emptySorted();
            }
            missingTotal += accumSingle(counts, docBase, facetStats, segmentValues, matchingDocs, segment,
                    ordinalMap);
            continue;
        }

        SortedSetDocValues segmentSet = leaf.reader().getSortedSetDocValues(fieldName);
        if (segmentSet == null) {
            segmentSet = DocValues.emptySortedSet();
        }
        // some codecs may optimize SORTED_SET storage for single-valued fields
        final SortedDocValues unwrapped = DocValues.unwrapSingleton(segmentSet);
        if (unwrapped == null) {
            missingTotal += accumMulti(counts, docBase, facetStats, segmentSet, matchingDocs, segment,
                    ordinalMap);
        } else {
            missingTotal += accumSingle(counts, docBase, facetStats, unwrapped, matchingDocs, segment,
                    ordinalMap);
        }
    }

    // Report every term that was seen at least once, in ordinal (index) order.
    for (int ord = 0; ord < counts.length; ord++) {
        final int seen = counts[ord];
        if (seen <= 0) {
            continue;
        }
        final BytesRef term = topLevel.lookupOrd(ord);
        stats.accumulate(term, seen);
        for (FieldFacetStats facetStat : facetStats) {
            facetStat.accumulateTermNum(ord, term);
        }
    }
    stats.addMissing(missingTotal);

    for (FieldFacetStats facetStat : facetStats) {
        // Capture the map reference before accumulateMissing() runs, exactly as the original did.
        final Map<String, StatsValues> facetValues = facetStat.facetStatsValues;
        facetStat.accumulateMissing();
        stats.addFacet(facetStat.name, facetValues);
    }

    return stats;
}

From source file:org.apache.solr.request.IntervalFacets.java

License:Apache License

/**
 * Walks every segment of the searcher and feeds the documents selected by {@code docs} into the
 * interval accumulators, reading the field through sorted-set docvalues when it is multi-valued
 * and through sorted docvalues otherwise.
 *
 * <p>Segments are skipped when the filter yields no doc-id set, the set has no iterator, or the
 * segment has no docvalues for the field.
 *
 * @throws IOException if reading the index fails
 */
private void getCountString() throws IOException {
    final Filter topFilter = docs.getTopFilter();
    for (AtomicReaderContext leaf : searcher.getTopReaderContext().leaves()) {
        // solr docsets already exclude any deleted docs
        final DocIdSet matching = topFilter.getDocIdSet(leaf, null);
        if (matching == null) {
            continue;
        }
        final DocIdSetIterator matchingDocs = matching.iterator();
        if (matchingDocs == null) {
            continue;
        }

        if (!schemaField.multiValued()) {
            final SortedDocValues singleValues = leaf.reader().getSortedDocValues(schemaField.getName());
            if (singleValues != null) {
                accumIntervalsSingle(singleValues, matchingDocs, matching.bits());
            }
            continue;
        }

        final SortedSetDocValues setValues = leaf.reader().getSortedSetDocValues(schemaField.getName());
        if (setValues == null) {
            continue;
        }
        // some codecs may optimize SORTED_SET storage for single-valued fields
        final SortedDocValues unwrapped = DocValues.unwrapSingleton(setValues);
        if (unwrapped == null) {
            accumIntervalsMulti(setValues, matchingDocs, matching.bits());
        } else {
            accumIntervalsSingle(unwrapped, matchingDocs, matching.bits());
        }
    }
}

From source file:org.apache.solr.search.facet.FacetFieldProcessorByArrayDV.java

License:Apache License

/**
 * Collects the facet domain segment by segment, choosing for each segment between per-segment
 * counting ({@code collectPerSeg}), direct global counting ({@code collectCounts}) and full
 * per-document collection ({@code collectDocs}).
 *
 * <p>Returns immediately when there are no terms or the domain is smaller than the effective
 * mincount. A segment is skipped when the domain filter yields no doc-id set or no iterator for
 * it; both are allowed to be {@code null} by the Lucene contract when nothing matches.
 *
 * @throws IOException if reading the index fails
 */
@Override
protected void collectDocs() throws IOException {
    int domainSize = fcontext.base.size();

    if (nTerms <= 0 || domainSize < effectiveMincount) { // TODO: what about allBuckets? missing bucket?
        return;
    }

    // TODO: refactor some of this logic into a base class
    boolean countOnly = collectAcc == null && allBucketsAcc == null;
    boolean fullRange = startTermIndex == 0 && endTermIndex == si.getValueCount();

    // Are we expecting many hits per bucket?
    // FUTURE: pro-rate for nTerms?
    // FUTURE: better take into account number of values in multi-valued fields.  This info is available for indexed fields.
    // FUTURE: take into account that bigger ord maps are more expensive than smaller ones
    // One test: 5M doc index, faceting on a single-valued field with almost 1M unique values, crossover point where global counting was slower
    // than per-segment counting was a domain of 658k docs.  At that point, top 10 buckets had 6-7 matches each.
    // this was for heap docvalues produced by UninvertingReader
    // Since these values were randomly distributed, lets round our domain multiplier up to account for less random real world data.
    long domainMultiplier = multiValuedField ? 4L : 2L;
    boolean manyHitsPerBucket = domainSize * domainMultiplier > (si.getValueCount() + 3); // +3 to increase test coverage with small tests

    // If we're only calculating counts, we're not prefixing, and we expect to collect many documents per unique value,
    // then collect per-segment before mapping to global ords at the end.  This will save redundant seg->global ord mappings.
    // FUTURE: there are probably some other non "countOnly" cases where we can use this as well (i.e. those where
    // the docid is not used)
    boolean canDoPerSeg = countOnly && fullRange;
    boolean accumSeg = manyHitsPerBucket && canDoPerSeg;

    if (freq.perSeg != null) {
        accumSeg = canDoPerSeg && freq.perSeg; // internal - override perSeg heuristic
    }

    final List<LeafReaderContext> leaves = fcontext.searcher.getIndexReader().leaves();
    Filter filter = fcontext.base.getTopFilter();

    for (int subIdx = 0; subIdx < leaves.size(); subIdx++) {
        LeafReaderContext subCtx = leaves.get(subIdx);

        setNextReaderFirstPhase(subCtx);

        DocIdSet dis = filter.getDocIdSet(subCtx, null); // solr docsets already exclude any deleted docs
        if (dis == null) {
            continue; // the filter accepts no documents in this segment
        }
        DocIdSetIterator disi = dis.iterator();
        if (disi == null) {
            continue; // an empty set may expose no iterator at all
        }

        SortedDocValues singleDv = null;
        SortedSetDocValues multiDv = null;
        if (multiValuedField) {
            // TODO: get sub from multi?
            multiDv = subCtx.reader().getSortedSetDocValues(sf.getName());
            if (multiDv == null) {
                multiDv = DocValues.emptySortedSet();
            }
            // some codecs may optimize SortedSet storage for single-valued fields
            // this will be null if this is not a wrapped single valued docvalues.
            if (unwrap_singleValued_multiDv) {
                singleDv = DocValues.unwrapSingleton(multiDv);
            }
        } else {
            singleDv = subCtx.reader().getSortedDocValues(sf.getName());
            if (singleDv == null) {
                singleDv = DocValues.emptySorted();
            }
        }

        // Segment-to-global ordinal mapping; null when no ordinal map is available.
        LongValues toGlobal = ordinalMap == null ? null : ordinalMap.getGlobalOrds(subIdx);

        // The two branches are identical apart from the docvalues type, which selects the overload.
        if (singleDv != null) {
            if (accumSeg) {
                collectPerSeg(singleDv, disi, toGlobal);
            } else if (canDoPerSeg && toGlobal != null) {
                collectCounts(singleDv, disi, toGlobal);
            } else {
                collectDocs(singleDv, disi, toGlobal);
            }
        } else {
            if (accumSeg) {
                collectPerSeg(multiDv, disi, toGlobal);
            } else if (canDoPerSeg && toGlobal != null) {
                collectCounts(multiDv, disi, toGlobal);
            } else {
                collectDocs(multiDv, disi, toGlobal);
            }
        }
    }

    reuse = null; // better GC
}

From source file:org.apache.solr.search.join.BlockJoinFieldFacetAccumulator.java

License:Apache License

/**
 * Loads the sorted-set docvalues of {@code fieldName} for the given segment and resets the
 * per-segment accumulator array to its initial state.
 *
 * @param fieldName field whose docvalues are loaded
 * @param leaf      segment to initialize for
 * @return {@code true} if the segment has at least one value for the field
 * @throws IOException if reading the docvalues fails
 */
private boolean initSegmentData(String fieldName, LeafReaderContext leaf) throws IOException {
    segmentSSDV = DocValues.getSortedSet(leaf.reader(), fieldName);

    final long valueCount = segmentSSDV.getValueCount();
    final int slots = (int) valueCount + 1; // one slot more than the number of ordinals

    segmentAccums = ArrayUtil.grow(segmentAccums, slots);
    // Initial slot value: high 32 bits zero, low 32 bits all ones ("zero counts, -1 parent").
    Arrays.fill(segmentAccums, 0, slots, 0x00000000ffffffffL);

    // Non-null only when the sorted-set view wraps single-valued docvalues.
    segmentSDV = DocValues.unwrapSingleton(segmentSSDV);

    return valueCount != 0; // perhaps we need to count "missings"??
}

From source file:org.apache.solr.uninverting.TestDocTermOrds.java

License:Apache License

/**
 * Indexes four documents into a single segment where no document has more than one distinct
 * value for "foo", and checks that the uninverted doc-term-ords view can be unwrapped to a
 * singleton and iterates the expected document/ordinal pairs.
 */
public void testActuallySingleValued() throws IOException {
    final Directory directory = newDirectory();
    final IndexWriterConfig writerConfig = newIndexWriterConfig(null);
    writerConfig.setMergePolicy(newLogMergePolicy());
    final IndexWriter writer = new IndexWriter(directory, writerConfig);

    // doc 0: foo=bar
    Document document = new Document();
    document.add(new StringField("foo", "bar", Field.Store.NO));
    writer.addDocument(document);

    // doc 1: foo=baz
    document = new Document();
    document.add(new StringField("foo", "baz", Field.Store.NO));
    writer.addDocument(document);

    // doc 2: no value for foo
    document = new Document();
    writer.addDocument(document);

    // doc 3: foo=baz added twice, i.e. the same term repeated
    document = new Document();
    document.add(new StringField("foo", "baz", Field.Store.NO));
    document.add(new StringField("foo", "baz", Field.Store.NO));
    writer.addDocument(document);

    writer.forceMerge(1);
    writer.close();

    final DirectoryReader reader = DirectoryReader.open(directory);
    final LeafReader leafReader = getOnlyLeafReader(reader);

    final SortedSetDocValues v = FieldCache.DEFAULT.getDocTermOrds(leafReader, "foo", null);
    assertNotNull(DocValues.unwrapSingleton(v)); // actually a single-valued field
    assertEquals(2, v.getValueCount());

    // {docID, ord} for every document that has a value; each has exactly one ordinal.
    final int[][] expectedDocAndOrd = { { 0, 0 }, { 1, 1 }, { 3, 1 } };
    for (int[] expected : expectedDocAndOrd) {
        assertEquals(expected[0], v.nextDoc());
        assertEquals(expected[1], v.nextOrd());
        assertEquals(SortedSetDocValues.NO_MORE_ORDS, v.nextOrd());
    }

    final String[] expectedTerms = { "bar", "baz" };
    for (int ord = 0; ord < expectedTerms.length; ord++) {
        final BytesRef term = v.lookupOrd(ord);
        assertEquals(expectedTerms[ord], term.utf8ToString());
    }

    reader.close();
    directory.close();
}

From source file:org.codelibs.elasticsearch.index.fielddata.FieldData.java

License:Apache License

/**
 * Given a {@link SortedNumericDocValues}, return a {@link SortedNumericDoubleValues}
 * instance that will translate long values to doubles using
 * {@link NumericUtils#sortableLongToDouble(long)}.
 *
 * <p>When {@code values} wraps a single-valued instance the result is built as a singleton as
 * well; instances that already carry a double view hand that view back instead of being wrapped.
 */
public static SortedNumericDoubleValues sortableLongBitsToDoubles(SortedNumericDocValues values) {
    final NumericDocValues single = DocValues.unwrapSingleton(values);

    if (single == null) {
        // Genuinely multi-valued: reuse the existing double view if there is one, else wrap.
        return (values instanceof SortableLongBitsSortedNumericDocValues)
                ? ((SortableLongBitsSortedNumericDocValues) values).getDoubleValues()
                : new SortableLongBitsToSortedNumericDoubleValues(values);
    }

    final NumericDoubleValues doubles = (single instanceof SortableLongBitsNumericDocValues)
            ? ((SortableLongBitsNumericDocValues) single).getDoubleValues()
            : new SortableLongBitsToNumericDoubleValues(single);
    final Bits docsWithField = DocValues.unwrapSingletonBits(values);
    return singleton(doubles, docsWithField);
}

From source file:org.codelibs.elasticsearch.index.fielddata.FieldData.java

License:Apache License

/**
 * Wrap the provided {@link SortedNumericDocValues} instance to cast all values to doubles.
 *
 * <p>A wrapped single-valued instance is unwrapped first so that the result is a singleton too.
 */
public static SortedNumericDoubleValues castToDouble(final SortedNumericDocValues values) {
    final NumericDocValues single = DocValues.unwrapSingleton(values);
    if (single == null) {
        return new SortedDoubleCastedValues(values);
    }
    final Bits docsWithField = DocValues.unwrapSingletonBits(values);
    final DoubleCastedValues casted = new DoubleCastedValues(single);
    return singleton(casted, docsWithField);
}

From source file:org.codelibs.elasticsearch.index.fielddata.FieldData.java

License:Apache License

/**
 * Returns whether the provided values *might* be multi-valued. There is no
 * guarantee that this method will return {@code false} in the single-valued case:
 * the answer is {@code true} whenever {@code values} cannot be unwrapped to a singleton.
 */
public static boolean isMultiValued(SortedSetDocValues values) {
    final boolean wrapsSingleton = DocValues.unwrapSingleton(values) != null;
    return !wrapsSingleton;
}

From source file:org.codelibs.elasticsearch.index.fielddata.FieldData.java

License:Apache License

/**
 * Returns whether the provided values *might* be multi-valued. There is no
 * guarantee that this method will return {@code false} in the single-valued case:
 * the answer is {@code true} whenever {@code values} cannot be unwrapped to a singleton.
 */
public static boolean isMultiValued(SortedNumericDocValues values) {
    final boolean wrapsSingleton = DocValues.unwrapSingleton(values) != null;
    return !wrapsSingleton;
}

From source file:org.codelibs.elasticsearch.index.fielddata.FieldData.java

License:Apache License

/**
 * If <code>dv</code> is an instance of {@link RandomAccessOrds}, then return
 * it, otherwise wrap it into a slow wrapper that implements random access.
 *
 * <p>The wrapper buffers all ordinals of the current document when {@code setDocument} is
 * called, so {@code ordAt} and {@code cardinality} are answered from that buffer. Its
 * {@code nextOrd} returns {@link SortedSetDocValues#NO_MORE_ORDS} once the buffered ordinals
 * are exhausted, as the {@link SortedSetDocValues} iteration contract requires.
 *
 * @param dv the doc values to expose with random access
 * @return {@code dv} itself when it already supports random access, otherwise a buffering wrapper
 */
public static RandomAccessOrds maybeSlowRandomAccessOrds(final SortedSetDocValues dv) {
    if (dv instanceof RandomAccessOrds) {
        return (RandomAccessOrds) dv;
    } else {
        assert DocValues
                .unwrapSingleton(dv) == null : "this method expect singleton to return random-access ords";
        return new RandomAccessOrds() {

            // Number of valid entries in ords for the current document.
            int cardinality;
            // Buffer of the current document's ordinals; only grows, so entries past
            // cardinality may be left over from an earlier document.
            long[] ords = new long[0];
            // Position of the next ordinal to hand out from nextOrd().
            int cursor;

            @Override
            public void setDocument(int docID) {
                cardinality = 0;
                dv.setDocument(docID);
                for (long next = dv.nextOrd(); next != SortedSetDocValues.NO_MORE_ORDS; next = dv.nextOrd()) {
                    ords = ArrayUtil.grow(ords, cardinality + 1);
                    ords[cardinality++] = next;
                }
                cursor = 0;
            }

            @Override
            public long nextOrd() {
                // Stop at cardinality, not at the array length: the buffer may be larger than
                // the current document's ordinal count, or empty.
                if (cursor >= cardinality) {
                    return SortedSetDocValues.NO_MORE_ORDS;
                }
                return ords[cursor++];
            }

            @Override
            public BytesRef lookupOrd(long ord) {
                return dv.lookupOrd(ord);
            }

            @Override
            public long getValueCount() {
                return dv.getValueCount();
            }

            @Override
            public long ordAt(int index) {
                return ords[index];
            }

            @Override
            public int cardinality() {
                return cardinality;
            }
        };
    }
}