Example usage for org.apache.lucene.index SortedDocValues getValueCount

List of usage examples for org.apache.lucene.index SortedDocValues getValueCount

Introduction

In this page you can find the example usage for org.apache.lucene.index SortedDocValues getValueCount.

Prototype

public abstract int getValueCount();

Source Link

Document

Returns the number of unique values.

Usage

From source file:lucene.security.index.SecureAtomicReader.java

License:Apache License

@Override
public SortedDocValues getSortedDocValues(String field) throws IOException {
    final SortedDocValues sortedDocValues = in.getSortedDocValues(field);
    if (sortedDocValues == null) {
        return null;
    }//from   ww  w  .j  a  va2s .  c o m
    return new SortedDocValues() {

        @Override
        public void lookupOrd(int ord, BytesRef result) {
            sortedDocValues.lookupOrd(ord, result);
        }

        @Override
        public int getValueCount() {
            return sortedDocValues.getValueCount();
        }

        @Override
        public int getOrd(int docID) {
            try {
                if (_accessControl.hasAccess(ReadType.SORTED_DOC_VALUE, docID)) {
                    return sortedDocValues.getOrd(docID);
                }
                return -1; // Default missing value.
            } catch (IOException e) {
                throw new RuntimeException(e);
            }
        }
    };
}

From source file:org.apache.solr.handler.component.ExpandAllComponent.java

License:Apache License

@Override
public void process(ResponseBuilder rb) throws IOException {

    if (!doExpandAll(rb)) {
        return;/*from  w ww  .jav  a  2s  .co  m*/
    }

    SolrQueryRequest req = rb.req;
    SolrParams params = req.getParams();

    boolean isShard = params.getBool(ShardParams.IS_SHARD, false);
    String ids = params.get(ShardParams.IDS);

    if (ids == null && isShard) {
        return;
    }

    String field = params.get(ExpandParams.EXPAND_FIELD);
    if (field == null) {
        List<Query> filters = rb.getFilters();
        if (filters != null) {
            for (Query q : filters) {
                if (q instanceof CollapsingQParserPlugin.CollapsingPostFilter) {
                    CollapsingQParserPlugin.CollapsingPostFilter cp = (CollapsingQParserPlugin.CollapsingPostFilter) q;
                    field = cp.getField();
                }
            }
        }
    }

    if (field == null) {
        throw new IOException("Expand field is null.");
    }

    String sortParam = params.get(ExpandParams.EXPAND_SORT);
    String[] fqs = params.getParams(ExpandParams.EXPAND_FQ);
    String qs = params.get(ExpandParams.EXPAND_Q);
    int limit = params.getInt(ExpandParams.EXPAND_ROWS, 5);

    Sort sort = null;

    if (sortParam != null) {
        sort = QueryParsing.parseSortSpec(sortParam, rb.req).getSort();
    }

    Query query = null;
    if (qs == null) {
        query = rb.getQuery();
    } else {
        try {
            QParser parser = QParser.getParser(qs, null, req);
            query = parser.getQuery();
        } catch (Exception e) {
            throw new IOException(e);
        }
    }

    List<Query> newFilters = new ArrayList();

    if (fqs == null) {
        List<Query> filters = rb.getFilters();
        if (filters != null) {
            for (Query q : filters) {
                if (!(q instanceof CollapsingQParserPlugin.CollapsingPostFilter)) {
                    newFilters.add(q);
                }
            }
        }
    } else {
        try {
            for (String fq : fqs) {
                if (fq != null && fq.trim().length() != 0 && !fq.equals("*:*")) {
                    QParser fqp = QParser.getParser(fq, null, req);
                    newFilters.add(fqp.getQuery());
                }
            }
        } catch (Exception e) {
            throw new IOException(e);
        }
    }

    SolrIndexSearcher searcher = req.getSearcher();
    AtomicReader reader = searcher.getAtomicReader();
    SortedDocValues values = FieldCache.DEFAULT.getTermsIndex(reader, field);
    FixedBitSet groupBits = new FixedBitSet(values.getValueCount());
    DocList docList = rb.getResults().docList;
    IntOpenHashSet collapsedSet = new IntOpenHashSet(docList.size() * 2);

    DocIterator idit = docList.iterator();

    while (idit.hasNext()) {
        int doc = idit.nextDoc();
        int ord = values.getOrd(doc);
        if (ord > -1) {
            groupBits.set(ord);
            collapsedSet.add(doc);
        }
    }

    Collector collector = null;
    GroupExpandCollector groupExpandCollector = new GroupExpandCollector(values, groupBits, collapsedSet, limit,
            sort);
    SolrIndexSearcher.ProcessedFilter pfilter = searcher.getProcessedFilter(null, newFilters);
    if (pfilter.postFilter != null) {
        pfilter.postFilter.setLastDelegate(groupExpandCollector);
        collector = pfilter.postFilter;
    } else {
        collector = groupExpandCollector;
    }

    searcher.search(query, pfilter.filter, collector);
    IntObjectOpenHashMap groups = groupExpandCollector.getGroups();
    Iterator<IntObjectCursor> it = groups.iterator();
    Map<String, DocSlice> outMap = new HashMap();
    BytesRef bytesRef = new BytesRef();
    CharsRef charsRef = new CharsRef();
    FieldType fieldType = searcher.getSchema().getField(field).getType();

    while (it.hasNext()) {
        IntObjectCursor cursor = it.next();
        int ord = cursor.key;
        TopDocsCollector topDocsCollector = (TopDocsCollector) cursor.value;
        TopDocs topDocs = topDocsCollector.topDocs();
        ScoreDoc[] scoreDocs = topDocs.scoreDocs;
        if (scoreDocs.length > 0) {
            int[] docs = new int[scoreDocs.length];
            float[] scores = new float[scoreDocs.length];
            for (int i = 0; i < docs.length; i++) {
                ScoreDoc scoreDoc = scoreDocs[i];
                docs[i] = scoreDoc.doc;
                scores[i] = scoreDoc.score;
            }
            DocSlice slice = new DocSlice(0, docs.length, docs, scores, topDocs.totalHits,
                    topDocs.getMaxScore());
            values.lookupOrd(ord, bytesRef);
            fieldType.indexedToReadable(bytesRef, charsRef);
            String group = charsRef.toString();
            outMap.put(group, slice);
        }
    }

    rb.rsp.add("expanded", outMap);
}

From source file:org.apache.solr.handler.component.ExpandComponent.java

License:Apache License

@SuppressWarnings("unchecked")
@Override/*from  ww  w .j  a  v  a2s. c o  m*/
public void process(ResponseBuilder rb) throws IOException {

    if (!rb.doExpand) {
        return;
    }

    SolrQueryRequest req = rb.req;
    SolrParams params = req.getParams();

    boolean isShard = params.getBool(ShardParams.IS_SHARD, false);
    String ids = params.get(ShardParams.IDS);

    if (ids == null && isShard) {
        return;
    }

    String field = params.get(ExpandParams.EXPAND_FIELD);
    if (field == null) {
        List<Query> filters = rb.getFilters();
        if (filters != null) {
            for (Query q : filters) {
                if (q instanceof CollapsingQParserPlugin.CollapsingPostFilter) {
                    CollapsingQParserPlugin.CollapsingPostFilter cp = (CollapsingQParserPlugin.CollapsingPostFilter) q;
                    field = cp.getField();
                }
            }
        }
    }

    if (field == null) {
        throw new IOException("Expand field is null.");
    }

    String sortParam = params.get(ExpandParams.EXPAND_SORT);
    String[] fqs = params.getParams(ExpandParams.EXPAND_FQ);
    String qs = params.get(ExpandParams.EXPAND_Q);
    int limit = params.getInt(ExpandParams.EXPAND_ROWS, 5);

    Sort sort = null;

    if (sortParam != null) {
        sort = QueryParsing.parseSortSpec(sortParam, rb.req).getSort();
    }

    Query query;
    if (qs == null) {
        query = rb.getQuery();
    } else {
        try {
            QParser parser = QParser.getParser(qs, null, req);
            query = parser.getQuery();
        } catch (Exception e) {
            throw new IOException(e);
        }
    }

    List<Query> newFilters = new ArrayList<>();

    if (fqs == null) {
        List<Query> filters = rb.getFilters();
        if (filters != null) {
            for (Query q : filters) {
                if (!(q instanceof CollapsingQParserPlugin.CollapsingPostFilter)) {
                    newFilters.add(q);
                }
            }
        }
    } else {
        try {
            for (String fq : fqs) {
                if (fq != null && fq.trim().length() != 0 && !fq.equals("*:*")) {
                    QParser fqp = QParser.getParser(fq, null, req);
                    newFilters.add(fqp.getQuery());
                }
            }
        } catch (Exception e) {
            throw new IOException(e);
        }
    }

    SolrIndexSearcher searcher = req.getSearcher();
    AtomicReader reader = searcher.getAtomicReader();
    SortedDocValues values = FieldCache.DEFAULT.getTermsIndex(reader, field);
    FixedBitSet groupBits = new FixedBitSet(values.getValueCount());
    DocList docList = rb.getResults().docList;
    IntOpenHashSet collapsedSet = new IntOpenHashSet(docList.size() * 2);

    DocIterator idit = docList.iterator();

    while (idit.hasNext()) {
        int doc = idit.nextDoc();
        int ord = values.getOrd(doc);
        if (ord > -1) {
            groupBits.set(ord);
            collapsedSet.add(doc);
        }
    }

    Collector collector;
    if (sort != null)
        sort = sort.rewrite(searcher);
    GroupExpandCollector groupExpandCollector = new GroupExpandCollector(values, groupBits, collapsedSet, limit,
            sort);
    SolrIndexSearcher.ProcessedFilter pfilter = searcher.getProcessedFilter(null, newFilters);
    if (pfilter.postFilter != null) {
        pfilter.postFilter.setLastDelegate(groupExpandCollector);
        collector = pfilter.postFilter;
    } else {
        collector = groupExpandCollector;
    }

    searcher.search(query, pfilter.filter, collector);
    IntObjectMap groups = groupExpandCollector.getGroups();
    Map<String, DocSlice> outMap = new HashMap();
    CharsRef charsRef = new CharsRef();
    FieldType fieldType = searcher.getSchema().getField(field).getType();
    for (IntObjectCursor cursor : (Iterable<IntObjectCursor>) groups) {
        int ord = cursor.key;
        TopDocsCollector topDocsCollector = (TopDocsCollector) cursor.value;
        TopDocs topDocs = topDocsCollector.topDocs();
        ScoreDoc[] scoreDocs = topDocs.scoreDocs;
        if (scoreDocs.length > 0) {
            int[] docs = new int[scoreDocs.length];
            float[] scores = new float[scoreDocs.length];
            for (int i = 0; i < docs.length; i++) {
                ScoreDoc scoreDoc = scoreDocs[i];
                docs[i] = scoreDoc.doc;
                scores[i] = scoreDoc.score;
            }
            DocSlice slice = new DocSlice(0, docs.length, docs, scores, topDocs.totalHits,
                    topDocs.getMaxScore());
            final BytesRef bytesRef = values.lookupOrd(ord);
            fieldType.indexedToReadable(bytesRef, charsRef);
            String group = charsRef.toString();
            outMap.put(group, slice);
        }
    }

    rb.rsp.add("expanded", outMap);
}

From source file:org.apache.solr.request.SimpleFacets.java

License:Apache License

/**
 * Use the Lucene FieldCache to get counts for each unique field value in <code>docs</code>.
 * The field must have at most one indexed token per document.
 */// w  ww .  j ava  2  s .  c o  m
public static NamedList<Integer> getFieldCacheCounts(SolrIndexSearcher searcher, DocSet docs, String fieldName,
        int offset, int limit, int mincount, boolean missing, String sort, String prefix) throws IOException {
    // TODO: If the number of terms is high compared to docs.size(), and zeros==false,
    //  we should use an alternate strategy to avoid
    //  1) creating another huge int[] for the counts
    //  2) looping over that huge int[] looking for the rare non-zeros.
    //
    // Yet another variation: if docs.size() is small and termvectors are stored,
    // then use them instead of the FieldCache.
    //

    // TODO: this function is too big and could use some refactoring, but
    // we also need a facet cache, and refactoring of SimpleFacets instead of
    // trying to pass all the various params around.

    FieldType ft = searcher.getSchema().getFieldType(fieldName);
    NamedList<Integer> res = new NamedList<Integer>();

    SortedDocValues si = FieldCache.DEFAULT.getTermsIndex(searcher.getAtomicReader(), fieldName);

    final BytesRef br = new BytesRef();

    final BytesRef prefixRef;
    if (prefix == null) {
        prefixRef = null;
    } else if (prefix.length() == 0) {
        prefix = null;
        prefixRef = null;
    } else {
        prefixRef = new BytesRef(prefix);
    }

    int startTermIndex, endTermIndex;
    if (prefix != null) {
        startTermIndex = si.lookupTerm(prefixRef);
        if (startTermIndex < 0)
            startTermIndex = -startTermIndex - 1;
        prefixRef.append(UnicodeUtil.BIG_TERM);
        endTermIndex = si.lookupTerm(prefixRef);
        assert endTermIndex < 0;
        endTermIndex = -endTermIndex - 1;
    } else {
        startTermIndex = -1;
        endTermIndex = si.getValueCount();
    }

    final int nTerms = endTermIndex - startTermIndex;
    int missingCount = -1;
    final CharsRef charsRef = new CharsRef(10);
    if (nTerms > 0 && docs.size() >= mincount) {

        // count collection array only needs to be as big as the number of terms we are
        // going to collect counts for.
        final int[] counts = new int[nTerms];

        DocIterator iter = docs.iterator();

        while (iter.hasNext()) {
            int term = si.getOrd(iter.nextDoc());
            int arrIdx = term - startTermIndex;
            if (arrIdx >= 0 && arrIdx < nTerms)
                counts[arrIdx]++;
        }

        if (startTermIndex == -1) {
            missingCount = counts[0];
        }

        // IDEA: we could also maintain a count of "other"... everything that fell outside
        // of the top 'N'

        int off = offset;
        int lim = limit >= 0 ? limit : Integer.MAX_VALUE;

        if (sort.equals(FacetParams.FACET_SORT_COUNT) || sort.equals(FacetParams.FACET_SORT_COUNT_LEGACY)) {
            int maxsize = limit > 0 ? offset + limit : Integer.MAX_VALUE - 1;
            maxsize = Math.min(maxsize, nTerms);
            LongPriorityQueue queue = new LongPriorityQueue(Math.min(maxsize, 1000), maxsize, Long.MIN_VALUE);

            int min = mincount - 1; // the smallest value in the top 'N' values
            for (int i = (startTermIndex == -1) ? 1 : 0; i < nTerms; i++) {
                int c = counts[i];
                if (c > min) {
                    // NOTE: we use c>min rather than c>=min as an optimization because we are going in
                    // index order, so we already know that the keys are ordered.  This can be very
                    // important if a lot of the counts are repeated (like zero counts would be).

                    // smaller term numbers sort higher, so subtract the term number instead
                    long pair = (((long) c) << 32) + (Integer.MAX_VALUE - i);
                    boolean displaced = queue.insert(pair);
                    if (displaced)
                        min = (int) (queue.top() >>> 32);
                }
            }

            // if we are deep paging, we don't have to order the highest "offset" counts.
            int collectCount = Math.max(0, queue.size() - off);
            assert collectCount <= lim;

            // the start and end indexes of our list "sorted" (starting with the highest value)
            int sortedIdxStart = queue.size() - (collectCount - 1);
            int sortedIdxEnd = queue.size() + 1;
            final long[] sorted = queue.sort(collectCount);

            for (int i = sortedIdxStart; i < sortedIdxEnd; i++) {
                long pair = sorted[i];
                int c = (int) (pair >>> 32);
                int tnum = Integer.MAX_VALUE - (int) pair;
                si.lookupOrd(startTermIndex + tnum, br);
                ft.indexedToReadable(br, charsRef);
                res.add(charsRef.toString(), c);
            }

        } else {
            // add results in index order
            int i = (startTermIndex == -1) ? 1 : 0;
            if (mincount <= 0) {
                // if mincount<=0, then we won't discard any terms and we know exactly
                // where to start.
                i += off;
                off = 0;
            }

            for (; i < nTerms; i++) {
                int c = counts[i];
                if (c < mincount || --off >= 0)
                    continue;
                if (--lim < 0)
                    break;
                si.lookupOrd(startTermIndex + i, br);
                ft.indexedToReadable(br, charsRef);
                res.add(charsRef.toString(), c);
            }
        }
    }

    if (missing) {
        if (missingCount < 0) {
            missingCount = getFieldMissingCount(searcher, docs, fieldName);
        }
        res.add(null, missingCount);
    }

    return res;
}

From source file:org.apache.solr.schema.BoolField.java

License:Apache License

@Override
public FunctionValues getValues(Map context, AtomicReaderContext readerContext) throws IOException {
    final SortedDocValues sindex = FieldCache.DEFAULT.getTermsIndex(readerContext.reader(), field);

    // figure out what ord maps to true
    int nord = sindex.getValueCount();
    BytesRef br = new BytesRef();
    // if no values in the segment, default trueOrd to something other then -1 (missing)
    int tord = -2;
    for (int i = 0; i < nord; i++) {
        sindex.lookupOrd(i, br);/*from   www.  j  a v  a2 s .c o  m*/
        if (br.length == 1 && br.bytes[br.offset] == 'T') {
            tord = i;
            break;
        }
    }

    final int trueOrd = tord;

    return new BoolDocValues(this) {
        @Override
        public boolean boolVal(int doc) {
            return sindex.getOrd(doc) == trueOrd;
        }

        @Override
        public boolean exists(int doc) {
            return sindex.getOrd(doc) != -1;
        }

        @Override
        public ValueFiller getValueFiller() {
            return new ValueFiller() {
                private final MutableValueBool mval = new MutableValueBool();

                @Override
                public MutableValue getValue() {
                    return mval;
                }

                @Override
                public void fillValue(int doc) {
                    int ord = sindex.getOrd(doc);
                    mval.value = (ord == trueOrd);
                    mval.exists = (ord != -1);
                }
            };
        }
    };
}

From source file:org.apache.solr.search.facet.FacetFieldProcessorByArrayDV.java

License:Apache License

private void collectPerSeg(SortedDocValues singleDv, DocIdSetIterator disi, LongValues toGlobal)
        throws IOException {
    int segMax = singleDv.getValueCount() + 1;
    final int[] counts = getCountArr(segMax);

    /** alternate trial implementations
     // ord/*from  w w  w.  j  av a 2 s.  co  m*/
     // FieldUtil.visitOrds(singleDv, disi,  (doc,ord)->{counts[ord+1]++;} );
            
    FieldUtil.OrdValues ordValues = FieldUtil.getOrdValues(singleDv, disi);
    while (ordValues.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
      counts[ ordValues.getOrd() + 1]++;
    }
     **/

    // calculate segment-local counts
    int doc;
    if (singleDv instanceof FieldCacheImpl.SortedDocValuesImpl.Iter) {
        FieldCacheImpl.SortedDocValuesImpl.Iter fc = (FieldCacheImpl.SortedDocValuesImpl.Iter) singleDv;
        while ((doc = disi.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
            counts[fc.getOrd(doc) + 1]++;
        }
    } else {
        while ((doc = disi.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
            if (singleDv.advanceExact(doc)) {
                counts[singleDv.ordValue() + 1]++;
            }
        }
    }

    // convert segment-local counts to global counts
    for (int i = 1; i < segMax; i++) {
        int segCount = counts[i];
        if (segCount > 0) {
            int slot = toGlobal == null ? (i - 1) : (int) toGlobal.get(i - 1);
            countAcc.incrementCount(slot, segCount);
        }
    }
}

From source file:org.apache.solr.search.facet.FacetFieldProcessorDV.java

License:Apache License

private void collectPerSeg(SortedDocValues singleDv, DocIdSetIterator disi, LongValues toGlobal)
        throws IOException {
    int segMax = singleDv.getValueCount() + 1;
    final int[] counts = getCountArr(segMax);

    int doc;/*ww  w. j a v a 2s  .  co  m*/
    while ((doc = disi.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
        counts[singleDv.getOrd(doc) + 1]++;
    }

    for (int i = 1; i < segMax; i++) {
        int segCount = counts[i];
        if (segCount > 0) {
            int slot = toGlobal == null ? (i - 1) : (int) toGlobal.get(i - 1);
            countAcc.incrementCount(slot, segCount);
        }
    }
}

From source file:org.apache.solr.uninverting.TestFieldCache.java

License:Apache License

public void test() throws IOException {
    FieldCache cache = FieldCache.DEFAULT;
    NumericDocValues doubles = cache.getNumerics(reader, "theDouble", FieldCache.DOUBLE_POINT_PARSER);
    for (int i = 0; i < NUM_DOCS; i++) {
        assertEquals(i, doubles.nextDoc());
        assertEquals(Double.doubleToLongBits(Double.MAX_VALUE - i), doubles.longValue());
    }//w  w  w  .j  ava 2 s  .  c o  m

    NumericDocValues longs = cache.getNumerics(reader, "theLong", FieldCache.LONG_POINT_PARSER);
    for (int i = 0; i < NUM_DOCS; i++) {
        assertEquals(i, longs.nextDoc());
        assertEquals(Long.MAX_VALUE - i, longs.longValue());
    }

    NumericDocValues ints = cache.getNumerics(reader, "theInt", FieldCache.INT_POINT_PARSER);
    for (int i = 0; i < NUM_DOCS; i++) {
        assertEquals(i, ints.nextDoc());
        assertEquals(Integer.MAX_VALUE - i, ints.longValue());
    }

    NumericDocValues floats = cache.getNumerics(reader, "theFloat", FieldCache.FLOAT_POINT_PARSER);
    for (int i = 0; i < NUM_DOCS; i++) {
        assertEquals(i, floats.nextDoc());
        assertEquals(Float.floatToIntBits(Float.MAX_VALUE - i), floats.longValue());
    }

    Bits docsWithField = cache.getDocsWithField(reader, "theLong", FieldCache.LONG_POINT_PARSER);
    assertSame("Second request to cache return same array", docsWithField,
            cache.getDocsWithField(reader, "theLong", FieldCache.LONG_POINT_PARSER));
    assertTrue("docsWithField(theLong) must be class Bits.MatchAllBits",
            docsWithField instanceof Bits.MatchAllBits);
    assertTrue("docsWithField(theLong) Size: " + docsWithField.length() + " is not: " + NUM_DOCS,
            docsWithField.length() == NUM_DOCS);
    for (int i = 0; i < docsWithField.length(); i++) {
        assertTrue(docsWithField.get(i));
    }

    docsWithField = cache.getDocsWithField(reader, "sparse", FieldCache.INT_POINT_PARSER);
    assertSame("Second request to cache return same array", docsWithField,
            cache.getDocsWithField(reader, "sparse", FieldCache.INT_POINT_PARSER));
    assertFalse("docsWithField(sparse) must not be class Bits.MatchAllBits",
            docsWithField instanceof Bits.MatchAllBits);
    assertTrue("docsWithField(sparse) Size: " + docsWithField.length() + " is not: " + NUM_DOCS,
            docsWithField.length() == NUM_DOCS);
    for (int i = 0; i < docsWithField.length(); i++) {
        assertEquals(i % 2 == 0, docsWithField.get(i));
    }

    // getTermsIndex
    SortedDocValues termsIndex = cache.getTermsIndex(reader, "theRandomUnicodeString");
    for (int i = 0; i < NUM_DOCS; i++) {
        final String s;
        if (i > termsIndex.docID()) {
            termsIndex.advance(i);
        }
        if (i == termsIndex.docID()) {
            s = termsIndex.binaryValue().utf8ToString();
        } else {
            s = null;
        }
        assertTrue("for doc " + i + ": " + s + " does not equal: " + unicodeStrings[i],
                unicodeStrings[i] == null || unicodeStrings[i].equals(s));
    }

    int nTerms = termsIndex.getValueCount();

    TermsEnum tenum = termsIndex.termsEnum();
    for (int i = 0; i < nTerms; i++) {
        BytesRef val1 = BytesRef.deepCopyOf(tenum.next());
        final BytesRef val = termsIndex.lookupOrd(i);
        // System.out.println("i="+i);
        assertEquals(val, val1);
    }

    // seek the enum around (note this isn't a great test here)
    int num = atLeast(100);
    for (int i = 0; i < num; i++) {
        int k = random().nextInt(nTerms);
        final BytesRef val = BytesRef.deepCopyOf(termsIndex.lookupOrd(k));
        assertEquals(TermsEnum.SeekStatus.FOUND, tenum.seekCeil(val));
        assertEquals(val, tenum.term());
    }

    for (int i = 0; i < nTerms; i++) {
        final BytesRef val = BytesRef.deepCopyOf(termsIndex.lookupOrd(i));
        assertEquals(TermsEnum.SeekStatus.FOUND, tenum.seekCeil(val));
        assertEquals(val, tenum.term());
    }

    // test bad field
    termsIndex = cache.getTermsIndex(reader, "bogusfield");

    // getTerms
    BinaryDocValues terms = cache.getTerms(reader, "theRandomUnicodeString");
    for (int i = 0; i < NUM_DOCS; i++) {
        if (terms.docID() < i) {
            terms.nextDoc();
        }
        if (terms.docID() == i) {
            assertEquals(unicodeStrings[i], terms.binaryValue().utf8ToString());
        } else {
            assertNull(unicodeStrings[i]);
        }
    }

    // test bad field
    terms = cache.getTerms(reader, "bogusfield");

    // getDocTermOrds
    SortedSetDocValues termOrds = cache.getDocTermOrds(reader, "theRandomUnicodeMultiValuedField", null);
    int numEntries = cache.getCacheEntries().length;
    // ask for it again, and check that we didnt create any additional entries:
    termOrds = cache.getDocTermOrds(reader, "theRandomUnicodeMultiValuedField", null);
    assertEquals(numEntries, cache.getCacheEntries().length);

    for (int i = 0; i < NUM_DOCS; i++) {
        // This will remove identical terms. A DocTermOrds doesn't return duplicate ords for a docId
        List<BytesRef> values = new ArrayList<>(new LinkedHashSet<>(Arrays.asList(multiValued[i])));
        for (BytesRef v : values) {
            if (v == null) {
                // why does this test use null values... instead of an empty list: confusing
                break;
            }
            if (i > termOrds.docID()) {
                assertEquals(i, termOrds.nextDoc());
            }
            long ord = termOrds.nextOrd();
            assert ord != SortedSetDocValues.NO_MORE_ORDS;
            BytesRef scratch = termOrds.lookupOrd(ord);
            assertEquals(v, scratch);
        }
        if (i == termOrds.docID()) {
            assertEquals(SortedSetDocValues.NO_MORE_ORDS, termOrds.nextOrd());
        }
    }

    // test bad field
    termOrds = cache.getDocTermOrds(reader, "bogusfield", null);
    assertTrue(termOrds.getValueCount() == 0);

    FieldCache.DEFAULT.purgeByCacheKey(reader.getCoreCacheKey());
}

From source file:org.apache.solr.uninverting.TestFieldCache.java

License:Apache License

public void testDocValuesIntegration() throws Exception {
    Directory dir = newDirectory();/*  w w w.  j  av a 2 s  .  c  o m*/
    IndexWriterConfig iwc = newIndexWriterConfig(null);
    RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
    Document doc = new Document();
    doc.add(new BinaryDocValuesField("binary", new BytesRef("binary value")));
    doc.add(new SortedDocValuesField("sorted", new BytesRef("sorted value")));
    doc.add(new NumericDocValuesField("numeric", 42));
    doc.add(new SortedSetDocValuesField("sortedset", new BytesRef("sortedset value1")));
    doc.add(new SortedSetDocValuesField("sortedset", new BytesRef("sortedset value2")));
    iw.addDocument(doc);
    DirectoryReader ir = iw.getReader();
    iw.close();
    LeafReader ar = getOnlyLeafReader(ir);

    // Binary type: can be retrieved via getTerms()
    expectThrows(IllegalStateException.class, () -> {
        FieldCache.DEFAULT.getNumerics(ar, "binary", FieldCache.INT_POINT_PARSER);
    });

    BinaryDocValues binary = FieldCache.DEFAULT.getTerms(ar, "binary");
    assertEquals(0, binary.nextDoc());
    final BytesRef term = binary.binaryValue();
    assertEquals("binary value", term.utf8ToString());

    expectThrows(IllegalStateException.class, () -> {
        FieldCache.DEFAULT.getTermsIndex(ar, "binary");
    });

    expectThrows(IllegalStateException.class, () -> {
        FieldCache.DEFAULT.getDocTermOrds(ar, "binary", null);
    });

    expectThrows(IllegalStateException.class, () -> {
        new DocTermOrds(ar, null, "binary");
    });

    Bits bits = FieldCache.DEFAULT.getDocsWithField(ar, "binary", null);
    assertTrue(bits.get(0));

    // Sorted type: can be retrieved via getTerms(), getTermsIndex(), getDocTermOrds()
    expectThrows(IllegalStateException.class, () -> {
        FieldCache.DEFAULT.getNumerics(ar, "sorted", FieldCache.INT_POINT_PARSER);
    });

    expectThrows(IllegalStateException.class, () -> {
        new DocTermOrds(ar, null, "sorted");
    });

    binary = FieldCache.DEFAULT.getTerms(ar, "sorted");
    assertEquals(0, binary.nextDoc());

    BytesRef scratch = binary.binaryValue();
    assertEquals("sorted value", scratch.utf8ToString());

    SortedDocValues sorted = FieldCache.DEFAULT.getTermsIndex(ar, "sorted");
    assertEquals(0, sorted.nextDoc());
    assertEquals(0, sorted.ordValue());
    assertEquals(1, sorted.getValueCount());
    scratch = sorted.binaryValue();
    assertEquals("sorted value", scratch.utf8ToString());

    SortedSetDocValues sortedSet = FieldCache.DEFAULT.getDocTermOrds(ar, "sorted", null);
    assertEquals(0, sortedSet.nextDoc());
    assertEquals(0, sortedSet.nextOrd());
    assertEquals(SortedSetDocValues.NO_MORE_ORDS, sortedSet.nextOrd());
    assertEquals(1, sortedSet.getValueCount());

    bits = FieldCache.DEFAULT.getDocsWithField(ar, "sorted", null);
    assertTrue(bits.get(0));

    // Numeric type: can be retrieved via getInts() and so on
    NumericDocValues numeric = FieldCache.DEFAULT.getNumerics(ar, "numeric", FieldCache.INT_POINT_PARSER);
    assertEquals(0, numeric.nextDoc());
    assertEquals(42, numeric.longValue());

    expectThrows(IllegalStateException.class, () -> {
        FieldCache.DEFAULT.getTerms(ar, "numeric");
    });

    expectThrows(IllegalStateException.class, () -> {
        FieldCache.DEFAULT.getTermsIndex(ar, "numeric");
    });

    expectThrows(IllegalStateException.class, () -> {
        FieldCache.DEFAULT.getDocTermOrds(ar, "numeric", null);
    });

    expectThrows(IllegalStateException.class, () -> {
        new DocTermOrds(ar, null, "numeric");
    });

    bits = FieldCache.DEFAULT.getDocsWithField(ar, "numeric", null);
    assertTrue(bits.get(0));

    // SortedSet type: can be retrieved via getDocTermOrds() 
    expectThrows(IllegalStateException.class, () -> {
        FieldCache.DEFAULT.getNumerics(ar, "sortedset", FieldCache.INT_POINT_PARSER);
    });

    expectThrows(IllegalStateException.class, () -> {
        FieldCache.DEFAULT.getTerms(ar, "sortedset");
    });

    expectThrows(IllegalStateException.class, () -> {
        FieldCache.DEFAULT.getTermsIndex(ar, "sortedset");
    });

    expectThrows(IllegalStateException.class, () -> {
        new DocTermOrds(ar, null, "sortedset");
    });

    sortedSet = FieldCache.DEFAULT.getDocTermOrds(ar, "sortedset", null);
    assertEquals(0, sortedSet.nextDoc());
    assertEquals(0, sortedSet.nextOrd());
    assertEquals(1, sortedSet.nextOrd());
    assertEquals(SortedSetDocValues.NO_MORE_ORDS, sortedSet.nextOrd());
    assertEquals(2, sortedSet.getValueCount());

    bits = FieldCache.DEFAULT.getDocsWithField(ar, "sortedset", null);
    assertTrue(bits.get(0));

    ir.close();
    dir.close();
}

From source file:org.apache.solr.uninverting.TestFieldCacheVsDocValues.java

License:Apache License

private void assertEquals(int maxDoc, SortedDocValues expected, SortedDocValues actual) throws Exception {
    // can be null for the segment if no docs actually had any SortedDocValues
    // in this case FC.getDocTermsOrds returns EMPTY
    if (actual == null) {
        assertEquals(expected.getValueCount(), 0);
        return;/*from  w ww.  java  2  s  .  c o m*/
    }
    assertEquals(expected.getValueCount(), actual.getValueCount());

    // compare ord lists
    while (true) {
        int docID = expected.nextDoc();
        if (docID == NO_MORE_DOCS) {
            assertEquals(NO_MORE_DOCS, actual.nextDoc());
            break;
        }
        assertEquals(docID, actual.nextDoc());
        assertEquals(expected.ordValue(), actual.ordValue());
        assertEquals(expected.binaryValue(), actual.binaryValue());
    }

    // compare ord dictionary
    for (long i = 0; i < expected.getValueCount(); i++) {
        final BytesRef expectedBytes = BytesRef.deepCopyOf(expected.lookupOrd((int) i));
        final BytesRef actualBytes = actual.lookupOrd((int) i);
        assertEquals(expectedBytes, actualBytes);
    }

    // compare termsenum
    assertEquals(expected.getValueCount(), expected.termsEnum(), actual.termsEnum());
}