Example usage for org.apache.lucene.util UnicodeUtil BIG_TERM

List of usage examples for org.apache.lucene.util UnicodeUtil BIG_TERM

Introduction

In this page you can find the example usage for org.apache.lucene.util UnicodeUtil BIG_TERM.

Prototype

BytesRef BIG_TERM

To view the source code for org.apache.lucene.util UnicodeUtil BIG_TERM, use the source link below.

Click Source Link

Document

A binary term consisting of a number of 0xff bytes, likely to be bigger than other terms (e.g. collation keys) one would normally encounter, and definitely bigger than any UTF-8 terms.

Usage

From source file:lux.solr.MissingStringLastComparatorSource.java

License:Apache License

/**
 * Builds a comparator source that sorts documents missing the string value
 * after all present values, by substituting {@link UnicodeUtil#BIG_TERM}
 * (a run of 0xff bytes, larger than any UTF-8 term) for missing entries.
 */
public MissingStringLastComparatorSource() {
    this(UnicodeUtil.BIG_TERM);
}

From source file:org.apache.solr.request.DocValuesFacets.java

License:Apache License

/**
 * Computes facet counts for each indexed value of {@code fieldName} within
 * {@code docs}, using sorted(-set) doc values for ordinal-based counting.
 * Supports paging ({@code offset}/{@code limit}), a minimum-count filter, an
 * optional term {@code prefix}, count- or index-ordered output, and an
 * optional "missing" bucket for documents without a value.
 *
 * @param searcher  searcher over the index being faceted
 * @param docs      base set of documents to count within
 * @param fieldName name of the facet field
 * @param offset    number of leading facet entries to skip
 * @param limit     maximum entries to return; negative means no limit
 * @param mincount  minimum count required for a term to be returned
 * @param missing   if true, also report the count of documents lacking the field
 * @param sort      count order when FacetParams.FACET_SORT_COUNT[_LEGACY], else index order
 * @param prefix    only count terms starting with this prefix (may be null or empty)
 * @return term label to count entries, finalized with the missing bucket
 * @throws IOException if doc values cannot be read
 */
public static NamedList<Integer> getCounts(SolrIndexSearcher searcher, DocSet docs, String fieldName,
        int offset, int limit, int mincount, boolean missing, String sort, String prefix) throws IOException {
    SchemaField schemaField = searcher.getSchema().getField(fieldName);
    FieldType ft = schemaField.getType();
    NamedList<Integer> res = new NamedList<Integer>();

    final SortedSetDocValues si; // for term lookups only
    OrdinalMap ordinalMap = null; // for mapping per-segment ords to global ones
    if (schemaField.multiValued()) {
        si = searcher.getAtomicReader().getSortedSetDocValues(fieldName);
        if (si instanceof MultiSortedSetDocValues) {
            ordinalMap = ((MultiSortedSetDocValues) si).mapping;
        }
    } else {
        // Single-valued fields are wrapped so both cases share the SORTED_SET API.
        SortedDocValues single = searcher.getAtomicReader().getSortedDocValues(fieldName);
        si = single == null ? null : new SingletonSortedSetDocValues(single);
        if (single instanceof MultiSortedDocValues) {
            ordinalMap = ((MultiSortedDocValues) single).mapping;
        }
    }
    // No doc values for this field: only the "missing" count can be reported.
    if (si == null) {
        return finalize(res, searcher, schemaField, docs, -1, missing);
    }
    // Counts are accumulated in an int[], so the ord space must fit in an int.
    if (si.getValueCount() >= Integer.MAX_VALUE) {
        throw new UnsupportedOperationException(
                "Currently this faceting method is limited to " + Integer.MAX_VALUE + " unique terms");
    }

    final BytesRef br = new BytesRef(); // scratch buffer for term lookups

    // Normalize the prefix: an empty prefix behaves like no prefix at all.
    final BytesRef prefixRef;
    if (prefix == null) {
        prefixRef = null;
    } else if (prefix.length() == 0) {
        prefix = null;
        prefixRef = null;
    } else {
        prefixRef = new BytesRef(prefix);
    }

    // Determine the global-ord range [startTermIndex, endTermIndex) to count.
    // startTermIndex == -1 reserves counts[0] for docs with no value (ord -1).
    int startTermIndex, endTermIndex;
    if (prefix != null) {
        // lookupTerm returns (-insertionPoint - 1) when the term is absent.
        startTermIndex = (int) si.lookupTerm(prefixRef);
        if (startTermIndex < 0)
            startTermIndex = -startTermIndex - 1;
        // BIG_TERM (all 0xff bytes) sorts after every term sharing the prefix,
        // so its insertion point marks one past the end of the prefix range.
        prefixRef.append(UnicodeUtil.BIG_TERM);
        endTermIndex = (int) si.lookupTerm(prefixRef);
        assert endTermIndex < 0;
        endTermIndex = -endTermIndex - 1;
    } else {
        startTermIndex = -1;
        endTermIndex = (int) si.getValueCount();
    }

    final int nTerms = endTermIndex - startTermIndex;
    int missingCount = -1;
    final CharsRef charsRef = new CharsRef(10); // scratch for readable labels
    if (nTerms > 0 && docs.size() >= mincount) {

        // count collection array only needs to be as big as the number of terms we are
        // going to collect counts for.
        final int[] counts = new int[nTerms];

        // Accumulate counts segment by segment, iterating only the docs in the
        // base set via its top-level filter.
        Filter filter = docs.getTopFilter();
        List<AtomicReaderContext> leaves = searcher.getTopReaderContext().leaves();
        for (int subIndex = 0; subIndex < leaves.size(); subIndex++) {
            AtomicReaderContext leaf = leaves.get(subIndex);
            DocIdSet dis = filter.getDocIdSet(leaf, null); // solr docsets already exclude any deleted docs
            DocIdSetIterator disi = null;
            if (dis != null) {
                disi = dis.iterator();
            }
            if (disi != null) {
                if (schemaField.multiValued()) {
                    SortedSetDocValues sub = leaf.reader().getSortedSetDocValues(fieldName);
                    if (sub == null) {
                        sub = SortedSetDocValues.EMPTY;
                    }
                    if (sub instanceof SingletonSortedSetDocValues) {
                        // some codecs may optimize SORTED_SET storage for single-valued fields
                        final SortedDocValues values = ((SingletonSortedSetDocValues) sub).getSortedDocValues();
                        accumSingle(counts, startTermIndex, values, disi, subIndex, ordinalMap);
                    } else {
                        accumMulti(counts, startTermIndex, sub, disi, subIndex, ordinalMap);
                    }
                } else {
                    SortedDocValues sub = leaf.reader().getSortedDocValues(fieldName);
                    if (sub == null) {
                        sub = SortedDocValues.EMPTY;
                    }
                    accumSingle(counts, startTermIndex, sub, disi, subIndex, ordinalMap);
                }
            }
        }

        // With no prefix, ord -1 (no value) was tallied in slot 0.
        if (startTermIndex == -1) {
            missingCount = counts[0];
        }

        // IDEA: we could also maintain a count of "other"... everything that fell outside
        // of the top 'N'

        int off = offset;
        int lim = limit >= 0 ? limit : Integer.MAX_VALUE;

        if (sort.equals(FacetParams.FACET_SORT_COUNT) || sort.equals(FacetParams.FACET_SORT_COUNT_LEGACY)) {
            // Count-sorted output: keep the top offset+limit entries in a
            // bounded priority queue of packed (count, inverted-ord) longs.
            int maxsize = limit > 0 ? offset + limit : Integer.MAX_VALUE - 1;
            maxsize = Math.min(maxsize, nTerms);
            LongPriorityQueue queue = new LongPriorityQueue(Math.min(maxsize, 1000), maxsize, Long.MIN_VALUE);

            int min = mincount - 1; // the smallest value in the top 'N' values
            for (int i = (startTermIndex == -1) ? 1 : 0; i < nTerms; i++) {
                int c = counts[i];
                if (c > min) {
                    // NOTE: we use c>min rather than c>=min as an optimization because we are going in
                    // index order, so we already know that the keys are ordered.  This can be very
                    // important if a lot of the counts are repeated (like zero counts would be).

                    // smaller term numbers sort higher, so subtract the term number instead
                    long pair = (((long) c) << 32) + (Integer.MAX_VALUE - i);
                    boolean displaced = queue.insert(pair);
                    if (displaced)
                        min = (int) (queue.top() >>> 32);
                }
            }

            // if we are deep paging, we don't have to order the highest "offset" counts.
            int collectCount = Math.max(0, queue.size() - off);
            assert collectCount <= lim;

            // the start and end indexes of our list "sorted" (starting with the highest value)
            int sortedIdxStart = queue.size() - (collectCount - 1);
            int sortedIdxEnd = queue.size() + 1;
            final long[] sorted = queue.sort(collectCount);

            // Unpack each pair and resolve the ord back to a readable label.
            for (int i = sortedIdxStart; i < sortedIdxEnd; i++) {
                long pair = sorted[i];
                int c = (int) (pair >>> 32);
                int tnum = Integer.MAX_VALUE - (int) pair;
                si.lookupOrd(startTermIndex + tnum, br);
                ft.indexedToReadable(br, charsRef);
                res.add(charsRef.toString(), c);
            }

        } else {
            // add results in index order
            int i = (startTermIndex == -1) ? 1 : 0;
            if (mincount <= 0) {
                // if mincount<=0, then we won't discard any terms and we know exactly
                // where to start.
                i += off;
                off = 0;
            }

            for (; i < nTerms; i++) {
                int c = counts[i];
                if (c < mincount || --off >= 0)
                    continue;
                if (--lim < 0)
                    break;
                si.lookupOrd(startTermIndex + i, br);
                ft.indexedToReadable(br, charsRef);
                res.add(charsRef.toString(), c);
            }
        }
    }

    return finalize(res, searcher, schemaField, docs, missingCount, missing);
}

From source file:org.apache.solr.request.SimpleFacets.java

License:Apache License

/**
 * Use the Lucene FieldCache to get counts for each unique field value in <code>docs</code>.
 * The field must have at most one indexed token per document.
 *
 * @param searcher  searcher over the index being faceted
 * @param docs      base set of documents to count within
 * @param fieldName name of the (single-valued) facet field
 * @param offset    number of leading facet entries to skip
 * @param limit     maximum entries to return; negative means no limit
 * @param mincount  minimum count required for a term to be returned
 * @param missing   if true, append a count of documents lacking the field
 * @param sort      count order when FacetParams.FACET_SORT_COUNT[_LEGACY], else index order
 * @param prefix    only count terms starting with this prefix (may be null or empty)
 * @return term label to count entries
 * @throws IOException if the terms index cannot be read
 */
public static NamedList<Integer> getFieldCacheCounts(SolrIndexSearcher searcher, DocSet docs, String fieldName,
        int offset, int limit, int mincount, boolean missing, String sort, String prefix) throws IOException {
    // TODO: If the number of terms is high compared to docs.size(), and zeros==false,
    //  we should use an alternate strategy to avoid
    //  1) creating another huge int[] for the counts
    //  2) looping over that huge int[] looking for the rare non-zeros.
    //
    // Yet another variation: if docs.size() is small and termvectors are stored,
    // then use them instead of the FieldCache.
    //

    // TODO: this function is too big and could use some refactoring, but
    // we also need a facet cache, and refactoring of SimpleFacets instead of
    // trying to pass all the various params around.

    FieldType ft = searcher.getSchema().getFieldType(fieldName);
    NamedList<Integer> res = new NamedList<Integer>();

    // Uninverted view: maps each document to an ordinal in the field's sorted terms.
    SortedDocValues si = FieldCache.DEFAULT.getTermsIndex(searcher.getAtomicReader(), fieldName);

    final BytesRef br = new BytesRef(); // scratch buffer for term lookups

    // Normalize the prefix: an empty prefix behaves like no prefix at all.
    final BytesRef prefixRef;
    if (prefix == null) {
        prefixRef = null;
    } else if (prefix.length() == 0) {
        prefix = null;
        prefixRef = null;
    } else {
        prefixRef = new BytesRef(prefix);
    }

    // Determine the ord range [startTermIndex, endTermIndex) to count.
    // startTermIndex == -1 reserves counts[0] for docs with no value (ord -1).
    int startTermIndex, endTermIndex;
    if (prefix != null) {
        // lookupTerm returns (-insertionPoint - 1) when the term is absent.
        startTermIndex = si.lookupTerm(prefixRef);
        if (startTermIndex < 0)
            startTermIndex = -startTermIndex - 1;
        // BIG_TERM (all 0xff bytes) sorts after every term sharing the prefix,
        // so its insertion point marks one past the end of the prefix range.
        prefixRef.append(UnicodeUtil.BIG_TERM);
        endTermIndex = si.lookupTerm(prefixRef);
        assert endTermIndex < 0;
        endTermIndex = -endTermIndex - 1;
    } else {
        startTermIndex = -1;
        endTermIndex = si.getValueCount();
    }

    final int nTerms = endTermIndex - startTermIndex;
    int missingCount = -1;
    final CharsRef charsRef = new CharsRef(10); // scratch for readable labels
    if (nTerms > 0 && docs.size() >= mincount) {

        // count collection array only needs to be as big as the number of terms we are
        // going to collect counts for.
        final int[] counts = new int[nTerms];

        DocIterator iter = docs.iterator();

        // Tally each document's ord; ords outside the prefix range are skipped.
        while (iter.hasNext()) {
            int term = si.getOrd(iter.nextDoc());
            int arrIdx = term - startTermIndex;
            if (arrIdx >= 0 && arrIdx < nTerms)
                counts[arrIdx]++;
        }

        // With no prefix, ord -1 (no value) was tallied in slot 0.
        if (startTermIndex == -1) {
            missingCount = counts[0];
        }

        // IDEA: we could also maintain a count of "other"... everything that fell outside
        // of the top 'N'

        int off = offset;
        int lim = limit >= 0 ? limit : Integer.MAX_VALUE;

        if (sort.equals(FacetParams.FACET_SORT_COUNT) || sort.equals(FacetParams.FACET_SORT_COUNT_LEGACY)) {
            // Count-sorted output: keep the top offset+limit entries in a
            // bounded priority queue of packed (count, inverted-ord) longs.
            int maxsize = limit > 0 ? offset + limit : Integer.MAX_VALUE - 1;
            maxsize = Math.min(maxsize, nTerms);
            LongPriorityQueue queue = new LongPriorityQueue(Math.min(maxsize, 1000), maxsize, Long.MIN_VALUE);

            int min = mincount - 1; // the smallest value in the top 'N' values
            for (int i = (startTermIndex == -1) ? 1 : 0; i < nTerms; i++) {
                int c = counts[i];
                if (c > min) {
                    // NOTE: we use c>min rather than c>=min as an optimization because we are going in
                    // index order, so we already know that the keys are ordered.  This can be very
                    // important if a lot of the counts are repeated (like zero counts would be).

                    // smaller term numbers sort higher, so subtract the term number instead
                    long pair = (((long) c) << 32) + (Integer.MAX_VALUE - i);
                    boolean displaced = queue.insert(pair);
                    if (displaced)
                        min = (int) (queue.top() >>> 32);
                }
            }

            // if we are deep paging, we don't have to order the highest "offset" counts.
            int collectCount = Math.max(0, queue.size() - off);
            assert collectCount <= lim;

            // the start and end indexes of our list "sorted" (starting with the highest value)
            int sortedIdxStart = queue.size() - (collectCount - 1);
            int sortedIdxEnd = queue.size() + 1;
            final long[] sorted = queue.sort(collectCount);

            // Unpack each pair and resolve the ord back to a readable label.
            for (int i = sortedIdxStart; i < sortedIdxEnd; i++) {
                long pair = sorted[i];
                int c = (int) (pair >>> 32);
                int tnum = Integer.MAX_VALUE - (int) pair;
                si.lookupOrd(startTermIndex + tnum, br);
                ft.indexedToReadable(br, charsRef);
                res.add(charsRef.toString(), c);
            }

        } else {
            // add results in index order
            int i = (startTermIndex == -1) ? 1 : 0;
            if (mincount <= 0) {
                // if mincount<=0, then we won't discard any terms and we know exactly
                // where to start.
                i += off;
                off = 0;
            }

            for (; i < nTerms; i++) {
                int c = counts[i];
                if (c < mincount || --off >= 0)
                    continue;
                if (--lim < 0)
                    break;
                si.lookupOrd(startTermIndex + i, br);
                ft.indexedToReadable(br, charsRef);
                res.add(charsRef.toString(), c);
            }
        }
    }

    if (missing) {
        if (missingCount < 0) {
            // Prefix faceting never tallies ord -1; compute the count directly.
            missingCount = getFieldMissingCount(searcher, docs, fieldName);
        }
        res.add(null, missingCount);
    }

    return res;
}

From source file:org.apache.solr.request.UnInvertedField.java

License:Apache License

/**
 * Computes facet counts for this uninverted field over {@code baseDocs},
 * supporting paging, mincount filtering, an optional term prefix, and
 * count- or index-ordered output. Very common terms ({@code bigTerms}) are
 * counted via set intersections; the rest via the uninverted per-doc
 * term-number lists.
 *
 * @param searcher searcher over the index being faceted
 * @param baseDocs base set of documents to count within
 * @param offset   number of leading facet entries to skip
 * @param limit    maximum entries to return; negative means no limit
 * @param mincount minimum count required for a term to be returned
 * @param missing  if true, append a count of documents lacking the field
 * @param sort     count order when FacetParams.FACET_SORT_COUNT[_LEGACY], else index order
 * @param prefix   only count terms starting with this prefix (may be null or empty)
 * @return term label to count entries
 * @throws IOException if the index cannot be read
 */
public NamedList<Integer> getCounts(SolrIndexSearcher searcher, DocSet baseDocs, int offset, int limit,
        Integer mincount, boolean missing, String sort, String prefix) throws IOException {
    use.incrementAndGet(); // track usage of this uninverted-field instance

    FieldType ft = searcher.getSchema().getFieldType(field);

    NamedList<Integer> res = new NamedList<Integer>(); // order is important

    DocSet docs = baseDocs;
    int baseSize = docs.size();
    int maxDoc = searcher.maxDoc();

    //System.out.println("GET COUNTS field=" + field + " baseSize=" + baseSize + " minCount=" + mincount + " maxDoc=" + maxDoc + " numTermsInField=" + numTermsInField);
    if (baseSize >= mincount) {

        final int[] index = this.index;
        // tricky: we add one more element than we need because we will reuse this array later
        // for ordering term ords before converting to term labels.
        final int[] counts = new int[numTermsInField + 1];

        //
        // If there is prefix, find it's start and end term numbers
        //
        int startTerm = 0;
        int endTerm = numTermsInField; // one past the end

        TermsEnum te = getOrdTermsEnum(searcher.getAtomicReader());
        if (te != null && prefix != null && prefix.length() > 0) {
            final BytesRef prefixBr = new BytesRef(prefix);
            // seekCeil positions at the first term >= prefix (END if none exists).
            if (te.seekCeil(prefixBr) == TermsEnum.SeekStatus.END) {
                startTerm = numTermsInField;
            } else {
                startTerm = (int) te.ord();
            }
            // BIG_TERM (all 0xff bytes) sorts after every term sharing the
            // prefix, so seeking to prefix+BIG_TERM finds the exclusive end.
            prefixBr.append(UnicodeUtil.BIG_TERM);
            if (te.seekCeil(prefixBr) == TermsEnum.SeekStatus.END) {
                endTerm = numTermsInField;
            } else {
                endTerm = (int) te.ord();
            }
        }

        /***********
        // Alternative 2: get the docSet of the prefix (could take a while) and
        // then do the intersection with the baseDocSet first.
        if (prefix != null && prefix.length() > 0) {
          docs = searcher.getDocSet(new ConstantScorePrefixQuery(new Term(field, ft.toInternal(prefix))), docs);
          // The issue with this method are problems of returning 0 counts for terms w/o
          // the prefix.  We can't just filter out those terms later because it may
          // mean that we didn't collect enough terms in the queue (in the sorted case).
        }
        ***********/

        // When the base set covers more than half the index (and no prefix is
        // narrowing the range), count the complement set instead and later
        // derive each count as maxTermCounts[i] - counts[i].
        boolean doNegative = baseSize > maxDoc >> 1 && termInstances > 0 && startTerm == 0
                && endTerm == numTermsInField && docs instanceof BitDocSet;

        if (doNegative) {
            OpenBitSet bs = (OpenBitSet) ((BitDocSet) docs).getBits().clone();
            bs.flip(0, maxDoc);
            // TODO: when iterator across negative elements is available, use that
            // instead of creating a new bitset and inverting.
            docs = new BitDocSet(bs, maxDoc - baseSize);
            // simply negating will mean that we have deleted docs in the set.
            // that should be OK, as their entries in our table should be empty.
            //System.out.println("  NEG");
        }

        // For the biggest terms, do straight set intersections
        for (TopTerm tt : bigTerms.values()) {
            //System.out.println("  do big termNum=" + tt.termNum + " term=" + tt.term.utf8ToString());
            // TODO: counts could be deferred if sorted==false
            if (tt.termNum >= startTerm && tt.termNum < endTerm) {
                counts[tt.termNum] = searcher.numDocs(new TermQuery(new Term(field, tt.term)), docs);
                //System.out.println("    count=" + counts[tt.termNum]);
            } else {
                //System.out.println("SKIP term=" + tt.termNum);
            }
        }

        // TODO: we could short-circuit counting altogether for sorted faceting
        // where we already have enough terms from the bigTerms

        // TODO: we could shrink the size of the collection array, and
        // additionally break when the termNumber got above endTerm, but
        // it would require two extra conditionals in the inner loop (although
        // they would be predictable for the non-prefix case).
        // Perhaps a different copy of the code would be warranted.

        if (termInstances > 0) {
            DocIterator iter = docs.iterator();
            while (iter.hasNext()) {
                int doc = iter.nextDoc();
                //System.out.println("iter doc=" + doc);
                int code = index[doc];

                // Low byte == 1 marks a pointer into the shared tnums byte
                // arrays; any other value means the term-number deltas are
                // inlined in the int itself.
                if ((code & 0xff) == 1) {
                    //System.out.println("  ptr");
                    int pos = code >>> 8; // byte offset into the tnums chunk
                    int whichArray = (doc >>> 16) & 0xff;
                    byte[] arr = tnums[whichArray];
                    int tnum = 0;
                    for (;;) {
                        // decode one variable-length delta: 7 payload bits per
                        // byte; a set high bit means more bytes follow
                        int delta = 0;
                        for (;;) {
                            byte b = arr[pos++];
                            delta = (delta << 7) | (b & 0x7f);
                            if ((b & 0x80) == 0)
                                break;
                        }
                        if (delta == 0)
                            break; // a zero delta terminates the list
                        tnum += delta - TNUM_OFFSET; // deltas are biased by TNUM_OFFSET (see encoder)
                        //System.out.println("    tnum=" + tnum);
                        counts[tnum]++;
                    }
                } else {
                    //System.out.println("  inlined");
                    // inlined form: consume the int one byte at a time, same
                    // 7-bit-payload scheme as above
                    int tnum = 0;
                    int delta = 0;
                    for (;;) {
                        delta = (delta << 7) | (code & 0x7f);
                        if ((code & 0x80) == 0) {
                            if (delta == 0)
                                break;
                            tnum += delta - TNUM_OFFSET;
                            //System.out.println("    tnum=" + tnum);
                            counts[tnum]++;
                            delta = 0;
                        }
                        code >>>= 8;
                    }
                }
            }
        }
        final CharsRef charsRef = new CharsRef(); // scratch for readable labels

        int off = offset;
        int lim = limit >= 0 ? limit : Integer.MAX_VALUE;

        if (sort.equals(FacetParams.FACET_SORT_COUNT) || sort.equals(FacetParams.FACET_SORT_COUNT_LEGACY)) {
            // Count-sorted output: keep the top offset+limit entries in a
            // bounded priority queue of packed (count, inverted-termNum) longs.
            int maxsize = limit > 0 ? offset + limit : Integer.MAX_VALUE - 1;
            maxsize = Math.min(maxsize, numTermsInField);
            LongPriorityQueue queue = new LongPriorityQueue(Math.min(maxsize, 1000), maxsize, Long.MIN_VALUE);

            int min = mincount - 1; // the smallest value in the top 'N' values
            //System.out.println("START=" + startTerm + " END=" + endTerm);
            for (int i = startTerm; i < endTerm; i++) {
                int c = doNegative ? maxTermCounts[i] - counts[i] : counts[i];
                if (c > min) {
                    // NOTE: we use c>min rather than c>=min as an optimization because we are going in
                    // index order, so we already know that the keys are ordered.  This can be very
                    // important if a lot of the counts are repeated (like zero counts would be).

                    // smaller term numbers sort higher, so subtract the term number instead
                    long pair = (((long) c) << 32) + (Integer.MAX_VALUE - i);
                    boolean displaced = queue.insert(pair);
                    if (displaced)
                        min = (int) (queue.top() >>> 32);
                }
            }

            // now select the right page from the results

            // if we are deep paging, we don't have to order the highest "offset" counts.
            int collectCount = Math.max(0, queue.size() - off);
            assert collectCount <= lim;

            // the start and end indexes of our list "sorted" (starting with the highest value)
            int sortedIdxStart = queue.size() - (collectCount - 1);
            int sortedIdxEnd = queue.size() + 1;
            final long[] sorted = queue.sort(collectCount);

            final int[] indirect = counts; // reuse the counts array for the index into the tnums array
            assert indirect.length >= sortedIdxEnd;

            // First pass: emit counts with placeholder labels, remembering each
            // entry's term number so labels can be resolved in term order
            // (letting the TermsEnum advance forward only).
            for (int i = sortedIdxStart; i < sortedIdxEnd; i++) {
                long pair = sorted[i];
                int c = (int) (pair >>> 32);
                int tnum = Integer.MAX_VALUE - (int) pair;

                indirect[i] = i; // store the index for indirect sorting
                sorted[i] = tnum; // reuse the "sorted" array to store the term numbers for indirect sorting

                // add a null label for now... we'll fill it in later.
                res.add(null, c);
            }

            // now sort the indexes by the term numbers
            PrimUtils.sort(sortedIdxStart, sortedIdxEnd, indirect, new PrimUtils.IntComparator() {
                @Override
                public int compare(int a, int b) {
                    return (int) sorted[a] - (int) sorted[b];
                }

                @Override
                public boolean lessThan(int a, int b) {
                    return sorted[a] < sorted[b];
                }

                @Override
                public boolean equals(int a, int b) {
                    return sorted[a] == sorted[b];
                }
            });

            // convert the term numbers to term values and set
            // as the label
            //System.out.println("sortStart=" + sortedIdxStart + " end=" + sortedIdxEnd);
            for (int i = sortedIdxStart; i < sortedIdxEnd; i++) {
                int idx = indirect[i];
                int tnum = (int) sorted[idx];
                final String label = getReadableValue(getTermValue(te, tnum), ft, charsRef);
                //System.out.println("  label=" + label);
                res.setName(idx - sortedIdxStart, label);
            }

        } else {
            // add results in index order
            int i = startTerm;
            if (mincount <= 0) {
                // if mincount<=0, then we won't discard any terms and we know exactly
                // where to start.
                i = startTerm + off;
                off = 0;
            }

            for (; i < endTerm; i++) {
                int c = doNegative ? maxTermCounts[i] - counts[i] : counts[i];
                if (c < mincount || --off >= 0)
                    continue;
                if (--lim < 0)
                    break;

                final String label = getReadableValue(getTermValue(te, i), ft, charsRef);
                res.add(label, c);
            }
        }
    }

    if (missing) {
        // TODO: a faster solution for this?
        res.add(null, SimpleFacets.getFieldMissingCount(searcher, baseDocs, field));
    }

    //System.out.println("  res=" + res);

    return res;
}

From source file:org.apache.solr.search.facet.FacetFieldProcessorByArrayDV.java

License:Apache License

@Override
protected void findStartAndEndOrds() throws IOException {
    // Resolve a SortedSetDocValues view of the field; single-valued fields are
    // wrapped so the rest of the processor can treat both cases alike.
    if (multiValuedField) {
        si = FieldUtil.getSortedSetDocValues(fcontext.qcontext, sf, null);
        if (si instanceof MultiDocValues.MultiSortedSetDocValues) {
            ordinalMap = ((MultiDocValues.MultiSortedSetDocValues) si).mapping;
        }
    } else {
        final SortedDocValues singleValued = FieldUtil.getSortedDocValues(fcontext.qcontext, sf, null);
        si = DocValues.singleton(singleValued); // multi-valued view over a single-valued field
        if (singleValued instanceof MultiDocValues.MultiSortedDocValues) {
            ordinalMap = ((MultiDocValues.MultiSortedDocValues) singleValued).mapping;
        }
    }

    // Ords are handled as ints below, so the ord space must fit in an int.
    if (si.getValueCount() >= Integer.MAX_VALUE) {
        throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
                "Field has too many unique values. field=" + sf + " nterms= " + si.getValueCount());
    }

    if (prefixRef == null) {
        // No facet.prefix: facet over the entire ord range.
        startTermIndex = 0;
        endTermIndex = (int) si.getValueCount();
    } else {
        // lookupTerm returns (-insertionPoint - 1) for an absent key; convert
        // that to the first ordinal at or after the prefix.
        startTermIndex = (int) si.lookupTerm(prefixRef.get());
        if (startTermIndex < 0) {
            startTermIndex = -startTermIndex - 1;
        }
        // Appending BIG_TERM yields a key greater than any term sharing the
        // prefix, so the (negative) lookup result marks one past the range.
        prefixRef.append(UnicodeUtil.BIG_TERM);
        endTermIndex = (int) si.lookupTerm(prefixRef.get());
        assert endTermIndex < 0;
        endTermIndex = -endTermIndex - 1;
    }

    nTerms = endTermIndex - startTermIndex;
}

From source file:org.apache.solr.search.facet.FacetFieldProcessorByArrayUIF.java

License:Apache License

@Override
protected void findStartAndEndOrds() throws IOException {
    // Build (or fetch the cached) uninverted view of the field and an
    // ord-indexed TermsEnum over its terms; the enum may legitimately be null.
    uif = UnInvertedField.getUnInvertedField(freq.field, fcontext.searcher);
    te = uif.getOrdTermsEnum(fcontext.searcher.getSlowAtomicReader()); // "te" can be null

    // Default to the full ord range; endTermIndex is exclusive.
    startTermIndex = 0;
    endTermIndex = uif.numTerms();

    // Narrow the range when facet.prefix is in effect and terms are enumerable.
    if (prefixRef != null && te != null) {
        // First term at or after the prefix, or the end of the ord space.
        startTermIndex = (te.seekCeil(prefixRef.get()) == TermsEnum.SeekStatus.END) ? uif.numTerms()
                : (int) te.ord();
        // BIG_TERM sorts after every term carrying the prefix, so seeking past
        // prefix+BIG_TERM locates the exclusive end of the range.
        prefixRef.append(UnicodeUtil.BIG_TERM);
        endTermIndex = (te.seekCeil(prefixRef.get()) == TermsEnum.SeekStatus.END) ? uif.numTerms()
                : (int) te.ord();
    }

    nTerms = endTermIndex - startTermIndex;
}

From source file:org.apache.solr.search.facet.FacetFieldProcessorDV.java

License:Apache License

protected void findStartAndEndOrds() throws IOException {
    // Obtain doc values for the facet field. A single-valued field is wrapped
    // in a singleton SortedSetDocValues so both cases share one code path.
    if (!multiValuedField) {
        SortedDocValues sdv = FieldUtil.getSortedDocValues(fcontext.qcontext, sf, null);
        si = DocValues.singleton(sdv); // present a multi-valued view
        if (sdv instanceof MultiDocValues.MultiSortedDocValues) {
            ordinalMap = ((MultiDocValues.MultiSortedDocValues) sdv).mapping;
        }
    } else {
        si = FieldUtil.getSortedSetDocValues(fcontext.qcontext, sf, null);
        if (si instanceof MultiDocValues.MultiSortedSetDocValues) {
            ordinalMap = ((MultiDocValues.MultiSortedSetDocValues) si).mapping;
        }
    }

    // Ords are handled as ints below, so the ord space must fit in an int.
    if (si.getValueCount() >= Integer.MAX_VALUE) {
        throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
                "Field has too many unique values. field=" + sf + " nterms= " + si.getValueCount());
    }

    if (prefixRef != null) {
        // lookupTerm returns (-insertionPoint - 1) for an absent key; convert
        // that to the first ordinal at or after the prefix.
        int start = (int) si.lookupTerm(prefixRef.get());
        startTermIndex = (start < 0) ? -start - 1 : start;
        // Appending BIG_TERM produces a key greater than any term sharing the
        // prefix, so the (negative) lookup result marks one past the range.
        prefixRef.append(UnicodeUtil.BIG_TERM);
        int end = (int) si.lookupTerm(prefixRef.get());
        assert end < 0;
        endTermIndex = -end - 1;
    } else {
        // No facet.prefix: facet over the entire ord range.
        startTermIndex = 0;
        endTermIndex = (int) si.getValueCount();
    }

    nTerms = endTermIndex - startTermIndex;
}

From source file:org.elasticsearch.index.fielddata.AbstractStringFieldDataTestCase.java

License:Apache License

/**
 * Checks sorting when the comparator's missing value is an actual term value:
 * documents without "value" must sort as if they carried {@code missingValue}.
 *
 * @param reverse whether to sort descending
 */
public void testActualMissingValue(boolean reverse) throws IOException {
    // missing value is set to an actual value
    final String[] values = new String[randomIntBetween(2, 30)];
    // values[0] stays null so some documents are written without the field
    for (int i = 1; i < values.length; ++i) {
        values[i] = TestUtil.randomUnicodeString(getRandom());
    }
    final int numDocs = scaledRandomIntBetween(10, 3072);
    for (int i = 0; i < numDocs; ++i) {
        final String value = RandomPicks.randomFrom(getRandom(), values);
        if (value == null) {
            writer.addDocument(new Document());
        } else {
            Document d = new Document();
            addField(d, "value", value);
            writer.addDocument(d);
        }
        // occasionally commit so the index ends up with multiple segments
        if (randomInt(10) == 0) {
            writer.commit();
        }
    }

    final IndexFieldData indexFieldData = getForField("value");
    final String missingValue = values[1];
    IndexSearcher searcher = new IndexSearcher(DirectoryReader.open(writer, true));
    XFieldComparatorSource comparator = indexFieldData.comparatorSource(missingValue, MultiValueMode.MIN, null);
    TopFieldDocs topDocs = searcher.search(new MatchAllDocsQuery(),
            randomBoolean() ? numDocs : randomIntBetween(10, numDocs),
            new Sort(new SortField("value", comparator, reverse)));
    assertEquals(numDocs, topDocs.totalHits);
    // Descending starts from BIG_TERM (all 0xff bytes, bigger than any UTF-8
    // term); ascending starts from the empty term.
    BytesRef previousValue = reverse ? UnicodeUtil.BIG_TERM : new BytesRef();
    for (int i = 0; i < topDocs.scoreDocs.length; ++i) {
        final String docValue = searcher.doc(topDocs.scoreDocs[i].doc).get("value");
        // substitute the configured missing value for absent fields
        final BytesRef value = new BytesRef(docValue == null ? missingValue : docValue);
        if (reverse) {
            assertTrue(previousValue.compareTo(value) >= 0);
        } else {
            assertTrue(previousValue.compareTo(value) <= 0);
        }
        previousValue = value;
    }
    searcher.getIndexReader().close();
}

From source file:org.elasticsearch.index.fielddata.AbstractStringFieldDataTestCase.java

License:Apache License

/**
 * Checks sorting with the special "_first"/"_last" missing values: documents
 * without "value" must cluster at the head or tail of the sorted results.
 *
 * @param first   whether missing documents should sort first (else last)
 * @param reverse whether to sort descending
 */
public void testSortMissing(boolean first, boolean reverse) throws IOException {
    final String[] values = new String[randomIntBetween(2, 10)];
    // values[0] stays null so some documents are written without the field
    for (int i = 1; i < values.length; ++i) {
        values[i] = TestUtil.randomUnicodeString(getRandom());
    }
    final int numDocs = scaledRandomIntBetween(10, 3072);
    for (int i = 0; i < numDocs; ++i) {
        final String value = RandomPicks.randomFrom(getRandom(), values);
        if (value == null) {
            writer.addDocument(new Document());
        } else {
            Document d = new Document();
            addField(d, "value", value);
            writer.addDocument(d);
        }
        // occasionally commit so the index ends up with multiple segments
        if (randomInt(10) == 0) {
            writer.commit();
        }
    }
    final IndexFieldData indexFieldData = getForField("value");
    IndexSearcher searcher = new IndexSearcher(DirectoryReader.open(writer, true));
    XFieldComparatorSource comparator = indexFieldData.comparatorSource(first ? "_first" : "_last",
            MultiValueMode.MIN, null);
    TopFieldDocs topDocs = searcher.search(new MatchAllDocsQuery(),
            randomBoolean() ? numDocs : randomIntBetween(10, numDocs),
            new Sort(new SortField("value", comparator, reverse)));
    assertEquals(numDocs, topDocs.totalHits);
    // Seed the ordering check: null marks "still inside the leading missing
    // block" for _first; otherwise start from BIG_TERM (descending, bigger
    // than any UTF-8 term) or the empty term (ascending).
    BytesRef previousValue = first ? null : reverse ? UnicodeUtil.BIG_TERM : new BytesRef();
    for (int i = 0; i < topDocs.scoreDocs.length; ++i) {
        final String docValue = searcher.doc(topDocs.scoreDocs[i].doc).get("value");
        if (first && docValue == null) {
            // missing-first: no present value may precede a missing one
            assertNull(previousValue);
        } else if (!first && docValue != null) {
            // missing-last: no missing value may precede a present one
            assertNotNull(previousValue);
        }
        final BytesRef value = docValue == null ? null : new BytesRef(docValue);
        if (previousValue != null && value != null) {
            if (reverse) {
                assertTrue(previousValue.compareTo(value) >= 0);
            } else {
                assertTrue(previousValue.compareTo(value) <= 0);
            }
        }
        previousValue = value;
    }
    searcher.getIndexReader().close();
}

From source file:org.elasticsearch.index.fielddata.AbstractStringFieldDataTests.java

License:Apache License

/**
 * Legacy variant of the actual-missing-value sort test: a single reusable
 * Document/StringField pair is updated per value; documents without the field
 * must sort as if they carried {@code missingValue}.
 *
 * @param reverse whether to sort descending
 */
public void testActualMissingValue(boolean reverse) throws IOException {
    // missing value is set to an actual value
    Document d = new Document();
    final StringField s = new StringField("value", "", Field.Store.YES);
    d.add(s);
    final String[] values = new String[randomIntBetween(2, 30)];
    // values[0] stays null so some documents are written without the field
    for (int i = 1; i < values.length; ++i) {
        values[i] = _TestUtil.randomUnicodeString(getRandom());
    }
    final int numDocs = atLeast(100);
    for (int i = 0; i < numDocs; ++i) {
        final String value = RandomPicks.randomFrom(getRandom(), values);
        if (value == null) {
            writer.addDocument(new Document());
        } else {
            s.setStringValue(value); // reuse the shared document/field instance
            writer.addDocument(d);
        }
        // occasionally commit so the index ends up with multiple segments
        if (randomInt(10) == 0) {
            writer.commit();
        }
    }

    final IndexFieldData indexFieldData = getForField("value");
    final String missingValue = values[1];
    IndexSearcher searcher = new IndexSearcher(DirectoryReader.open(writer, true));
    XFieldComparatorSource comparator = indexFieldData.comparatorSource(missingValue, SortMode.MIN);
    TopFieldDocs topDocs = searcher.search(new MatchAllDocsQuery(),
            randomBoolean() ? numDocs : randomIntBetween(10, numDocs),
            new Sort(new SortField("value", comparator, reverse)));
    assertEquals(numDocs, topDocs.totalHits);
    // Descending starts from BIG_TERM (all 0xff bytes, bigger than any UTF-8
    // term); ascending starts from the empty term.
    BytesRef previousValue = reverse ? UnicodeUtil.BIG_TERM : new BytesRef();
    for (int i = 0; i < topDocs.scoreDocs.length; ++i) {
        final String docValue = searcher.doc(topDocs.scoreDocs[i].doc).get("value");
        // substitute the configured missing value for absent fields
        final BytesRef value = new BytesRef(docValue == null ? missingValue : docValue);
        if (reverse) {
            assertTrue(previousValue.compareTo(value) >= 0);
        } else {
            assertTrue(previousValue.compareTo(value) <= 0);
        }
        previousValue = value;
    }
    searcher.getIndexReader().close();
}