Example usage for org.apache.lucene.index TermsEnum ord

List of usage examples for org.apache.lucene.index TermsEnum ord

Introduction

On this page you can find example usages of org.apache.lucene.index TermsEnum.ord().

Prototype

public abstract long ord() throws IOException;

Source Link

Document

Returns ordinal position for current term.

Usage

From source file:com.rocana.lucene.codec.v1.RocanaBasePostingsFormatTestCase.java

License:Apache License

/**
 * Verifies that a postings consumer may iterate the provided Fields more than
 * once at flush/merge time.  Installs a wrapper PostingsFormat for the "body"
 * field that, after delegating the real write, re-iterates and re-seeks the
 * TermsEnum while accumulating per-term docFreq/totalTermFreq stats; those
 * stats are then cross-checked against what the final IndexReader reports,
 * including ord() positions when the codec supports ords.
 */
@Override
public void testInvertedWrite() throws Exception {
    Directory dir = newDirectory();
    MockAnalyzer analyzer = new MockAnalyzer(random());
    analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));
    IndexWriterConfig iwc = newIndexWriterConfig(analyzer);

    // Must be concurrent because thread(s) can be merging
    // while up to one thread flushes, and each of those
    // threads iterates over the map while the flushing
    // thread might be adding to it:
    final Map<String, TermFreqs> termFreqs = new ConcurrentHashMap<>();

    final AtomicLong sumDocFreq = new AtomicLong();
    final AtomicLong sumTotalTermFreq = new AtomicLong();

    // TODO: would be better to use / delegate to the current
    // Codec returned by getCodec()

    iwc.setCodec(new AssertingCodec() {
        @Override
        public PostingsFormat getPostingsFormatForField(String field) {

            // Unwrap per-field formats so we delegate to the postings
            // format actually backing this field:
            PostingsFormat p = getCodec().postingsFormat();
            if (p instanceof PerFieldPostingsFormat) {
                p = ((PerFieldPostingsFormat) p).getPostingsFormatForField(field);
            }
            if (p instanceof RocanaPerFieldPostingsFormat) {
                p = ((RocanaPerFieldPostingsFormat) p).getPostingsFormatForField(field);
            }
            final PostingsFormat defaultPostingsFormat = p;

            final Thread mainThread = Thread.currentThread();

            if (field.equals("body")) {

                // A PF that counts up some stats and then in
                // the end we verify the stats match what the
                // final IndexReader says, just to exercise the
                // new freedom of iterating the postings more
                // than once at flush/merge:

                return new PostingsFormat(defaultPostingsFormat.getName()) {

                    @Override
                    public FieldsConsumer fieldsConsumer(final SegmentWriteState state) throws IOException {

                        final FieldsConsumer fieldsConsumer = defaultPostingsFormat.fieldsConsumer(state);

                        return new FieldsConsumer() {
                            @Override
                            public void write(Fields fields) throws IOException {
                                // First let the real consumer write, then
                                // iterate the same Fields again ourselves:
                                fieldsConsumer.write(fields);

                                boolean isMerge = state.context.context == IOContext.Context.MERGE;

                                // We only use one thread for flushing
                                // in this test:
                                assert isMerge || Thread.currentThread() == mainThread;

                                // We iterate the provided TermsEnum
                                // twice, so we exercise this new freedom
                                // with the inverted API; if
                                // addOnSecondPass is true, we add up
                                // term stats on the 2nd iteration:
                                boolean addOnSecondPass = random().nextBoolean();

                                //System.out.println("write isMerge=" + isMerge + " 2ndPass=" + addOnSecondPass);

                                // Gather our own stats:
                                Terms terms = fields.terms("body");
                                assert terms != null;

                                TermsEnum termsEnum = terms.iterator();
                                PostingsEnum docs = null;
                                while (termsEnum.next() != null) {
                                    BytesRef term = termsEnum.term();
                                    // TODO: also sometimes ask for payloads/offsets?
                                    boolean noPositions = random().nextBoolean();
                                    if (noPositions) {
                                        docs = termsEnum.postings(docs, PostingsEnum.FREQS);
                                    } else {
                                        docs = termsEnum.postings(null, PostingsEnum.POSITIONS);
                                    }
                                    int docFreq = 0;
                                    long totalTermFreq = 0;
                                    while (docs.nextDoc() != PostingsEnum.NO_MORE_DOCS) {
                                        docFreq++;
                                        totalTermFreq += docs.freq();
                                        // Pull only a random prefix of the positions
                                        // to exercise partial consumption:
                                        int limit = TestUtil.nextInt(random(), 1, docs.freq());
                                        if (!noPositions) {
                                            for (int i = 0; i < limit; i++) {
                                                docs.nextPosition();
                                            }
                                        }
                                    }

                                    String termString = term.utf8ToString();

                                    // During merge we should only see terms
                                    // we had already seen during a
                                    // previous flush:
                                    assertTrue(isMerge == false || termFreqs.containsKey(termString));

                                    if (isMerge == false) {
                                        if (addOnSecondPass == false) {
                                            TermFreqs tf = termFreqs.get(termString);
                                            if (tf == null) {
                                                tf = new TermFreqs();
                                                termFreqs.put(termString, tf);
                                            }
                                            tf.docFreq += docFreq;
                                            tf.totalTermFreq += totalTermFreq;
                                            sumDocFreq.addAndGet(docFreq);
                                            sumTotalTermFreq.addAndGet(totalTermFreq);
                                        } else if (termFreqs.containsKey(termString) == false) {
                                            // Add placeholder (2nd pass will
                                            // set its counts):
                                            termFreqs.put(termString, new TermFreqs());
                                        }
                                    }
                                }

                                // Also test seeking the TermsEnum:
                                // (termFreqs may hold terms from other flushed
                                // segments, so seekExact can legitimately miss)
                                for (String term : termFreqs.keySet()) {
                                    if (termsEnum.seekExact(new BytesRef(term))) {
                                        // TODO: also sometimes ask for payloads/offsets?
                                        boolean noPositions = random().nextBoolean();
                                        if (noPositions) {
                                            docs = termsEnum.postings(docs, PostingsEnum.FREQS);
                                        } else {
                                            docs = termsEnum.postings(null, PostingsEnum.POSITIONS);
                                        }

                                        int docFreq = 0;
                                        long totalTermFreq = 0;
                                        while (docs.nextDoc() != PostingsEnum.NO_MORE_DOCS) {
                                            docFreq++;
                                            totalTermFreq += docs.freq();
                                            int limit = TestUtil.nextInt(random(), 1, docs.freq());
                                            if (!noPositions) {
                                                for (int i = 0; i < limit; i++) {
                                                    docs.nextPosition();
                                                }
                                            }
                                        }

                                        if (isMerge == false && addOnSecondPass) {
                                            TermFreqs tf = termFreqs.get(term);
                                            assert tf != null;
                                            tf.docFreq += docFreq;
                                            tf.totalTermFreq += totalTermFreq;
                                            sumDocFreq.addAndGet(docFreq);
                                            sumTotalTermFreq.addAndGet(totalTermFreq);
                                        }

                                        // This segment's stats can only be a
                                        // subset of the accumulated totals:
                                        //System.out.println("  term=" + term + " docFreq=" + docFreq + " ttDF=" + termToDocFreq.get(term));
                                        assertTrue(docFreq <= termFreqs.get(term).docFreq);
                                        assertTrue(totalTermFreq <= termFreqs.get(term).totalTermFreq);
                                    }
                                }

                                // Also test seekCeil
                                for (int iter = 0; iter < 10; iter++) {
                                    BytesRef term = new BytesRef(
                                            TestUtil.randomRealisticUnicodeString(random()));
                                    SeekStatus status = termsEnum.seekCeil(term);
                                    if (status == SeekStatus.NOT_FOUND) {
                                        // NOT_FOUND must position on a term
                                        // strictly after the requested one:
                                        assertTrue(term.compareTo(termsEnum.term()) < 0);
                                    }
                                }
                            }

                            @Override
                            public void close() throws IOException {
                                fieldsConsumer.close();
                            }
                        };
                    }

                    @Override
                    public FieldsProducer fieldsProducer(SegmentReadState state) throws IOException {
                        return defaultPostingsFormat.fieldsProducer(state);
                    }
                };
            } else {
                // all other fields use the unwrapped default format
                return defaultPostingsFormat;
            }
        }
    });

    RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc);

    // Index at least ~100KB of line-file docs:
    LineFileDocs docs = new LineFileDocs(random());
    int bytesToIndex = atLeast(100) * 1024;
    int bytesIndexed = 0;
    while (bytesIndexed < bytesToIndex) {
        Document doc = docs.nextDoc();
        w.addDocument(doc);
        bytesIndexed += RamUsageTester.sizeOf(doc);
    }

    IndexReader r = w.getReader();
    w.close();

    // Aggregate stats must match what we accumulated during flush:
    Terms terms = MultiFields.getTerms(r, "body");
    assertEquals(sumDocFreq.get(), terms.getSumDocFreq());
    assertEquals(sumTotalTermFreq.get(), terms.getSumTotalTermFreq());

    // Per-term stats, and ord() when supported (a codec without ords
    // throws UnsupportedOperationException — then we stop checking):
    TermsEnum termsEnum = terms.iterator();
    long termCount = 0;
    boolean supportsOrds = true;
    while (termsEnum.next() != null) {
        BytesRef term = termsEnum.term();
        assertEquals(termFreqs.get(term.utf8ToString()).docFreq, termsEnum.docFreq());
        assertEquals(termFreqs.get(term.utf8ToString()).totalTermFreq, termsEnum.totalTermFreq());
        if (supportsOrds) {
            long ord;
            try {
                ord = termsEnum.ord();
            } catch (UnsupportedOperationException uoe) {
                supportsOrds = false;
                ord = -1;
            }
            if (ord != -1) {
                // ords must be sequential in term order
                assertEquals(termCount, ord);
            }
        }
        termCount++;
    }
    assertEquals(termFreqs.size(), termCount);

    r.close();
    dir.close();
}

From source file:org.apache.solr.request.TestFaceting.java

License:Apache License

/**
 * Exercises UnInvertedField's ordinal TermsEnum over an index of {@code size}
 * synthetic terms: seeks by term text (including values past the last term),
 * seeks before the first term, and seeks directly by ordinal, checking that
 * ord() and term() round-trip each time.
 */
void doTermEnum(int size) throws Exception {
    close();
    createIndex(size);
    req = lrf.makeRequest("q", "*:*");

    UnInvertedField uif = new UnInvertedField(proto.field(), req.getSearcher());
    assertEquals(size, uif.getNumTerms());

    TermsEnum te = uif.getOrdTermsEnum(req.getSearcher().getAtomicReader());
    // an empty field yields no ord enum at all
    assertEquals(size == 0, te == null);

    Random rand = new Random(size);

    // Seek by term text; termIndex may land past the last existing term.
    for (int iter = 0; iter < size * 2 + 10; iter++) {
        int termIndex = rand.nextInt(size + 2);
        String expected = t(termIndex);
        BytesRef found = null;
        if (te != null && te.seekCeil(new BytesRef(expected)) != TermsEnum.SeekStatus.END) {
            found = te.term();
        }
        assertEquals(found != null, termIndex < size);
        if (termIndex < size) {
            assertEquals(termIndex, (int) te.ord());
            assertEquals(expected, te.term().utf8ToString());
        }
    }

    if (size > 0) {
        // Seeking before the first term must land on ord 0.
        assertEquals(size > 0, te.seekCeil(new BytesRef("000")) != TermsEnum.SeekStatus.END);
        assertEquals(0, te.ord());
        assertEquals(t(0), te.term().utf8ToString());

        // Seek directly by ordinal.
        for (int iter = 0; iter < size * 2 + 10; iter++) {
            int termIndex = rand.nextInt(size);
            String expected = t(termIndex);
            te.seekExact((long) termIndex);
            BytesRef found = te.term();
            assertNotNull(found);
            assertEquals(termIndex, (int) te.ord());
            assertEquals(expected, te.term().utf8ToString());
        }
    }
}

From source file:org.apache.solr.request.UnInvertedField.java

License:Apache License

/**
 * Computes facet counts for this uninverted field over {@code baseDocs}.
 *
 * @param searcher active searcher, used for term lookups and big-term set intersections
 * @param baseDocs the documents to count facet values over
 * @param offset   number of leading facet entries to skip (paging)
 * @param limit    maximum number of entries to return; negative means unlimited
 * @param mincount minimum count a term needs to be included in the result
 * @param missing  if true, append a count of documents with no value (null key)
 * @param sort     facet sort mode: by count or by index (term) order
 * @param prefix   if non-empty, restrict counting to terms with this prefix
 * @return ordered list of term label to count
 */
public NamedList<Integer> getCounts(SolrIndexSearcher searcher, DocSet baseDocs, int offset, int limit,
        Integer mincount, boolean missing, String sort, String prefix) throws IOException {
    use.incrementAndGet();

    FieldType ft = searcher.getSchema().getFieldType(field);

    NamedList<Integer> res = new NamedList<Integer>(); // order is important

    DocSet docs = baseDocs;
    int baseSize = docs.size();
    int maxDoc = searcher.maxDoc();

    //System.out.println("GET COUNTS field=" + field + " baseSize=" + baseSize + " minCount=" + mincount + " maxDoc=" + maxDoc + " numTermsInField=" + numTermsInField);
    // if the base set is smaller than mincount, no term can possibly qualify
    if (baseSize >= mincount) {

        final int[] index = this.index;
        // tricky: we add one more element than we need because we will reuse this array later
        // for ordering term ords before converting to term labels.
        final int[] counts = new int[numTermsInField + 1];

        //
        // If there is a prefix, find its start and end term numbers
        //
        int startTerm = 0;
        int endTerm = numTermsInField; // one past the end

        TermsEnum te = getOrdTermsEnum(searcher.getAtomicReader());
        if (te != null && prefix != null && prefix.length() > 0) {
            final BytesRef prefixBr = new BytesRef(prefix);
            if (te.seekCeil(prefixBr) == TermsEnum.SeekStatus.END) {
                startTerm = numTermsInField;
            } else {
                startTerm = (int) te.ord();
            }
            // appending BIG_TERM gives a key sorting after every term with this prefix
            prefixBr.append(UnicodeUtil.BIG_TERM);
            if (te.seekCeil(prefixBr) == TermsEnum.SeekStatus.END) {
                endTerm = numTermsInField;
            } else {
                endTerm = (int) te.ord();
            }
        }

        /***********
        // Alternative 2: get the docSet of the prefix (could take a while) and
        // then do the intersection with the baseDocSet first.
        if (prefix != null && prefix.length() > 0) {
          docs = searcher.getDocSet(new ConstantScorePrefixQuery(new Term(field, ft.toInternal(prefix))), docs);
          // The issue with this method are problems of returning 0 counts for terms w/o
          // the prefix.  We can't just filter out those terms later because it may
          // mean that we didn't collect enough terms in the queue (in the sorted case).
        }
        ***********/

        // When the base set covers more than half the index (and no prefix
        // restriction applies), it's cheaper to count the complement and
        // subtract from the full-index per-term counts later:
        boolean doNegative = baseSize > maxDoc >> 1 && termInstances > 0 && startTerm == 0
                && endTerm == numTermsInField && docs instanceof BitDocSet;

        if (doNegative) {
            OpenBitSet bs = (OpenBitSet) ((BitDocSet) docs).getBits().clone();
            bs.flip(0, maxDoc);
            // TODO: when iterator across negative elements is available, use that
            // instead of creating a new bitset and inverting.
            docs = new BitDocSet(bs, maxDoc - baseSize);
            // simply negating will mean that we have deleted docs in the set.
            // that should be OK, as their entries in our table should be empty.
            //System.out.println("  NEG");
        }

        // For the biggest terms, do straight set intersections
        for (TopTerm tt : bigTerms.values()) {
            //System.out.println("  do big termNum=" + tt.termNum + " term=" + tt.term.utf8ToString());
            // TODO: counts could be deferred if sorted==false
            if (tt.termNum >= startTerm && tt.termNum < endTerm) {
                counts[tt.termNum] = searcher.numDocs(new TermQuery(new Term(field, tt.term)), docs);
                //System.out.println("    count=" + counts[tt.termNum]);
            } else {
                //System.out.println("SKIP term=" + tt.termNum);
            }
        }

        // TODO: we could short-circuit counting altogether for sorted faceting
        // where we already have enough terms from the bigTerms

        // TODO: we could shrink the size of the collection array, and
        // additionally break when the termNumber got above endTerm, but
        // it would require two extra conditionals in the inner loop (although
        // they would be predictable for the non-prefix case).
        // Perhaps a different copy of the code would be warranted.

        if (termInstances > 0) {
            DocIterator iter = docs.iterator();
            while (iter.hasNext()) {
                int doc = iter.nextDoc();
                //System.out.println("iter doc=" + doc);
                int code = index[doc];

                if ((code & 0xff) == 1) {
                    // low byte == 1: the upper bits are a pointer into the
                    // tnums byte array holding 7-bit vInt-encoded term deltas
                    //System.out.println("  ptr");
                    int pos = code >>> 8;
                    int whichArray = (doc >>> 16) & 0xff;
                    byte[] arr = tnums[whichArray];
                    int tnum = 0;
                    for (;;) {
                        int delta = 0;
                        for (;;) {
                            // 7-bit continuation decode: high bit set means more bytes
                            byte b = arr[pos++];
                            delta = (delta << 7) | (b & 0x7f);
                            if ((b & 0x80) == 0)
                                break;
                        }
                        if (delta == 0)
                            break;
                        tnum += delta - TNUM_OFFSET;
                        //System.out.println("    tnum=" + tnum);
                        counts[tnum]++;
                    }
                } else {
                    // otherwise the deltas are packed inline in the code int itself
                    //System.out.println("  inlined");
                    int tnum = 0;
                    int delta = 0;
                    for (;;) {
                        delta = (delta << 7) | (code & 0x7f);
                        if ((code & 0x80) == 0) {
                            if (delta == 0)
                                break;
                            tnum += delta - TNUM_OFFSET;
                            //System.out.println("    tnum=" + tnum);
                            counts[tnum]++;
                            delta = 0;
                        }
                        code >>>= 8;
                    }
                }
            }
        }
        final CharsRef charsRef = new CharsRef();

        int off = offset;
        int lim = limit >= 0 ? limit : Integer.MAX_VALUE;

        if (sort.equals(FacetParams.FACET_SORT_COUNT) || sort.equals(FacetParams.FACET_SORT_COUNT_LEGACY)) {
            int maxsize = limit > 0 ? offset + limit : Integer.MAX_VALUE - 1;
            maxsize = Math.min(maxsize, numTermsInField);
            LongPriorityQueue queue = new LongPriorityQueue(Math.min(maxsize, 1000), maxsize, Long.MIN_VALUE);

            int min = mincount - 1; // the smallest value in the top 'N' values
            //System.out.println("START=" + startTerm + " END=" + endTerm);
            for (int i = startTerm; i < endTerm; i++) {
                int c = doNegative ? maxTermCounts[i] - counts[i] : counts[i];
                if (c > min) {
                    // NOTE: we use c>min rather than c>=min as an optimization because we are going in
                    // index order, so we already know that the keys are ordered.  This can be very
                    // important if a lot of the counts are repeated (like zero counts would be).

                    // smaller term numbers sort higher, so subtract the term number instead
                    long pair = (((long) c) << 32) + (Integer.MAX_VALUE - i);
                    boolean displaced = queue.insert(pair);
                    if (displaced)
                        min = (int) (queue.top() >>> 32);
                }
            }

            // now select the right page from the results

            // if we are deep paging, we don't have to order the highest "offset" counts.
            int collectCount = Math.max(0, queue.size() - off);
            assert collectCount <= lim;

            // the start and end indexes of our list "sorted" (starting with the highest value)
            int sortedIdxStart = queue.size() - (collectCount - 1);
            int sortedIdxEnd = queue.size() + 1;
            final long[] sorted = queue.sort(collectCount);

            final int[] indirect = counts; // reuse the counts array for the index into the tnums array
            assert indirect.length >= sortedIdxEnd;

            for (int i = sortedIdxStart; i < sortedIdxEnd; i++) {
                // unpack the (count, inverted termNum) pair built above
                long pair = sorted[i];
                int c = (int) (pair >>> 32);
                int tnum = Integer.MAX_VALUE - (int) pair;

                indirect[i] = i; // store the index for indirect sorting
                sorted[i] = tnum; // reuse the "sorted" array to store the term numbers for indirect sorting

                // add a null label for now... we'll fill it in later.
                res.add(null, c);
            }

            // now sort the indexes by the term numbers
            // (looking up labels in term order is cheaper than random order)
            PrimUtils.sort(sortedIdxStart, sortedIdxEnd, indirect, new PrimUtils.IntComparator() {
                @Override
                public int compare(int a, int b) {
                    return (int) sorted[a] - (int) sorted[b];
                }

                @Override
                public boolean lessThan(int a, int b) {
                    return sorted[a] < sorted[b];
                }

                @Override
                public boolean equals(int a, int b) {
                    return sorted[a] == sorted[b];
                }
            });

            // convert the term numbers to term values and set
            // as the label
            //System.out.println("sortStart=" + sortedIdxStart + " end=" + sortedIdxEnd);
            for (int i = sortedIdxStart; i < sortedIdxEnd; i++) {
                int idx = indirect[i];
                int tnum = (int) sorted[idx];
                final String label = getReadableValue(getTermValue(te, tnum), ft, charsRef);
                //System.out.println("  label=" + label);
                res.setName(idx - sortedIdxStart, label);
            }

        } else {
            // add results in index order
            int i = startTerm;
            if (mincount <= 0) {
                // if mincount<=0, then we won't discard any terms and we know exactly
                // where to start.
                i = startTerm + off;
                off = 0;
            }

            for (; i < endTerm; i++) {
                int c = doNegative ? maxTermCounts[i] - counts[i] : counts[i];
                if (c < mincount || --off >= 0)
                    continue;
                if (--lim < 0)
                    break;

                final String label = getReadableValue(getTermValue(te, i), ft, charsRef);
                res.add(label, c);
            }
        }
    }

    if (missing) {
        // TODO: a faster solution for this?
        res.add(null, SimpleFacets.getFieldMissingCount(searcher, baseDocs, field));
    }

    //System.out.println("  res=" + res);

    return res;
}

From source file:org.apache.solr.uninverting.TestDocTermOrds.java

License:Apache License

/**
 * Checks DocTermOrds against precomputed expectations: for every document,
 * the ords produced by the SortedSetDocValues iterator must map (via the
 * ord TermsEnum) to exactly the terms listed in {@code idToOrds} for that
 * document's stored "id".
 *
 * @param r          leaf reader to uninvert
 * @param idToOrds   expected ord lists, indexed by the stored "id" value
 * @param termsArray all terms, indexed by the values stored in idToOrds
 * @param prefixRef  optional term prefix restriction (null means all terms)
 */
private void verify(LeafReader r, int[][] idToOrds, BytesRef[] termsArray, BytesRef prefixRef)
        throws Exception {

    final DocTermOrds dto = new DocTermOrds(r, r.getLiveDocs(), "field", prefixRef, Integer.MAX_VALUE,
            TestUtil.nextInt(random(), 2, 10));

    final NumericDocValues docIDToID = FieldCache.DEFAULT.getNumerics(r, "id", FieldCache.LEGACY_INT_PARSER);
    /*
      for(int docID=0;docID<subR.maxDoc();docID++) {
      System.out.println("  docID=" + docID + " id=" + docIDToID[docID]);
      }
    */

    if (VERBOSE) {
        System.out.println("TEST: verify prefix=" + (prefixRef == null ? "null" : prefixRef.utf8ToString()));
        System.out.println("TEST: all TERMS:");
        TermsEnum allTE = MultiFields.getTerms(r, "field").iterator();
        int ord = 0;
        while (allTE.next() != null) {
            System.out.println("  ord=" + (ord++) + " term=" + allTE.term().utf8ToString());
        }
    }

    //final TermsEnum te = subR.fields().terms("field").iterator();
    final TermsEnum te = dto.getOrdTermsEnum(r);
    if (dto.numTerms() == 0) {
        // No terms uninverted: either the field is truly empty, or the
        // prefix matched nothing — verify whichever case applies, then bail.
        if (prefixRef == null) {
            assertNull(MultiFields.getTerms(r, "field"));
        } else {
            Terms terms = MultiFields.getTerms(r, "field");
            if (terms != null) {
                TermsEnum termsEnum = terms.iterator();
                TermsEnum.SeekStatus result = termsEnum.seekCeil(prefixRef);
                if (result != TermsEnum.SeekStatus.END) {
                    assertFalse(
                            "term=" + termsEnum.term().utf8ToString() + " matches prefix="
                                    + prefixRef.utf8ToString(),
                            StringHelper.startsWith(termsEnum.term(), prefixRef));
                } else {
                    // ok
                }
            } else {
                // ok
            }
        }
        return;
    }

    if (VERBOSE) {
        System.out.println("TEST: TERMS:");
        te.seekExact(0);
        while (true) {
            System.out.println("  ord=" + te.ord() + " term=" + te.term().utf8ToString());
            if (te.next() == null) {
                break;
            }
        }
    }

    // Walk every doc; both cursors (docIDToID and iter) advance in docID order.
    SortedSetDocValues iter = dto.iterator(r);
    for (int docID = 0; docID < r.maxDoc(); docID++) {
        assertEquals(docID, docIDToID.nextDoc());
        if (docID > iter.docID()) {
            iter.nextDoc();
        }
        if (docID < iter.docID()) {
            // iterator skipped this doc: it must have no expected ords
            int[] answers = idToOrds[(int) docIDToID.longValue()];
            assertEquals(0, answers.length);
            continue;
        }

        if (VERBOSE) {
            System.out.println(
                    "TEST: docID=" + docID + " of " + r.maxDoc() + " (id=" + docIDToID.longValue() + ")");
        }
        final int[] answers = idToOrds[(int) docIDToID.longValue()];
        int upto = 0;
        long ord;
        while ((ord = iter.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) {
            // every produced ord must resolve to the next expected term
            te.seekExact(ord);
            final BytesRef expected = termsArray[answers[upto++]];
            if (VERBOSE) {
                System.out.println("  exp=" + expected.utf8ToString() + " actual=" + te.term().utf8ToString());
            }
            assertEquals("expected=" + expected.utf8ToString() + " actual=" + te.term().utf8ToString() + " ord="
                    + ord, expected, te.term());
        }
        // and the doc must produce exactly as many ords as expected
        assertEquals(answers.length, upto);
    }
}

From source file:org.apache.solr.uninverting.TestDocTermOrds.java

License:Apache License

/**
 * Sanity-checks the TermsEnum exposed by FieldCache doc-term-ords over a
 * tiny three-term index ("beer" &lt; "hello" &lt; "world"): next(),
 * seekCeil(), seekExact(BytesRef), seekExact(ord) and lookupTerm() must all
 * agree on the term/ord mapping.
 */
public void testSortedTermsEnum() throws IOException {
    Directory dir = newDirectory();
    Analyzer analyzer = new MockAnalyzer(random());
    IndexWriterConfig config = newIndexWriterConfig(analyzer);
    config.setMergePolicy(newLogMergePolicy());
    RandomIndexWriter writer = new RandomIndexWriter(random(), dir, config);

    Document d = new Document();
    d.add(new StringField("field", "hello", Field.Store.NO));
    writer.addDocument(d);

    d = new Document();
    d.add(new StringField("field", "world", Field.Store.NO));
    // we need a second value for a doc, or we don't actually test DocTermOrds!
    d.add(new StringField("field", "hello", Field.Store.NO));
    writer.addDocument(d);

    d = new Document();
    d.add(new StringField("field", "beer", Field.Store.NO));
    writer.addDocument(d);
    writer.forceMerge(1);

    DirectoryReader reader = writer.getReader();
    writer.close();

    LeafReader leaf = getOnlyLeafReader(reader);
    SortedSetDocValues ords = FieldCache.DEFAULT.getDocTermOrds(leaf, "field", null);
    assertEquals(3, ords.getValueCount());

    TermsEnum te = ords.termsEnum();

    // walk all terms with next(); ords are assigned in sorted term order
    assertEquals("beer", te.next().utf8ToString());
    assertEquals(0, te.ord());
    assertEquals("hello", te.next().utf8ToString());
    assertEquals(1, te.ord());
    assertEquals("world", te.next().utf8ToString());
    assertEquals(2, te.ord());

    // seekCeil(): lands on the following term when absent, the exact term
    // when present, END past the last term
    assertEquals(SeekStatus.NOT_FOUND, te.seekCeil(new BytesRef("ha!")));
    assertEquals("hello", te.term().utf8ToString());
    assertEquals(1, te.ord());
    assertEquals(SeekStatus.FOUND, te.seekCeil(new BytesRef("beer")));
    assertEquals("beer", te.term().utf8ToString());
    assertEquals(0, te.ord());
    assertEquals(SeekStatus.END, te.seekCeil(new BytesRef("zzz")));

    // seekExact(BytesRef): true only for terms that exist
    assertTrue(te.seekExact(new BytesRef("beer")));
    assertEquals("beer", te.term().utf8ToString());
    assertEquals(0, te.ord());
    assertTrue(te.seekExact(new BytesRef("hello")));
    assertEquals("hello", te.term().utf8ToString());
    assertEquals(1, te.ord());
    assertTrue(te.seekExact(new BytesRef("world")));
    assertEquals("world", te.term().utf8ToString());
    assertEquals(2, te.ord());
    assertFalse(te.seekExact(new BytesRef("bogus")));

    // seekExact(ord): position directly by ordinal
    String[] byOrd = { "beer", "hello", "world" };
    for (int ord = 0; ord < byOrd.length; ord++) {
        te.seekExact(ord);
        assertEquals(byOrd[ord], te.term().utf8ToString());
        assertEquals(ord, te.ord());
    }

    // lookupTerm(): ord when present, -(insertionPoint)-1 when absent
    assertEquals(-1, ords.lookupTerm(new BytesRef("apple")));
    assertEquals(0, ords.lookupTerm(new BytesRef("beer")));
    assertEquals(-2, ords.lookupTerm(new BytesRef("car")));
    assertEquals(1, ords.lookupTerm(new BytesRef("hello")));
    assertEquals(-3, ords.lookupTerm(new BytesRef("matter")));
    assertEquals(2, ords.lookupTerm(new BytesRef("world")));
    assertEquals(-4, ords.lookupTerm(new BytesRef("zany")));

    reader.close();
    dir.close();
}

From source file:org.apache.solr.uninverting.TestFieldCacheVsDocValues.java

License:Apache License

/**
 * Asserts that two {@link TermsEnum}s over the same term dictionary behave
 * identically for every navigation API — next(), seekExact(ord),
 * seekExact(BytesRef) and seekCeil(BytesRef) — both sequentially and at
 * random positions.
 *
 * @param numOrds  number of terms; valid ordinals are {@code [0, numOrds)}
 * @param expected the reference enum
 * @param actual   the enum under test
 * @throws Exception on index access failure
 */
private void assertEquals(long numOrds, TermsEnum expected, TermsEnum actual) throws Exception {
    BytesRef ref;

    // sequential next() through all terms
    while ((ref = expected.next()) != null) {
        assertEquals(ref, actual.next());
        assertEquals(expected.ord(), actual.ord());
        assertEquals(expected.term(), actual.term());
    }
    // both enums must be exhausted at the same point
    assertNull(actual.next());

    // sequential seekExact(ord) through all terms
    for (long i = 0; i < numOrds; i++) {
        expected.seekExact(i);
        actual.seekExact(i);
        assertEquals(expected.ord(), actual.ord());
        assertEquals(expected.term(), actual.term());
    }

    // sequential seekExact(BytesRef) through all terms
    for (long i = 0; i < numOrds; i++) {
        expected.seekExact(i);
        assertTrue(actual.seekExact(expected.term()));
        assertEquals(expected.ord(), actual.ord());
        assertEquals(expected.term(), actual.term());
    }

    // sequential seekCeil(BytesRef) through all terms
    for (long i = 0; i < numOrds; i++) {
        expected.seekExact(i);
        assertEquals(SeekStatus.FOUND, actual.seekCeil(expected.term()));
        assertEquals(expected.ord(), actual.ord());
        assertEquals(expected.term(), actual.term());
    }

    // random seekExact(ord)
    for (long i = 0; i < numOrds; i++) {
        long randomOrd = TestUtil.nextLong(random(), 0, numOrds - 1);
        expected.seekExact(randomOrd);
        actual.seekExact(randomOrd);
        assertEquals(expected.ord(), actual.ord());
        assertEquals(expected.term(), actual.term());
    }

    // random seekExact(BytesRef)
    for (long i = 0; i < numOrds; i++) {
        long randomOrd = TestUtil.nextLong(random(), 0, numOrds - 1);
        expected.seekExact(randomOrd);
        // the target term is known to exist, so seekExact must report success
        assertTrue(actual.seekExact(expected.term()));
        assertEquals(expected.ord(), actual.ord());
        assertEquals(expected.term(), actual.term());
    }

    // random seekCeil(BytesRef) — targets may or may not exist
    for (long i = 0; i < numOrds; i++) {
        BytesRef target = new BytesRef(TestUtil.randomUnicodeString(random()));
        SeekStatus expectedStatus = expected.seekCeil(target);
        assertEquals(expectedStatus, actual.seekCeil(target));
        if (expectedStatus != SeekStatus.END) {
            // only positioned enums expose ord()/term()
            assertEquals(expected.ord(), actual.ord());
            assertEquals(expected.term(), actual.term());
        }
    }
}

From source file:suonos.lucene.fields.IndexedFieldCountsBuilder.java

License:Apache License

/**
 * Accumulates per-term counts for {@code fieldName} across every index
 * segment and records them, sorted by term, in {@code fieldCounts}.
 * <p>
 * Multi-valued doc-values fields are counted via {@link SortedSetDocValues}
 * ordinals, single-valued ones via {@link SortedDocValues}; regular indexed
 * fields fall back to {@link TermsEnum#docFreq()}.
 *
 * @param fieldName name of the indexed field, e.g. "album_genres"
 * @param filter    optional term filter, matched case-insensitively; may be null
 * @return this builder, for chaining
 * @throws IOException on index access failure
 */
public IndexedFieldCountsBuilder addField(String fieldName, String filter) throws IOException {

    final IndexedField fld = models.indexedField(fieldName);
    final Map<String, IndexedFieldTermCount> valuesMap = AntLib.newHashMap();
    final TIntIntHashMap ordCounts = new TIntIntHashMap();

    if (filter != null) {
        // Filtering is case-insensitive.
        filter = filter.toLowerCase();
    }

    // Walk every leaf (segment) of the reader.
    //
    int sz = ir.leaves().size();

    for (int i = 0; i != sz; i++) {
        // Get the segment reader.
        //
        LeafReader lr = ir.leaves().get(i).reader();

        // Bits marking which documents carry the field, e.g. "album_genres".
        // May be null when no document in this segment has the field.
        //
        Bits docs = lr.getDocsWithField(fld.getName());
        ordCounts.clear();

        // Enumerate the field terms.
        //
        if (fld.isDocValues()) {
            if (docs == null) {
                // Nothing to count in this segment.
                continue;
            }
            if (fld.isMultiValue()) {
                // docvalues & multivalue is a SortedSetDocValues.
                // Per-document values are deduplicated, dereferenced, and
                // sorted into a dictionary of unique values. A pointer to the
                // dictionary value (ordinal) can be retrieved for each
                // document. Ordinals are dense and in increasing sorted order.
                //
                SortedSetDocValues set = lr.getSortedSetDocValues(fld.getName());

                if (set != null) {
                    // For all documents that have the field, bump the
                    // frequency of each ordinal the document references.
                    //
                    for (int docId = 0; docId != docs.length(); docId++) {
                        if (docs.get(docId)) {
                            set.setDocument(docId);

                            long ord;
                            while ((ord = set.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) {
                                ordCounts.adjustOrPutValue((int) ord, 1, 1);
                            }
                        }
                    }

                    // Translate ordinal counts back into terms.
                    //
                    TermsEnum te = set.termsEnum();
                    BytesRef term;

                    while ((term = te.next()) != null) {
                        int ord = (int) te.ord();
                        add(fld, valuesMap, filter, term, ordCounts.get(ord));
                    }
                }

            } else {
                SortedDocValues set = lr.getSortedDocValues(fld.getName());

                if (set != null) {
                    // Single-valued field: each matching document contributes
                    // exactly one term — Classical, Rock, etc.
                    //
                    for (int docId = 0; docId != docs.length(); docId++) {
                        if (docs.get(docId)) {
                            BytesRef term = set.get(docId);
                            add(fld, valuesMap, filter, term, 1);
                        }
                    }
                }
            }
        } else {
            // Normal indexed field, not a doc value: count by docFreq.
            // terms() may be null if the segment has no postings for it.
            //
            Terms terms = lr.terms(fld.getName());

            if (terms != null) {
                TermsEnum te = terms.iterator();

                BytesRef term;
                while ((term = te.next()) != null) {
                    add(fld, valuesMap, filter, term, te.docFreq());
                }
            }
        }

        /*
         * SORTED doc[0] = "aardvark" doc[1] = "beaver" doc[2] = "aardvark"
         * 
         * doc[0] = 0 doc[1] = 1 doc[2] = 0
         * 
         * term[0] = "aardvark" term[1] = "beaver"
         */

        // http://127.0.0.1:8080/api/facets?fields=track_title_a
        // the above should return B:(4) because titles starting with B are
        // 4!
    }

    // Snapshot the accumulated term counters.
    //
    IndexedFieldTermCount[] list = valuesMap.values().toArray(new IndexedFieldTermCount[0]);

    // Sort by term.
    //
    Arrays.sort(list);

    // Add to the map.
    //
    this.fieldCounts.put(fld.getName(), list);

    return this;
}