Example usage for org.apache.lucene.index TermsEnum seekCeil

Introduction

On this page you can find example usages of org.apache.lucene.index TermsEnum.seekCeil.

Prototype

public abstract SeekStatus seekCeil(BytesRef text) throws IOException;

Document

Seeks to the specified term, if it exists, or to the next (ceiling) term. Returns a SeekStatus indicating whether the exact term was found (FOUND), a ceiling term was found (NOT_FOUND), or the end of the enum was hit (END); after SeekStatus.END the enum is unpositioned and term() must not be called.
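
A minimal sketch of the calling pattern this contract implies (the helper name is illustrative; Terms.iterator() is the no-argument form from recent Lucene versions, while some examples below use the older iterator(TermsEnum reuse) form):

import java.io.IOException;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.util.BytesRef;

public static String firstTermAtOrAfter(Terms terms, String target) throws IOException {
    TermsEnum te = terms.iterator();
    TermsEnum.SeekStatus status = te.seekCeil(new BytesRef(target));
    if (status == TermsEnum.SeekStatus.END) {
        return null; // no term at or after target; the enum is unpositioned, so don't call term()
    }
    // FOUND: positioned on target itself; NOT_FOUND: positioned on the smallest term greater than target
    return te.term().utf8ToString();
}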

Usage

From source file: com.github.flaxsearch.resources.TermsResource.java

License: Apache License

@GET
public TermsData getTerms(@QueryParam("segment") Integer segment, @PathParam("field") String field,
        @QueryParam("from") String startTerm, @QueryParam("filter") String filter,
        @QueryParam("encoding") @DefaultValue("utf8") String encoding,
        @QueryParam("count") @DefaultValue("50") int count) throws IOException {

    try {
        Fields fields = readerManager.getFields(segment);
        Terms terms = fields.terms(field);

        if (terms == null)
            throw new WebApplicationException("No such field " + field, Response.Status.NOT_FOUND);

        TermsEnum te = getTermsEnum(terms, filter);
        List<String> collected = new ArrayList<>();

        if (startTerm != null) {
            BytesRef start = BytesRefUtils.decode(startTerm, encoding);
            if (te.seekCeil(start) == TermsEnum.SeekStatus.END)
                return new TermsData(terms, Collections.emptyList(), encoding);
        } else {
            if (te.next() == null) {
                return new TermsData(terms, Collections.emptyList(), encoding);
            }
        }

        do {
            collected.add(BytesRefUtils.encode(te.term(), encoding));
        } while (te.next() != null && --count > 0);

        return new TermsData(terms, collected, encoding);
    } catch (NumberFormatException e) {
        throw new WebApplicationException("Field " + field + " cannot be decoded as " + encoding,
                Response.Status.BAD_REQUEST);
    }
}
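
The resource above uses seekCeil as both an existence check and a cursor: END short-circuits to an empty result, and otherwise the enum is already positioned on the first term of the page. A condensed sketch of that paging idiom (the helper name and simplifications are mine, not from the source; it assumes count >= 1):

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.util.BytesRef;

public static List<String> termsFrom(Terms terms, BytesRef start, int count) throws IOException {
    List<String> page = new ArrayList<>();
    TermsEnum te = terms.iterator();
    if (te.seekCeil(start) == TermsEnum.SeekStatus.END) {
        return page; // nothing at or after the requested start term
    }
    do {
        page.add(te.term().utf8ToString());
    } while (--count > 0 && te.next() != null); // decrement first so the enum is not advanced past the page
    return page;
}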

From source file: com.meizu.nlp.classification.utils.DocToDoubleVectorUtils.java

License: Apache License

/**
 * create a sparse <code>Double</code> vector given doc and field term vectors using local frequency of the terms in the doc
 * @param docTerms term vectors for a given document
 * @param fieldTerms field term vectors
 * @return a sparse vector of <code>Double</code>s as an array
 * @throws IOException in case accessing the underlying index fails
 */
public static Double[] toSparseLocalFreqDoubleArray(Terms docTerms, Terms fieldTerms) throws IOException {
    TermsEnum fieldTermsEnum = fieldTerms.iterator();
    Double[] freqVector = null;
    if (docTerms != null && fieldTerms.size() > -1) {
        freqVector = new Double[(int) fieldTerms.size()];
        int i = 0;
        TermsEnum docTermsEnum = docTerms.iterator();
        BytesRef term;
        while ((term = fieldTermsEnum.next()) != null) {
            TermsEnum.SeekStatus seekStatus = docTermsEnum.seekCeil(term);
            if (seekStatus.equals(TermsEnum.SeekStatus.END)) {
                docTermsEnum = docTerms.iterator();
            }
            if (seekStatus.equals(TermsEnum.SeekStatus.FOUND)) {
                long termFreqLocal = docTermsEnum.totalTermFreq(); // the total number of occurrences of this term in the given document
                freqVector[i] = Long.valueOf(termFreqLocal).doubleValue();
            } else {
                freqVector[i] = 0d;
            }
            i++;
        }
    }
    return freqVector;
}
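
A hypothetical call site for the helper above, assuming the field was indexed with term vectors (the reader, document id, and field name "text" are illustrative, not part of the source):

import java.io.IOException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.index.Terms;

public static Double[] sparseVectorFor(IndexReader reader, int docId) throws IOException {
    Terms docTerms = reader.getTermVector(docId, "text"); // the document's term vector
    Terms fieldTerms = MultiFields.getTerms(reader, "text"); // the field's full term dictionary
    return DocToDoubleVectorUtils.toSparseLocalFreqDoubleArray(docTerms, fieldTerms);
}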

From source file: com.rocana.lucene.codec.v1.RocanaBasePostingsFormatTestCase.java

License: Apache License

@Override
public void testInvertedWrite() throws Exception {
    Directory dir = newDirectory();
    MockAnalyzer analyzer = new MockAnalyzer(random());
    analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));
    IndexWriterConfig iwc = newIndexWriterConfig(analyzer);

    // Must be concurrent because thread(s) can be merging
    // while up to one thread flushes, and each of those
    // threads iterates over the map while the flushing
    // thread might be adding to it:
    final Map<String, TermFreqs> termFreqs = new ConcurrentHashMap<>();

    final AtomicLong sumDocFreq = new AtomicLong();
    final AtomicLong sumTotalTermFreq = new AtomicLong();

    // TODO: would be better to use / delegate to the current
    // Codec returned by getCodec()

    iwc.setCodec(new AssertingCodec() {
        @Override
        public PostingsFormat getPostingsFormatForField(String field) {

            PostingsFormat p = getCodec().postingsFormat();
            if (p instanceof PerFieldPostingsFormat) {
                p = ((PerFieldPostingsFormat) p).getPostingsFormatForField(field);
            }
            if (p instanceof RocanaPerFieldPostingsFormat) {
                p = ((RocanaPerFieldPostingsFormat) p).getPostingsFormatForField(field);
            }
            final PostingsFormat defaultPostingsFormat = p;

            final Thread mainThread = Thread.currentThread();

            if (field.equals("body")) {

                // A PF that counts up some stats and then in
                // the end we verify the stats match what the
                // final IndexReader says, just to exercise the
                // new freedom of iterating the postings more
                // than once at flush/merge:

                return new PostingsFormat(defaultPostingsFormat.getName()) {

                    @Override
                    public FieldsConsumer fieldsConsumer(final SegmentWriteState state) throws IOException {

                        final FieldsConsumer fieldsConsumer = defaultPostingsFormat.fieldsConsumer(state);

                        return new FieldsConsumer() {
                            @Override
                            public void write(Fields fields) throws IOException {
                                fieldsConsumer.write(fields);

                                boolean isMerge = state.context.context == IOContext.Context.MERGE;

                                // We only use one thread for flushing
                                // in this test:
                                assert isMerge || Thread.currentThread() == mainThread;

                                // We iterate the provided TermsEnum
                                // twice, so we exercise this new freedom
                                // with the inverted API; if
                                // addOnSecondPass is true, we add up
                                // term stats on the 2nd iteration:
                                boolean addOnSecondPass = random().nextBoolean();

                                //System.out.println("write isMerge=" + isMerge + " 2ndPass=" + addOnSecondPass);

                                // Gather our own stats:
                                Terms terms = fields.terms("body");
                                assert terms != null;

                                TermsEnum termsEnum = terms.iterator();
                                PostingsEnum docs = null;
                                while (termsEnum.next() != null) {
                                    BytesRef term = termsEnum.term();
                                    // TODO: also sometimes ask for payloads/offsets?
                                    boolean noPositions = random().nextBoolean();
                                    if (noPositions) {
                                        docs = termsEnum.postings(docs, PostingsEnum.FREQS);
                                    } else {
                                        docs = termsEnum.postings(null, PostingsEnum.POSITIONS);
                                    }
                                    int docFreq = 0;
                                    long totalTermFreq = 0;
                                    while (docs.nextDoc() != PostingsEnum.NO_MORE_DOCS) {
                                        docFreq++;
                                        totalTermFreq += docs.freq();
                                        int limit = TestUtil.nextInt(random(), 1, docs.freq());
                                        if (!noPositions) {
                                            for (int i = 0; i < limit; i++) {
                                                docs.nextPosition();
                                            }
                                        }
                                    }

                                    String termString = term.utf8ToString();

                                    // During merge we should only see terms
                                    // we had already seen during a
                                    // previous flush:
                                    assertTrue(isMerge == false || termFreqs.containsKey(termString));

                                    if (isMerge == false) {
                                        if (addOnSecondPass == false) {
                                            TermFreqs tf = termFreqs.get(termString);
                                            if (tf == null) {
                                                tf = new TermFreqs();
                                                termFreqs.put(termString, tf);
                                            }
                                            tf.docFreq += docFreq;
                                            tf.totalTermFreq += totalTermFreq;
                                            sumDocFreq.addAndGet(docFreq);
                                            sumTotalTermFreq.addAndGet(totalTermFreq);
                                        } else if (termFreqs.containsKey(termString) == false) {
                                            // Add placeholder (2nd pass will
                                            // set its counts):
                                            termFreqs.put(termString, new TermFreqs());
                                        }
                                    }
                                }

                                // Also test seeking the TermsEnum:
                                for (String term : termFreqs.keySet()) {
                                    if (termsEnum.seekExact(new BytesRef(term))) {
                                        // TODO: also sometimes ask for payloads/offsets?
                                        boolean noPositions = random().nextBoolean();
                                        if (noPositions) {
                                            docs = termsEnum.postings(docs, PostingsEnum.FREQS);
                                        } else {
                                            docs = termsEnum.postings(null, PostingsEnum.POSITIONS);
                                        }

                                        int docFreq = 0;
                                        long totalTermFreq = 0;
                                        while (docs.nextDoc() != PostingsEnum.NO_MORE_DOCS) {
                                            docFreq++;
                                            totalTermFreq += docs.freq();
                                            int limit = TestUtil.nextInt(random(), 1, docs.freq());
                                            if (!noPositions) {
                                                for (int i = 0; i < limit; i++) {
                                                    docs.nextPosition();
                                                }
                                            }
                                        }

                                        if (isMerge == false && addOnSecondPass) {
                                            TermFreqs tf = termFreqs.get(term);
                                            assert tf != null;
                                            tf.docFreq += docFreq;
                                            tf.totalTermFreq += totalTermFreq;
                                            sumDocFreq.addAndGet(docFreq);
                                            sumTotalTermFreq.addAndGet(totalTermFreq);
                                        }

                                        //System.out.println("  term=" + term + " docFreq=" + docFreq + " ttDF=" + termToDocFreq.get(term));
                                        assertTrue(docFreq <= termFreqs.get(term).docFreq);
                                        assertTrue(totalTermFreq <= termFreqs.get(term).totalTermFreq);
                                    }
                                }

                                // Also test seekCeil
                                for (int iter = 0; iter < 10; iter++) {
                                    BytesRef term = new BytesRef(
                                            TestUtil.randomRealisticUnicodeString(random()));
                                    SeekStatus status = termsEnum.seekCeil(term);
                                    if (status == SeekStatus.NOT_FOUND) {
                                        assertTrue(term.compareTo(termsEnum.term()) < 0);
                                    }
                                }
                            }

                            @Override
                            public void close() throws IOException {
                                fieldsConsumer.close();
                            }
                        };
                    }

                    @Override
                    public FieldsProducer fieldsProducer(SegmentReadState state) throws IOException {
                        return defaultPostingsFormat.fieldsProducer(state);
                    }
                };
            } else {
                return defaultPostingsFormat;
            }
        }
    });

    RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc);

    LineFileDocs docs = new LineFileDocs(random());
    int bytesToIndex = atLeast(100) * 1024;
    int bytesIndexed = 0;
    while (bytesIndexed < bytesToIndex) {
        Document doc = docs.nextDoc();
        w.addDocument(doc);
        bytesIndexed += RamUsageTester.sizeOf(doc);
    }

    IndexReader r = w.getReader();
    w.close();

    Terms terms = MultiFields.getTerms(r, "body");
    assertEquals(sumDocFreq.get(), terms.getSumDocFreq());
    assertEquals(sumTotalTermFreq.get(), terms.getSumTotalTermFreq());

    TermsEnum termsEnum = terms.iterator();
    long termCount = 0;
    boolean supportsOrds = true;
    while (termsEnum.next() != null) {
        BytesRef term = termsEnum.term();
        assertEquals(termFreqs.get(term.utf8ToString()).docFreq, termsEnum.docFreq());
        assertEquals(termFreqs.get(term.utf8ToString()).totalTermFreq, termsEnum.totalTermFreq());
        if (supportsOrds) {
            long ord;
            try {
                ord = termsEnum.ord();
            } catch (UnsupportedOperationException uoe) {
                supportsOrds = false;
                ord = -1;
            }
            if (ord != -1) {
                assertEquals(termCount, ord);
            }
        }
        termCount++;
    }
    assertEquals(termFreqs.size(), termCount);

    r.close();
    dir.close();
}

From source file: com.rocana.lucene.codec.v1.TestBlockPostingsFormat3.java

License: Apache License

private void assertTermsSeeking(Terms leftTerms, Terms rightTerms) throws Exception {
    TermsEnum leftEnum = null;
    TermsEnum rightEnum = null;

    // just an upper bound
    int numTests = atLeast(20);
    Random random = random();

    // collect this number of terms from the left side
    HashSet<BytesRef> tests = new HashSet<>();
    int numPasses = 0;
    while (numPasses < 10 && tests.size() < numTests) {
        leftEnum = leftTerms.iterator();
        BytesRef term = null;
        while ((term = leftEnum.next()) != null) {
            int code = random.nextInt(10);
            if (code == 0) {
                // the term
                tests.add(BytesRef.deepCopyOf(term));
            } else if (code == 1) {
                // truncated subsequence of term
                term = BytesRef.deepCopyOf(term);
                if (term.length > 0) {
                    // truncate it
                    term.length = random.nextInt(term.length);
                }
                tests.add(term);
            } else if (code == 2) {
                // term, but ensure a non-zero offset
                byte newbytes[] = new byte[term.length + 5];
                System.arraycopy(term.bytes, term.offset, newbytes, 5, term.length);
                tests.add(new BytesRef(newbytes, 5, term.length));
            }
        }
        numPasses++;
    }

    ArrayList<BytesRef> shuffledTests = new ArrayList<>(tests);
    Collections.shuffle(shuffledTests, random);

    for (BytesRef b : shuffledTests) {
        leftEnum = leftTerms.iterator();
        rightEnum = rightTerms.iterator();

        assertEquals(leftEnum.seekExact(b), rightEnum.seekExact(b));
        assertEquals(leftEnum.seekExact(b), rightEnum.seekExact(b));

        SeekStatus leftStatus;
        SeekStatus rightStatus;

        leftStatus = leftEnum.seekCeil(b);
        rightStatus = rightEnum.seekCeil(b);
        assertEquals(leftStatus, rightStatus);
        if (leftStatus != SeekStatus.END) {
            assertEquals(leftEnum.term(), rightEnum.term());
        }

        leftStatus = leftEnum.seekCeil(b);
        rightStatus = rightEnum.seekCeil(b);
        assertEquals(leftStatus, rightStatus);
        if (leftStatus != SeekStatus.END) {
            assertEquals(leftEnum.term(), rightEnum.term());
        }
    }
}

From source file: com.rondhuit.w2v.lucene.LuceneIndexCorpus.java

License: Apache License

@Override
public void learnVocab() throws IOException {
    super.learnVocab();

    final String field = ((LuceneIndexConfig) config).getField();
    final Terms terms = MultiFields.getTerms(reader, field);
    final BytesRef maxTerm = terms.getMax();
    final BytesRef minTerm = terms.getMin();
    Query q = new TermRangeQuery(field, minTerm, maxTerm, true, true);
    IndexSearcher searcher = new IndexSearcher(reader);
    topDocs = searcher.search(q, Integer.MAX_VALUE);

    TermsEnum termsEnum = terms.iterator(null);

    termsEnum.seekCeil(new BytesRef());
    BytesRef term = termsEnum.term();
    while (term != null) {
        int p = addWordToVocab(term.utf8ToString());
        vocab[p].setCn((int) termsEnum.totalTermFreq());
        term = termsEnum.next();
    }
}
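
Here seekCeil(new BytesRef()) serves only to rewind the enum onto the first term, since an empty target makes every term a ceiling candidate; note the code assumes the field is non-empty, because term() is undefined after SeekStatus.END. A minimal sketch of the same rewind idiom with that case handled (the helper name is mine):

import java.io.IOException;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.util.BytesRef;

public static long countTerms(Terms terms) throws IOException {
    TermsEnum te = terms.iterator(null); // reuse-style iterator, matching the Lucene version used above
    long n = 0;
    if (te.seekCeil(new BytesRef()) != TermsEnum.SeekStatus.END) { // positions on the smallest term, if any
        do {
            n++;
        } while (te.next() != null);
    }
    return n;
}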

From source file: com.senseidb.abacus.api.codec.CodecTest.java

License: Apache License

static void testThreaded(int numThreads, final int numIter, final AtomicReader reader, final String field) {
    Runnable runnable = new Runnable() {
        public void run() {
            try {
                Fields f = reader.fields();
                Terms t = f.terms(field);

                TermsEnum te = t.iterator(null);

                ArrayList<BytesRef> termList = new ArrayList<BytesRef>();

                BytesRef termText;
                while ((termText = te.next()) != null) {
                    termList.add(termText);
                }

                Random rand = new Random();

                for (int i = 0; i < numIter; ++i) {
                    int idx = rand.nextInt(termList.size());
                    termText = termList.get(idx);
                    te = t.iterator(null);
                    te.seekCeil(termText);
                    DocsEnum de = te.docs(null, null);
                    int doc;
                    while ((doc = de.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
                        // exhaust the postings by sequential iteration
                    }

                    de = te.docs(null, null);
                    doc = -1;
                    while ((doc = de.advance(doc + 2)) != DocIdSetIterator.NO_MORE_DOCS) {
                        // exhaust the postings again, skipping ahead with advance()
                    }
                }

            } catch (Exception e) {
                e.printStackTrace();
            }
        }
    };

    Thread[] threads = new Thread[numThreads];
    for (int i = 0; i < numThreads; ++i) {
        threads[i] = new Thread(runnable);
    }
    for (int i = 0; i < numThreads; ++i) {
        threads[i].start();
    }
    for (int i = 0; i < numThreads; ++i) {
        try {
            threads[i].join();
        } catch (InterruptedException e) {
            e.printStackTrace();
        }
    }
}

From source file: org.apache.blur.command.TermsCommand.java

License: Apache License

private static List<String> terms(IndexReader reader, String fieldName, String startWith, short size)
        throws IOException {

    Term term = getTerm(fieldName, startWith);
    List<String> terms = new ArrayList<String>(size);
    AtomicReader areader = BlurUtil.getAtomicReader(reader);
    Terms termsAll = areader.terms(term.field());

    if (termsAll == null) {
        return terms;
    }

    TermsEnum termEnum = termsAll.iterator(null);

    SeekStatus status = termEnum.seekCeil(term.bytes());

    if (status == SeekStatus.END) {
        return terms;
    }

    BytesRef currentTermText = termEnum.term();
    do {
        terms.add(currentTermText.utf8ToString());
        if (terms.size() >= size) {
            return terms;
        }
    } while ((currentTermText = termEnum.next()) != null);
    return terms;
}

From source file: org.apache.blur.manager.IndexManager.java

License: Apache License

public static List<String> terms(IndexReader reader, FieldTypeDefinition typeDef, String columnFamily,
        String columnName, String startWith, short size) throws IOException {
    if (startWith == null) {
        startWith = "";
    }
    Term term = getTerm(columnFamily, columnName, startWith);
    List<String> terms = new ArrayList<String>(size);
    AtomicReader areader = BlurUtil.getAtomicReader(reader);
    Terms termsAll = areader.terms(term.field());

    if (termsAll == null) {
        return terms;
    }

    TermsEnum termEnum = termsAll.iterator(null);
    SeekStatus status = termEnum.seekCeil(term.bytes());

    if (status == SeekStatus.END) {
        return terms;
    }

    BytesRef currentTermText = termEnum.term();
    do {
        terms.add(currentTermText.utf8ToString());
        String readTerm = typeDef.readTerm(currentTermText);
        if (readTerm != null)
            terms.add(readTerm);
        if (terms.size() >= size) {
            return terms;
        }
    } while ((currentTermText = termEnum.next()) != null);
    return terms;
}

From source file: org.apache.solr.handler.component.TermsComponent.java

License: Apache License

@Override
public void process(ResponseBuilder rb) throws IOException {
    SolrParams params = rb.req.getParams();
    if (!params.getBool(TermsParams.TERMS, false))
        return;

    String[] fields = params.getParams(TermsParams.TERMS_FIELD);

    NamedList<Object> termsResult = new SimpleOrderedMap<Object>();
    rb.rsp.add("terms", termsResult);

    if (fields == null || fields.length == 0)
        return;

    int limit = params.getInt(TermsParams.TERMS_LIMIT, 10);
    if (limit < 0) {
        limit = Integer.MAX_VALUE;
    }

    String lowerStr = params.get(TermsParams.TERMS_LOWER);
    String upperStr = params.get(TermsParams.TERMS_UPPER);
    boolean upperIncl = params.getBool(TermsParams.TERMS_UPPER_INCLUSIVE, false);
    boolean lowerIncl = params.getBool(TermsParams.TERMS_LOWER_INCLUSIVE, true);
    boolean sort = !TermsParams.TERMS_SORT_INDEX
            .equals(params.get(TermsParams.TERMS_SORT, TermsParams.TERMS_SORT_COUNT));
    int freqmin = params.getInt(TermsParams.TERMS_MINCOUNT, 1);
    int freqmax = params.getInt(TermsParams.TERMS_MAXCOUNT, UNLIMITED_MAX_COUNT);
    if (freqmax < 0) {
        freqmax = Integer.MAX_VALUE;
    }
    String prefix = params.get(TermsParams.TERMS_PREFIX_STR);
    String regexp = params.get(TermsParams.TERMS_REGEXP_STR);
    Pattern pattern = regexp != null ? Pattern.compile(regexp, resolveRegexpFlags(params)) : null;

    boolean raw = params.getBool(TermsParams.TERMS_RAW, false);

    final AtomicReader indexReader = rb.req.getSearcher().getAtomicReader();
    Fields lfields = indexReader.fields();

    for (String field : fields) {
        NamedList<Integer> fieldTerms = new NamedList<Integer>();
        termsResult.add(field, fieldTerms);

        Terms terms = lfields == null ? null : lfields.terms(field);
        if (terms == null) {
            // no terms for this field
            continue;
        }

        FieldType ft = raw ? null : rb.req.getSchema().getFieldTypeNoEx(field);
        if (ft == null)
            ft = new StrField();

        // prefix must currently be text
        BytesRef prefixBytes = prefix == null ? null : new BytesRef(prefix);

        BytesRef upperBytes = null;
        if (upperStr != null) {
            upperBytes = new BytesRef();
            ft.readableToIndexed(upperStr, upperBytes);
        }

        BytesRef lowerBytes;
        if (lowerStr == null) {
            // If no lower bound was specified, use the prefix
            lowerBytes = prefixBytes;
        } else if (raw) {
            // TODO: how to handle binary? perhaps we don't for "raw"... or if the field exists
            // perhaps we detect if the FieldType is non-character and expect hex if so?
            lowerBytes = new BytesRef(lowerStr);
        } else {
            lowerBytes = new BytesRef();
            ft.readableToIndexed(lowerStr, lowerBytes);
        }

        TermsEnum termsEnum = terms.iterator(null);
        BytesRef term = null;

        if (lowerBytes != null) {
            if (termsEnum.seekCeil(lowerBytes) == TermsEnum.SeekStatus.END) {
                termsEnum = null;
            } else {
                term = termsEnum.term();
                //Only advance the enum if we are excluding the lower bound and the lower Term actually matches
                if (lowerIncl == false && term.equals(lowerBytes)) {
                    term = termsEnum.next();
                }
            }
        } else {
            // position termsEnum on first term
            term = termsEnum.next();
        }

        int i = 0;
        BoundedTreeSet<CountPair<BytesRef, Integer>> queue = (sort
                ? new BoundedTreeSet<CountPair<BytesRef, Integer>>(limit)
                : null);
        CharsRef external = new CharsRef();
        while (term != null && (i < limit || sort)) {
            boolean externalized = false; // did we fill in "external" yet for this term?

            // stop if the prefix doesn't match
            if (prefixBytes != null && !StringHelper.startsWith(term, prefixBytes))
                break;

            if (pattern != null) {
                // indexed text or external text?
                // TODO: support "raw" mode?
                ft.indexedToReadable(term, external);
                externalized = true;
                if (!pattern.matcher(external).matches()) {
                    term = termsEnum.next();
                    continue;
                }
            }

            if (upperBytes != null) {
                int upperCmp = term.compareTo(upperBytes);
                // if we are past the upper term, or equal to it (when don't include upper) then stop.
                if (upperCmp > 0 || (upperCmp == 0 && !upperIncl))
                    break;
            }

            // This is a good term in the range.  Check if mincount/maxcount conditions are satisfied.
            int docFreq = termsEnum.docFreq();
            if (docFreq >= freqmin && docFreq <= freqmax) {
                // add the term to the list
                if (sort) {
                    queue.add(new CountPair<BytesRef, Integer>(BytesRef.deepCopyOf(term), docFreq));
                } else {

                    // TODO: handle raw somehow
                    if (!externalized) {
                        ft.indexedToReadable(term, external);
                    }
                    fieldTerms.add(external.toString(), docFreq);
                    i++;
                }
            }

            term = termsEnum.next();
        }

        if (sort) {
            for (CountPair<BytesRef, Integer> item : queue) {
                if (i >= limit)
                    break;
                ft.indexedToReadable(item.key, external);
                fieldTerms.add(external.toString(), item.val);
                i++;
            }
        }
    }
}

From source file: org.apache.solr.request.NumericFacets.java

License: Apache License

public static NamedList<Integer> getCounts(SolrIndexSearcher searcher, DocSet docs, String fieldName,
        int offset, int limit, int mincount, boolean missing, String sort) throws IOException {
    final boolean zeros = mincount <= 0;
    mincount = Math.max(mincount, 1);
    final SchemaField sf = searcher.getSchema().getField(fieldName);
    final FieldType ft = sf.getType();
    final NumericType numericType = ft.getNumericType();
    if (numericType == null) {
        throw new IllegalStateException();
    }
    final List<AtomicReaderContext> leaves = searcher.getIndexReader().leaves();

    // 1. accumulate
    final HashTable hashTable = new HashTable();
    final Iterator<AtomicReaderContext> ctxIt = leaves.iterator();
    AtomicReaderContext ctx = null;
    FieldCache.Longs longs = null;
    Bits docsWithField = null;
    int missingCount = 0;
    for (DocIterator docsIt = docs.iterator(); docsIt.hasNext();) {
        final int doc = docsIt.nextDoc();
        if (ctx == null || doc >= ctx.docBase + ctx.reader().maxDoc()) {
            do {
                ctx = ctxIt.next();
            } while (ctx == null || doc >= ctx.docBase + ctx.reader().maxDoc());
            assert doc >= ctx.docBase;
            switch (numericType) {
            case LONG:
                longs = FieldCache.DEFAULT.getLongs(ctx.reader(), fieldName, true);
                break;
            case INT:
                final FieldCache.Ints ints = FieldCache.DEFAULT.getInts(ctx.reader(), fieldName, true);
                longs = new FieldCache.Longs() {
                    @Override
                    public long get(int docID) {
                        return ints.get(docID);
                    }
                };
                break;
            case FLOAT:
                final FieldCache.Floats floats = FieldCache.DEFAULT.getFloats(ctx.reader(), fieldName, true);
                longs = new FieldCache.Longs() {
                    @Override
                    public long get(int docID) {
                        return NumericUtils.floatToSortableInt(floats.get(docID));
                    }
                };
                break;
            case DOUBLE:
                final FieldCache.Doubles doubles = FieldCache.DEFAULT.getDoubles(ctx.reader(), fieldName, true);
                longs = new FieldCache.Longs() {
                    @Override
                    public long get(int docID) {
                        return NumericUtils.doubleToSortableLong(doubles.get(docID));
                    }
                };
                break;
            default:
                throw new AssertionError();
            }
            docsWithField = FieldCache.DEFAULT.getDocsWithField(ctx.reader(), fieldName);
        }
        long v = longs.get(doc - ctx.docBase);
        if (v != 0 || docsWithField.get(doc - ctx.docBase)) {
            hashTable.add(doc, v, 1);
        } else {
            ++missingCount;
        }
    }

    // 2. select top-k facet values
    final int pqSize = limit < 0 ? hashTable.size : Math.min(offset + limit, hashTable.size);
    final PriorityQueue<Entry> pq;
    if (FacetParams.FACET_SORT_COUNT.equals(sort) || FacetParams.FACET_SORT_COUNT_LEGACY.equals(sort)) {
        pq = new PriorityQueue<Entry>(pqSize) {
            @Override
            protected boolean lessThan(Entry a, Entry b) {
                if (a.count < b.count || (a.count == b.count && a.bits > b.bits)) {
                    return true;
                } else {
                    return false;
                }
            }
        };
    } else {
        pq = new PriorityQueue<Entry>(pqSize) {
            @Override
            protected boolean lessThan(Entry a, Entry b) {
                return a.bits > b.bits;
            }
        };
    }
    Entry e = null;
    for (int i = 0; i < hashTable.bits.length; ++i) {
        if (hashTable.counts[i] >= mincount) {
            if (e == null) {
                e = new Entry();
            }
            e.bits = hashTable.bits[i];
            e.count = hashTable.counts[i];
            e.docID = hashTable.docIDs[i];
            e = pq.insertWithOverflow(e);
        }
    }

    // 3. build the NamedList
    final ValueSource vs = ft.getValueSource(sf, null);
    final NamedList<Integer> result = new NamedList<Integer>();

    // This stuff is complicated because if facet.mincount=0, the counts needs
    // to be merged with terms from the terms dict
    if (!zeros || FacetParams.FACET_SORT_COUNT.equals(sort)
            || FacetParams.FACET_SORT_COUNT_LEGACY.equals(sort)) {
        // Only keep items we're interested in
        final Deque<Entry> counts = new ArrayDeque<Entry>();
        while (pq.size() > offset) {
            counts.addFirst(pq.pop());
        }

        // Entries from the PQ first, then using the terms dictionary
        for (Entry entry : counts) {
            final int readerIdx = ReaderUtil.subIndex(entry.docID, leaves);
            final FunctionValues values = vs.getValues(Collections.emptyMap(), leaves.get(readerIdx));
            result.add(values.strVal(entry.docID - leaves.get(readerIdx).docBase), entry.count);
        }

        if (zeros && (limit < 0 || result.size() < limit)) { // need to merge with the term dict
            if (!sf.indexed()) {
                throw new IllegalStateException("Cannot use " + FacetParams.FACET_MINCOUNT + "=0 on field "
                        + sf.getName() + " which is not indexed");
            }
            // Add zeros until there are limit results
            final Set<String> alreadySeen = new HashSet<String>();
            while (pq.size() > 0) {
                Entry entry = pq.pop();
                final int readerIdx = ReaderUtil.subIndex(entry.docID, leaves);
                final FunctionValues values = vs.getValues(Collections.emptyMap(), leaves.get(readerIdx));
                alreadySeen.add(values.strVal(entry.docID - leaves.get(readerIdx).docBase));
            }
            for (int i = 0; i < result.size(); ++i) {
                alreadySeen.add(result.getName(i));
            }
            final Terms terms = searcher.getAtomicReader().terms(fieldName);
            if (terms != null) {
                final String prefixStr = TrieField.getMainValuePrefix(ft);
                final BytesRef prefix;
                if (prefixStr != null) {
                    prefix = new BytesRef(prefixStr);
                } else {
                    prefix = new BytesRef();
                }
                final TermsEnum termsEnum = terms.iterator(null);
                BytesRef term;
                switch (termsEnum.seekCeil(prefix)) {
                case FOUND:
                case NOT_FOUND:
                    term = termsEnum.term();
                    break;
                case END:
                    term = null;
                    break;
                default:
                    throw new AssertionError();
                }
                final CharsRef spare = new CharsRef();
                for (int skipped = hashTable.size; skipped < offset && term != null
                        && StringHelper.startsWith(term, prefix);) {
                    ft.indexedToReadable(term, spare);
                    final String termStr = spare.toString();
                    if (!alreadySeen.contains(termStr)) {
                        ++skipped;
                    }
                    term = termsEnum.next();
                }
                for (; term != null && StringHelper.startsWith(term, prefix)
                        && (limit < 0 || result.size() < limit); term = termsEnum.next()) {
                    ft.indexedToReadable(term, spare);
                    final String termStr = spare.toString();
                    if (!alreadySeen.contains(termStr)) {
                        result.add(termStr, 0);
                    }
                }
            }
        }
    } else {
        // sort=index, mincount=0 and we have less than limit items
        // => Merge the PQ and the terms dictionary on the fly
        if (!sf.indexed()) {
            throw new IllegalStateException("Cannot use " + FacetParams.FACET_SORT + "="
                    + FacetParams.FACET_SORT_INDEX + " on a field which is not indexed");
        }
        final Map<String, Integer> counts = new HashMap<String, Integer>();
        while (pq.size() > 0) {
            final Entry entry = pq.pop();
            final int readerIdx = ReaderUtil.subIndex(entry.docID, leaves);
            final FunctionValues values = vs.getValues(Collections.emptyMap(), leaves.get(readerIdx));
            counts.put(values.strVal(entry.docID - leaves.get(readerIdx).docBase), entry.count);
        }
        final Terms terms = searcher.getAtomicReader().terms(fieldName);
        if (terms != null) {
            final String prefixStr = TrieField.getMainValuePrefix(ft);
            final BytesRef prefix;
            if (prefixStr != null) {
                prefix = new BytesRef(prefixStr);
            } else {
                prefix = new BytesRef();
            }
            final TermsEnum termsEnum = terms.iterator(null);
            BytesRef term;
            switch (termsEnum.seekCeil(prefix)) {
            case FOUND:
            case NOT_FOUND:
                term = termsEnum.term();
                break;
            case END:
                term = null;
                break;
            default:
                throw new AssertionError();
            }
            final CharsRef spare = new CharsRef();
            for (int i = 0; i < offset && term != null && StringHelper.startsWith(term, prefix); ++i) {
                term = termsEnum.next();
            }
            for (; term != null && StringHelper.startsWith(term, prefix)
                    && (limit < 0 || result.size() < limit); term = termsEnum.next()) {
                ft.indexedToReadable(term, spare);
                final String termStr = spare.toString();
                Integer count = counts.get(termStr);
                if (count == null) {
                    count = 0;
                }
                result.add(termStr, count);
            }
        }
    }

    if (missing) {
        result.add(null, missingCount);
    }
    return result;
}