Example usage for org.apache.lucene.search DocIdSetIterator NO_MORE_DOCS

List of usage examples for org.apache.lucene.search DocIdSetIterator NO_MORE_DOCS

Introduction

In this page you can find the example usage for org.apache.lucene.search DocIdSetIterator NO_MORE_DOCS.

Prototype

int NO_MORE_DOCS

To view the source code for org.apache.lucene.search DocIdSetIterator NO_MORE_DOCS, click the source link below.

Click Source Link

Document

When returned by #nextDoc(), #advance(int) and #docID(), it means there are no more docs in the iterator.

Usage

From source file:tw.com.kyle.luminance.corpus.compute.ExpNetwork.java

/**
 * Counts the documents matching a phrase query on {@code node_x} whose
 * "timestamp" doc value falls in the half-open range
 * [start_date_str, end_date_str).
 *
 * @param node_x         phrase passed to {@code build_phrase_query}
 * @param start_date_str inclusive lower bound, lexically comparable format
 * @param end_date_str   exclusive upper bound, same format
 * @return number of matching documents inside the time range
 * @throws IOException on index access failure
 */
private int get_token_count_timerange(String node_x, String start_date_str, String end_date_str)
        throws IOException {
    int n_doc = 0;
    Query query_a = build_phrase_query(node_x);
    Weight w = query_a.createWeight(searcher, false);

    for (LeafReaderContext ctx : reader.leaves()) {
        // getSortedDocValues returns null when no document in this segment
        // carries the "timestamp" field; skip such segments instead of NPE-ing.
        SortedDocValues sorted_dv = ctx.reader().getSortedDocValues("timestamp");
        if (sorted_dv == null) {
            continue;
        }
        Scorer scorer = w.scorer(ctx);
        if (scorer == null) {
            continue;
        }
        DocIdSetIterator doc_it = scorer.iterator();
        int nxtDoc;
        while ((nxtDoc = doc_it.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
            // utf8ToString() honors the BytesRef offset/length; decoding the
            // raw backing array directly could include bytes past the value.
            String timestamp = sorted_dv.get(nxtDoc).utf8ToString();
            // timestamp and both date bounds share a format that orders
            // lexically, so plain String comparison implements the range test.
            if (timestamp.compareTo(start_date_str) >= 0 && timestamp.compareTo(end_date_str) < 0) {
                n_doc += 1;
            }
        }
    }
    return n_doc;
}

From source file:yasoco.TermScore.java

/**
 * Scores every term of one document's term vector for {@code fieldName} with a
 * linear mix of normalized term frequency and inverse document frequency, and
 * returns the top {@code num_q_terms} terms (configurable via the
 * {@code num_q_terms} property, default 10).
 *
 * NOTE(review): the {@code q} parameter is never used in this method — confirm
 * whether it can be dropped or was meant to filter the candidate terms.
 *
 * @param docId     Lucene internal document id whose term vector is read
 * @param fieldName field whose term vector is scored
 * @param q         unused (see note above)
 * @return up to num_q_terms highest-scoring terms; empty list when the
 *         document has no term vector for the field
 * @throws Exception propagated from index access
 */
List<TermScore> selTerms(int docId, String fieldName, Query q) throws Exception {

    int num_q_terms = Integer.parseInt(prop.getProperty("num_q_terms", "10"));
    int N = reader.numDocs();
    List<TermScore> tlist = new Vector<>();

    Terms terms = reader.getTermVector(docId, fieldName); //get terms vectors for one document and one field
    if (terms == null || terms.size() == 0)
        return tlist;

    TermsEnum termsEnum = terms.iterator(null); // access the terms for this field
    BytesRef term = null;

    // First pass: accumulate the document length (sum of in-document term
    // frequencies) so the second pass can normalize tf by it.
    int docLen = 0;
    while ((term = termsEnum.next()) != null) {// explore the terms for this field
        DocsEnum docsEnum = termsEnum.docs(null, null); // enumerate through documents, in this case only one
        int docIdEnum;

        while ((docIdEnum = docsEnum.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
            //get the term frequency in the document
            docLen += docsEnum.freq();
        }
    }

    // Second pass: re-open the enumeration from the start and score each term.
    termsEnum = terms.iterator(null); // access the terms for this field
    while ((term = termsEnum.next()) != null) {// explore the terms for this field
        Term t = new Term(fieldName, term);
        DocsEnum docsEnum = termsEnum.docs(null, null); // enumerate through documents, in this case only one
        int docIdEnum;

        while ((docIdEnum = docsEnum.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
            //get the term frequency in the document
            int tf = docsEnum.freq();
            float ntf = tf / (float) docLen;
            // NOTE(review): "df" here is totalTermFreq (collection frequency),
            // not docFreq (number of documents containing the term) — confirm
            // this is the intended statistic for the idf component.
            int df = (int) (reader.totalTermFreq(t));
            // NOTE(review): idf is a raw ratio N/df with no log damping —
            // verify this matches the intended scoring model.
            float idf = N / (float) df;
            // Linear interpolation between normalized tf and idf, weighted by
            // the instance field lambda.
            float tf_idf = lambda * ntf + (1 - lambda) * idf;

            tlist.add(new TermScore(term.utf8ToString(), tf_idf));
        }
    }

    // Presumably TermScore's compareTo orders descending by score — TODO
    // confirm, since subList below keeps the first (best) entries.
    Collections.sort(tlist); // desc
    // subList is a view backed by tlist; callers should not rely on it
    // surviving independent mutation of the underlying list.
    List<TermScore> topList = tlist.subList(0, Math.min(tlist.size(), num_q_terms));
    return topList;
}