List of usage examples for org.apache.lucene.search DocIdSetIterator NO_MORE_DOCS
int NO_MORE_DOCS
To view the source code for org.apache.lucene.search DocIdSetIterator NO_MORE_DOCS, click the Source Link below.
From source file:tw.com.kyle.luminance.corpus.compute.ExpNetwork.java
private int get_token_count_timerange(String node_x, String start_date_str, String end_date_str) throws IOException { int n_doc = 0; Query query_a = build_phrase_query(node_x); Weight w = query_a.createWeight(searcher, false); for (LeafReaderContext ctx : reader.leaves()) { SortedDocValues sorted_dv = ctx.reader().getSortedDocValues("timestamp"); Scorer scorer = w.scorer(ctx);//from w ww.j a v a 2 s. c om if (scorer == null) { continue; } DocIdSetIterator doc_it = scorer.iterator(); int nxtDoc = 0; while ((nxtDoc = doc_it.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) { String timestamp = new String(sorted_dv.get(nxtDoc).bytes, StandardCharsets.UTF_8); //! note that both timestamp, (start|end)_date_str are both //! formatted so can be ordered lexically int dbg1 = timestamp.compareTo(start_date_str); int dbg2 = timestamp.compareTo(end_date_str); if (timestamp.compareTo(start_date_str) >= 0 && timestamp.compareTo(end_date_str) < 0) { n_doc += 1; } } } return n_doc; }
From source file:yasoco.TermScore.java
List<TermScore> selTerms(int docId, String fieldName, Query q) throws Exception { int num_q_terms = Integer.parseInt(prop.getProperty("num_q_terms", "10")); int N = reader.numDocs(); List<TermScore> tlist = new Vector<>(); Terms terms = reader.getTermVector(docId, fieldName); //get terms vectors for one document and one field if (terms == null || terms.size() == 0) return tlist; TermsEnum termsEnum = terms.iterator(null); // access the terms for this field BytesRef term = null;//from ww w . j av a 2s.c om int docLen = 0; while ((term = termsEnum.next()) != null) {// explore the terms for this field DocsEnum docsEnum = termsEnum.docs(null, null); // enumerate through documents, in this case only one int docIdEnum; while ((docIdEnum = docsEnum.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) { //get the term frequency in the document docLen += docsEnum.freq(); } } termsEnum = terms.iterator(null); // access the terms for this field while ((term = termsEnum.next()) != null) {// explore the terms for this field Term t = new Term(fieldName, term); DocsEnum docsEnum = termsEnum.docs(null, null); // enumerate through documents, in this case only one int docIdEnum; while ((docIdEnum = docsEnum.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) { //get the term frequency in the document int tf = docsEnum.freq(); float ntf = tf / (float) docLen; int df = (int) (reader.totalTermFreq(t)); float idf = N / (float) df; float tf_idf = lambda * ntf + (1 - lambda) * idf; tlist.add(new TermScore(term.utf8ToString(), tf_idf)); } } Collections.sort(tlist); // desc List<TermScore> topList = tlist.subList(0, Math.min(tlist.size(), num_q_terms)); return topList; }