List of usage examples for org.apache.lucene.search DocIdSetIterator NO_MORE_DOCS
public static final int NO_MORE_DOCS — the sentinel value (Integer.MAX_VALUE) that docID(), nextDoc() and advance() return once the iterator is exhausted.
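All of the examples below follow the same idiom: keep calling nextDoc() (or advance()) until the iterator returns NO_MORE_DOCS. A minimal sketch of that loop is shown here; the consume method and its iterator argument are placeholders for illustration, not part of any example below.

import java.io.IOException;
import org.apache.lucene.search.DocIdSetIterator;

// Hypothetical helper: exhaust any DocIdSetIterator, visiting each matching doc id once.
static void consume(DocIdSetIterator it) throws IOException {
    for (int doc = it.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = it.nextDoc()) {
        // doc is the current document id (segment-local or index-wide, depending on the iterator's source)
        System.out.println("matched doc: " + doc);
    }
}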
From source file:proj.zoie.api.UIDDocIdSet.java
License:Apache License
@Override
public DocIdSetIterator iterator() {
    return new DocIdSetIterator() {
        int doc = -1;
        int current = -1;

        @Override
        public int docID() {
            return doc;
        }

        @Override
        public int nextDoc() throws IOException {
            if (current < _sorted.length - 1) {
                current++;
                doc = _sorted[current];
                return doc;
            }
            return DocIdSetIterator.NO_MORE_DOCS;
        }

        @Override
        public int advance(int target) throws IOException {
            int idx = Arrays.binarySearch(_sorted, target);
            if (idx < 0) {
                idx = -(idx + 1);
                if (idx >= _sorted.length)
                    return DocIdSetIterator.NO_MORE_DOCS;
            }
            current = idx;
            doc = _sorted[current];
            return doc;
        }
    };
}
From source file:proj.zoie.impl.indexing.internal.LuceneIndexDataLoader.java
License:Apache License
protected final int purgeDocuments() {
    synchronized (_optimizeMonitor) {
        if (_purgeFilter != null) {
            BaseSearchIndex<R> idx = getSearchIndex();
            IndexReader writeReader = null;

            log.info("purging docs started...");
            int count = 0;
            long start = System.currentTimeMillis();

            ZoieIndexReader<R> reader = null;
            try {
                synchronized (idx) {
                    idx.refresh(false);
                    reader = idx.openIndexReader();
                    if (reader != null)
                        reader.incZoieRef();
                }

                if (reader != null) {
                    writeReader = idx.openIndexReaderForDelete();
                    if (writeReader != null) {
                        DocIdSetIterator iter = _purgeFilter.getDocIdSet(reader).iterator();
                        int doc;
                        while ((doc = iter.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
                            count++;
                            writeReader.deleteDocument(doc);
                        }
                    }
                }
            } catch (Throwable th) {
                log.error("problem creating purge filter: " + th.getMessage(), th);
            } finally {
                if (reader != null) {
                    reader.decZoieRef();
                }
                if (writeReader != null) {
                    try {
                        writeReader.close();
                    } catch (IOException ioe) {
                        ZoieHealth.setFatal();
                        log.error(ioe.getMessage(), ioe);
                    }
                }
            }

            long end = System.currentTimeMillis();
            log.info("purging docs completed in " + (end - start) + "ms");
            log.info("total docs purged: " + count);
            return count;
        }
        return 0;
    }
}
From source file:retriever.TermWt.java
DocVector(IndexReader reader, int docId) throws Exception {
    this.reader = reader;
    Terms terms = reader.getTermVector(docId, FIELD_ANALYZED_CONTENT);
    TermsEnum termsEnum;
    BytesRef term;
    List<TermWt> tfvec = new ArrayList<>();

    // Construct the normalized tf vector
    termsEnum = terms.iterator(null); // access the terms for this field
    while ((term = termsEnum.next()) != null) { // explore the terms for this field
        String termStr = term.utf8ToString();
        DocsEnum docsEnum = termsEnum.docs(null, null); // enumerate through documents, in this case only one
        while (docsEnum.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
            // get the term frequency in the document
            int tf = docsEnum.freq();
            tfvec.add(new TermWt(termStr, tf));
        }
    }

    Collections.sort(tfvec);
    vec = new TermWt[tfvec.size()];
    vec = tfvec.toArray(vec);
}
From source file:retriever.TermFreq.java
public String getTfVectorString(int docId) throws Exception {
    Terms terms = reader.getTermVector(docId, FIELD_ANALYZED_CONTENT);
    if (terms == null || terms.size() == 0)
        return "";

    TermsEnum termsEnum;
    BytesRef term;
    List<TermFreq> tfvec = new ArrayList<>();

    // Construct the normalized tf vector
    termsEnum = terms.iterator(null); // access the terms for this field
    while ((term = termsEnum.next()) != null) { // explore the terms for this field
        String termStr = term.utf8ToString();
        if (isNumber(termStr))
            continue;

        DocsEnum docsEnum = termsEnum.docs(null, null); // enumerate through documents, in this case only one
        while (docsEnum.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
            // get the term frequency in the document
            int tf = docsEnum.freq();
            float idf = numDocsInCollection / (float) reader.docFreq(new Term(FIELD_ANALYZED_CONTENT, term));
            tfvec.add(new TermFreq(termStr, tf, idf));
        }
    }

    Collections.sort(tfvec);

    StringBuffer buff = new StringBuffer();
    for (TermFreq tf : tfvec)
        buff.append(tf.term).append(":").append(tf.tf).append(", ").append(tf.idf).append(" ");
    if (buff.length() > 2) {
        buff.deleteCharAt(buff.length() - 1);
        buff.deleteCharAt(buff.length() - 1);
    }
    return buff.toString();
}
From source file:searchenginelucene.LuceneSearchEngine.java
public static void getTermFrequencyPairs(String indexLocation) throws IOException {
    Map<String, Integer> termfrequency = new HashMap<String, Integer>();
    IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(indexLocation)));

    // Temporary location to store the intermediate term frequency results
    PrintWriter writer_tf = new PrintWriter("..\\terf-frequency.csv");

    int docnum = reader.numDocs();
    // System.out.println("docnum:" + docnum);

    Fields fields1 = MultiFields.getFields(reader);
    for (String field : fields1) {
        Terms terms1 = fields1.terms("contents");
        TermsEnum termsEnum = terms1.iterator(null);
        int noWords = 0;
        while (termsEnum.next() != null) {
            noWords++;
            int count = 0;
            DocsEnum docsEnum = termsEnum.docs(null, null);
            int docIdEnum;
            // System.out.print("The term is->" + termsEnum.term().utf8ToString());
            while ((docIdEnum = docsEnum.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
                count += docsEnum.freq();
            }
            // System.out.println("count:" + count);
            termfrequency.put(termsEnum.term().utf8ToString(), count);
        }
        System.out.println("Total Number of Words:" + noWords);
    }

    // =========================================================
    // Write the terms and their frequencies to a file
    // =========================================================
    for (String key : termfrequency.keySet()) {
        writer_tf.print(key + ",");
        writer_tf.println(termfrequency.get(key));
    }
    writer_tf.close();
}
From source file:solr2155.solr.search.function.GeoHashValueSource.java
License:Apache License
@SuppressWarnings({ "unchecked" })
GeoHashValueSource(String fieldName, SolrIndexSearcher searcher) throws IOException {
    log.info("Loading geohash field " + fieldName + " into memory.");
    this.fieldName = fieldName;

    // Get gridReferenceSystem
    final GridNode.GridReferenceSystem gridReferenceSystem;
    FieldType fieldType = searcher.getSchema().getField(fieldName).getType();
    if (fieldType instanceof GeoHashField) {
        gridReferenceSystem = ((GeoHashField) fieldType).getGridReferenceSystem();
    } else
        throw new RuntimeException(
                "field " + fieldName + " should be a GeoHashField, not " + fieldType.getTypeName());

    // Traverse the index to load up doc2PointsCache
    IndexReader reader = searcher.getIndexReader();
    TermsEnumCompatibility termsEnum = new TermsEnumCompatibility(reader, fieldName);
    TermDocs termDocs = reader.termDocs(); // cached for termsEnum.docs() calls
    try {
        while (true) {
            final Term term = termsEnum.next();
            if (term == null)
                break;
            if (term.text().length() != gridReferenceSystem.getPrecision())
                continue;
            Point2D point = gridReferenceSystem.decodeXY(term.text());
            termDocs.seek(termsEnum.getTermEnum());
            while (termDocs.next()) {
                final int docId = termDocs.doc();
                if (docId == DocIdSetIterator.NO_MORE_DOCS)
                    break;
                if (doc2PointsCache == null)
                    doc2PointsCache = (List<Point2D>[]) new List[reader.maxDoc()]; // java generics hack
                List<Point2D> points = doc2PointsCache[docId];
                if (points == null) {
                    points = new ArrayList<Point2D>(DEFAULT_ARRAY_CAPACITY);
                    doc2PointsCache[docId] = points;
                }
                points.add(point);
            }
        }
    } finally {
        // in Lucene 3 these should be closed (not in Lucene 4)
        termDocs.close();
        termsEnum.close();
    }

    // Log statistics
    if (log.isInfoEnabled()) {
        int min = Integer.MAX_VALUE, sum = 0, max = 0;
        int dlen = 0;
        if (doc2PointsCache != null) {
            dlen = doc2PointsCache.length;
            for (List<Point2D> point2Ds : doc2PointsCache) {
                int plen = (point2Ds == null ? 0 : point2Ds.size());
                min = Math.min(min, plen);
                max = Math.max(max, plen);
                sum += plen;
            }
        }
        if (min == Integer.MAX_VALUE)
            min = 0;
        float avg = (float) sum / dlen;
        log.info("field '" + fieldName + "' in RAM: loaded min/avg/max per doc #: (" + min + "," + avg + ","
                + max + ") #" + dlen);
    }
}
From source file:solutions.siren.join.action.terms.collector.BitSetHitStream.java
License:Open Source License
@Override
public void next() {
    LimitedBitSetHitCollector collector = (LimitedBitSetHitCollector) this.getCollector();
    int atomicDocId = this.currentAtomicDocId;

    if (currentAtomicReaderId < collector.getFixedSets().size()) {
        do {
            FixedBitSet bitSet = collector.getFixedSets().get(currentAtomicReaderId);
            if (atomicDocId == DocIdSetIterator.NO_MORE_DOCS) {
                // we start a new reader, reset the doc id
                atomicDocId = -1;
            }
            atomicDocId = atomicDocId + 1 < bitSet.length() ? bitSet.nextSetBit(atomicDocId + 1)
                    : DocIdSetIterator.NO_MORE_DOCS;
        } while (atomicDocId == DocIdSetIterator.NO_MORE_DOCS
                && ++currentAtomicReaderId < collector.getFixedSets().size());
    }

    this.currentAtomicDocId = atomicDocId;
    this.count++;
}
From source file:tech.beshu.ror.es.security.DocumentFilterReader.java
License:Open Source License
private DocumentFilterReader(LeafReader reader, Query query) throws IOException {
    super(reader);
    final IndexSearcher searcher = new IndexSearcher(this);
    searcher.setQueryCache(null);
    final boolean needsScores = false;
    final Weight preserveWeight = searcher.createWeight(query, needsScores, 0);

    final int maxDoc = this.in.maxDoc();
    final FixedBitSet bits = new FixedBitSet(maxDoc);
    final Scorer preserveScorer = preserveWeight.scorer(this.getContext());
    if (preserveScorer != null) {
        bits.or(preserveScorer.iterator());
    }

    if (in.hasDeletions()) {
        final Bits oldLiveDocs = in.getLiveDocs();
        assert oldLiveDocs != null;
        final DocIdSetIterator it = new BitSetIterator(bits, 0L);
        for (int i = it.nextDoc(); i != DocIdSetIterator.NO_MORE_DOCS; i = it.nextDoc()) {
            if (!oldLiveDocs.get(i)) {
                bits.clear(i);
            }
        }
    }

    this.liveDocs = bits;
    this.numDocs = bits.cardinality();
}
From source file:tw.com.kyle.luminance.corpus.compute.CollocateFromIndex.java
private List<Integer> get_discourse_doc_id() throws IOException {
    Query cq = new TermQuery(new Term("class", "discourse"));
    IndexSearcher searcher = new IndexSearcher(reader);
    Weight w = cq.createWeight(searcher, false);
    List<Integer> docid_list = new ArrayList<>();
    for (LeafReaderContext ctx : reader.leaves()) {
        Scorer scorer = w.scorer(ctx);
        DocIdSetIterator doc_it = scorer.iterator();
        int nxtDoc = 0;
        while ((nxtDoc = doc_it.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
            int doc_freq = scorer.freq();
            docid_list.add(nxtDoc);
        }
    }
    return docid_list;
}
From source file:tw.com.kyle.luminance.corpus.compute.ExpNetwork.java
private int get_common_doc_count(String node_x, String node_y, String doc_type, String start_date_str,
        String end_date_str) throws IOException {
    int n_doc = 0;
    Query query_a = build_phrase_query(node_x);
    Query query_b = build_phrase_query(node_y);
    Query query_c = new TermQuery(new Term("class", doc_type));
    BooleanQuery.Builder bquery = new BooleanQuery.Builder();
    bquery.add(query_a, BooleanClause.Occur.MUST);
    bquery.add(query_b, BooleanClause.Occur.MUST);
    bquery.add(query_c, BooleanClause.Occur.MUST);
    Weight w = bquery.build().createWeight(searcher, false);

    for (LeafReaderContext ctx : reader.leaves()) {
        SortedDocValues sorted_dv = ctx.reader().getSortedDocValues("timestamp");
        Scorer scorer = w.scorer(ctx);
        if (scorer == null) {
            continue;
        }
        DocIdSetIterator doc_it = scorer.iterator();
        int nxtDoc = 0;
        while ((nxtDoc = doc_it.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
            String timestamp = new String(sorted_dv.get(nxtDoc).bytes, StandardCharsets.UTF_8);
            // note: timestamp and (start|end)_date_str share a format, so they can be compared lexically
            int dbg1 = timestamp.compareTo(start_date_str);
            int dbg2 = timestamp.compareTo(end_date_str);
            if (timestamp.compareTo(start_date_str) >= 0 && timestamp.compareTo(end_date_str) < 0) {
                n_doc += 1;
            }
        }
    }
    return n_doc;
}