Usage examples for `org.apache.lucene.search.DocIdSetIterator.nextDoc()`:
public abstract int nextDoc() throws IOException;
From source file:proj.zoie.impl.indexing.internal.LuceneIndexDataLoader.java
License:Apache License
protected final int purgeDocuments() { synchronized (_optimizeMonitor) { if (_purgeFilter != null) { BaseSearchIndex<R> idx = getSearchIndex(); IndexReader writeReader = null; log.info("purging docs started..."); int count = 0; long start = System.currentTimeMillis(); ZoieIndexReader<R> reader = null; try { synchronized (idx) { idx.refresh(false);//from w ww.j a v a2s. c o m reader = idx.openIndexReader(); if (reader != null) reader.incZoieRef(); } if (reader != null) { writeReader = idx.openIndexReaderForDelete(); if (writeReader != null) { DocIdSetIterator iter = _purgeFilter.getDocIdSet(reader).iterator(); int doc; while ((doc = iter.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) { count++; writeReader.deleteDocument(doc); } } } } catch (Throwable th) { log.error("problem creating purge filter: " + th.getMessage(), th); } finally { if (reader != null) { reader.decZoieRef(); } if (writeReader != null) { try { writeReader.close(); } catch (IOException ioe) { ZoieHealth.setFatal(); log.error(ioe.getMessage(), ioe); } } } long end = System.currentTimeMillis(); log.info("purging docs completed in " + (end - start) + "ms"); log.info("total docs purged: " + count); return count; } return 0; } }
From source file:tech.beshu.ror.es.security.DocumentFilterReader.java
License:Open Source License
private DocumentFilterReader(LeafReader reader, Query query) throws IOException { super(reader); final IndexSearcher searcher = new IndexSearcher(this); searcher.setQueryCache(null);// w w w .j av a 2 s . c o m final boolean needsScores = false; final Weight preserveWeight = searcher.createWeight(query, needsScores, 0); final int maxDoc = this.in.maxDoc(); final FixedBitSet bits = new FixedBitSet(maxDoc); final Scorer preserveScorer = preserveWeight.scorer(this.getContext()); if (preserveScorer != null) { bits.or(preserveScorer.iterator()); } if (in.hasDeletions()) { final Bits oldLiveDocs = in.getLiveDocs(); assert oldLiveDocs != null; final DocIdSetIterator it = new BitSetIterator(bits, 0L); for (int i = it.nextDoc(); i != DocIdSetIterator.NO_MORE_DOCS; i = it.nextDoc()) { if (!oldLiveDocs.get(i)) { bits.clear(i); } } } this.liveDocs = bits; this.numDocs = bits.cardinality(); }
From source file:tw.com.kyle.luminance.corpus.compute.CollocateFromIndex.java
private List<Integer> get_discourse_doc_id() throws IOException { Query cq = new TermQuery(new Term("class", "discourse")); IndexSearcher searcher = new IndexSearcher(reader); Weight w = cq.createWeight(searcher, false); List<Integer> docid_list = new ArrayList<>(); for (LeafReaderContext ctx : reader.leaves()) { Scorer scorer = w.scorer(ctx);//from www . java2 s .co m DocIdSetIterator doc_it = scorer.iterator(); int nxtDoc = 0; while ((nxtDoc = doc_it.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) { int doc_freq = scorer.freq(); docid_list.add(nxtDoc); } } return docid_list; }
From source file:tw.com.kyle.luminance.corpus.compute.ExpNetwork.java
private int get_common_doc_count(String node_x, String node_y, String doc_type, String start_date_str, String end_date_str) throws IOException { int n_doc = 0; Query query_a = build_phrase_query(node_x); Query query_b = build_phrase_query(node_y); Query query_c = new TermQuery(new Term("class", doc_type)); BooleanQuery.Builder bquery = new BooleanQuery.Builder(); bquery.add(query_a, BooleanClause.Occur.MUST); bquery.add(query_b, BooleanClause.Occur.MUST); bquery.add(query_c, BooleanClause.Occur.MUST); Weight w = bquery.build().createWeight(searcher, false); for (LeafReaderContext ctx : reader.leaves()) { SortedDocValues sorted_dv = ctx.reader().getSortedDocValues("timestamp"); Scorer scorer = w.scorer(ctx);/*from w w w . j av a 2 s .c om*/ if (scorer == null) { continue; } DocIdSetIterator doc_it = scorer.iterator(); int nxtDoc = 0; while ((nxtDoc = doc_it.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) { String timestamp = new String(sorted_dv.get(nxtDoc).bytes, StandardCharsets.UTF_8); //! note that both timestamp, (start|end)_date_str are both //! formatted so can be ordered lexically int dbg1 = timestamp.compareTo(start_date_str); int dbg2 = timestamp.compareTo(end_date_str); if (timestamp.compareTo(start_date_str) >= 0 && timestamp.compareTo(end_date_str) < 0) { n_doc += 1; } } } return n_doc; }
From source file:tw.com.kyle.luminance.corpus.compute.ExpNetwork.java
private int get_token_count_timerange(String node_x, String start_date_str, String end_date_str) throws IOException { int n_doc = 0; Query query_a = build_phrase_query(node_x); Weight w = query_a.createWeight(searcher, false); for (LeafReaderContext ctx : reader.leaves()) { SortedDocValues sorted_dv = ctx.reader().getSortedDocValues("timestamp"); Scorer scorer = w.scorer(ctx);// ww w . j av a2s.co m if (scorer == null) { continue; } DocIdSetIterator doc_it = scorer.iterator(); int nxtDoc = 0; while ((nxtDoc = doc_it.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) { String timestamp = new String(sorted_dv.get(nxtDoc).bytes, StandardCharsets.UTF_8); //! note that both timestamp, (start|end)_date_str are both //! formatted so can be ordered lexically int dbg1 = timestamp.compareTo(start_date_str); int dbg2 = timestamp.compareTo(end_date_str); if (timestamp.compareTo(start_date_str) >= 0 && timestamp.compareTo(end_date_str) < 0) { n_doc += 1; } } } return n_doc; }