List of usage examples for org.apache.lucene.index.TermsEnum#postings
public final PostingsEnum postings(PostingsEnum reuse) throws IOException
From source file:br.pucminas.ri.jsearch.queryexpansion.RocchioQueryExpansion.java
License:Open Source License
/**
 * Builds a (term -> score) list for every term of the DOC_CONTENT field in the given index.
 * The score is BETA * tf * idf, computed with ConcreteTFIDFSimilarity.
 *
 * NOTE(review): the inner postings loop overwrites the map entry once per document, so the
 * value kept is the score for the LAST document containing the term — confirm whether
 * accumulation across documents was intended.
 *
 * @param directory the Lucene directory holding the index to scan
 * @return the map entries as a list (iteration order unspecified, HashMap-backed)
 * @throws CorruptIndexException if the index is corrupt
 * @throws IOException on low-level I/O failure while opening the reader
 */
private List<Entry<String, Float>> getTermScoreList(Directory directory) throws CorruptIndexException, IOException {
    Map<String, Float> termScoreMap = new HashMap<>();
    ConcreteTFIDFSimilarity sim = new ConcreteTFIDFSimilarity();
    try (IndexReader idxReader = DirectoryReader.open(directory)) {
        idxReader.leaves().stream().map((leaf) -> leaf.reader()).forEach((reader) -> {
            try {
                Terms terms = reader.terms(Constants.DOC_CONTENT);
                TermsEnum termsEnum = terms.iterator();
                PostingsEnum postings = null;
                BytesRef text;
                while ((text = termsEnum.next()) != null) {
                    // Reuse the previous PostingsEnum instance to avoid reallocating per term.
                    postings = termsEnum.postings(postings);
                    while (postings.nextDoc() != PostingsEnum.NO_MORE_DOCS) {
                        int freq = postings.freq();
                        float tf = sim.tf(freq);
                        // Fixed: was "indexReader.numDocs()" — undefined symbol; the reader
                        // in scope is idxReader.
                        float idf = sim.idf(termsEnum.docFreq(), idxReader.numDocs());
                        termScoreMap.put(text.utf8ToString(), BETA * (tf * idf));
                    }
                }
            } catch (IOException ex) {
                Logger.getLogger(RocchioQueryExpansion.class.getName()).log(Level.SEVERE, null, ex);
            }
            // Fixed: removed the finally block that closed idxReader inside the per-leaf
            // loop — try-with-resources already closes it exactly once, and closing it
            // mid-iteration broke processing of subsequent leaves.
        });
    }
    return new ArrayList<>(termScoreMap.entrySet());
}
From source file:br.pucminas.ri.jsearch.queryexpansion.RocchioQueryExpansion.java
License:Open Source License
private float getScore(Directory directory, String term) throws CorruptIndexException, IOException { try (IndexReader idxReader = DirectoryReader.open(directory)) { ConcreteTFIDFSimilarity sim = new ConcreteTFIDFSimilarity(); for (LeafReaderContext context : idxReader.leaves()) { LeafReader reader = context.reader(); try { Terms terms = reader.terms(Constants.DOC_CONTENT); TermsEnum termsEnum = terms.iterator(); PostingsEnum postings = null; BytesRef text;//from ww w .ja va2s.c om while ((text = termsEnum.next()) != null) { postings = termsEnum.postings(postings); if (text.utf8ToString().equalsIgnoreCase(term)) { while (postings.nextDoc() != PostingsEnum.NO_MORE_DOCS) { int freq = postings.freq(); float tf = sim.tf(freq); float idf = sim.idf(termsEnum.docFreq(), indexReader.numDocs()); return tf * idf; } } } } catch (IOException ex) { Logger.getLogger(RocchioQueryExpansion.class.getName()).log(Level.SEVERE, null, ex); } } } return 0; }
From source file:it.cnr.ilc.lc.clavius.search.Tester.java
private static void searchWithContext(String term) { try {/*from w w w . j av a 2 s . c o m*/ logger.info("searchWithContext(" + term + ")"); SpanQuery spanQuery = new SpanTermQuery(new Term("content", term)); Directory indexDirectory = FSDirectory.open( Paths.get("/var/lucene/claviusTest/indexes/it.cnr.ilc.lc.clavius.search.entity.PlainText")); DirectoryReader indexReader = DirectoryReader.open(indexDirectory); IndexSearcher searcher = new IndexSearcher(indexReader); IndexReader reader = searcher.getIndexReader(); //spanQuery = (SpanQuery) spanQuery.rewrite(reader); //SpanWeight weight = (SpanWeight) searcher.createWeight(spanQuery, false); Spans spans = spanQuery.createWeight(searcher, false) .getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); // Spans spans2 = weight.getSpans(reader.leaves().get(0), // SpanWeight.Postings.OFFSETS); //Spans spans = weight.getSpans(reader.leaves().get(0), SpanWeight.Postings.POSITIONS); ScoreDoc[] sc = searcher.search(spanQuery, 10).scoreDocs; logger.info("hits :" + sc.length); int i; if (null != spans) { // while ((nextDoc = spans.nextDoc()) != Spans.NO_MORE_DOCS) { for (int k = 0; k < sc.length; k++) { int docId = sc[k].doc; logger.info("docID: " + docId); int newDocID = spans.advance(docId); logger.info("newDocID: " + newDocID); int nextSpan = -1; while ((nextSpan = spans.nextStartPosition()) != Spans.NO_MORE_POSITIONS) { logger.info("nextSpan : " + nextSpan); logger.info("spans.startPosition(): " + spans.startPosition()); logger.info("spans.endPosition() : " + spans.endPosition()); logger.info("spans.width() : " + spans.width()); Fields fields = reader.getTermVectors(docId); Terms terms = fields.terms("content"); TermsEnum termsEnum = terms.iterator(); BytesRef text; PostingsEnum postingEnum = null; int start = spans.startPosition() - 3; int end = spans.endPosition() + 3; while ((text = termsEnum.next()) != null) { //could store the BytesRef here, but String is easier for this example String s 
= new String(text.bytes, text.offset, text.length); // DocsAndPositionsEnum positionsEnum = termsEnum.docsAndPositions(null, null); postingEnum = termsEnum.postings(postingEnum); if (postingEnum.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) { i = 0; int position = -1; while (i < postingEnum.freq() && (position = postingEnum.nextPosition()) != -1) { if (position >= start && position <= end) { logger.info("pos: " + position + ", term: " + s + " offset: " + text.offset + " length: " + text.length); } i++; } } } } } } else { logger.info("no " + term + " found!"); } } catch (IOException e) { logger.error(e.getMessage()); } logger.info("End."); }
From source file:org.codelibs.elasticsearch.common.lucene.search.XMoreLikeThis.java
License:Apache License
/** * Adds terms and frequencies found in vector into the Map termFreqMap * * @param termFreqMap a Map of terms and their frequencies * @param vector List of terms and their frequencies for a doc/field * @param fieldName Optional field name of the terms for skip terms *//*from w ww . j a va 2s . c om*/ private void addTermFrequencies(Map<String, Int> termFreqMap, Terms vector, @Nullable String fieldName) throws IOException { final TermsEnum termsEnum = vector.iterator(); final CharsRefBuilder spare = new CharsRefBuilder(); BytesRef text; while ((text = termsEnum.next()) != null) { spare.copyUTF8Bytes(text); final String term = spare.toString(); if (isNoiseWord(term)) { continue; } if (isSkipTerm(fieldName, term)) { continue; } final PostingsEnum docs = termsEnum.postings(null); int freq = 0; while (docs != null && docs.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) { freq += docs.freq(); } // increment frequency Int cnt = termFreqMap.get(term); if (cnt == null) { cnt = new Int(); termFreqMap.put(term, cnt); cnt.x = freq; } else { cnt.x += freq; } } }
From source file:org.elasticsearch.action.termvectors.TermVectorsFilter.java
License:Apache License
private int getTermFreq(TermsEnum termsEnum, PostingsEnum docsEnum) throws IOException { docsEnum = termsEnum.postings(docsEnum); docsEnum.nextDoc();// w ww . j a v a2s .c o m return docsEnum.freq(); }
From source file:org.elasticsearch.action.termvectors.TermVectorsWriter.java
License:Apache License
private PostingsEnum writeTermWithDocsOnly(TermsEnum iterator, PostingsEnum docsEnum) throws IOException { docsEnum = iterator.postings(docsEnum); int nextDoc = docsEnum.nextDoc(); assert nextDoc != DocIdSetIterator.NO_MORE_DOCS; writeFreq(docsEnum.freq());//from w w w. j av a 2 s .co m nextDoc = docsEnum.nextDoc(); assert nextDoc == DocIdSetIterator.NO_MORE_DOCS; return docsEnum; }
From source file:org.elasticsearch.index.shard.ShardSplittingQuery.java
License:Apache License
private static void findSplitDocs(String idField, Predicate<BytesRef> includeInShard, LeafReader leafReader, IntConsumer consumer) throws IOException { Terms terms = leafReader.terms(idField); TermsEnum iterator = terms.iterator(); BytesRef idTerm;//w w w . j av a2 s.co m PostingsEnum postingsEnum = null; while ((idTerm = iterator.next()) != null) { if (includeInShard.test(idTerm) == false) { postingsEnum = iterator.postings(postingsEnum); int doc; while ((doc = postingsEnum.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) { consumer.accept(doc); } } } }
From source file:org.elasticsearch.xpack.core.security.authz.accesscontrol.SecurityIndexSearcherWrapperUnitTests.java
License:Open Source License
private SparseFixedBitSet query(LeafReaderContext leaf, String field, String value) throws IOException { SparseFixedBitSet sparseFixedBitSet = new SparseFixedBitSet(leaf.reader().maxDoc()); TermsEnum tenum = leaf.reader().terms(field).iterator(); while (tenum.next().utf8ToString().equals(value) == false) { }//from w ww. ja va 2 s.com PostingsEnum penum = tenum.postings(null); sparseFixedBitSet.or(penum); return sparseFixedBitSet; }