List of usage examples for org.apache.lucene.search.similarities ClassicSimilarity tf
@Override public float tf(float freq)
Returns sqrt(freq). Example from source file: indexer.Retriever.java
private String getTF(IndexReader reader, int docID, String word) throws IOException { ClassicSimilarity similarity = new ClassicSimilarity(); int postingsFreq = 0; float wordFreq = 0; Term term = new Term(documentField, word); BytesRef bytesRef = term.bytes();// ww w. java 2 s. c om PostingsEnum docsEnum = MultiFields.getTermDocsEnum(reader, documentField, bytesRef); int currentDocID; while ((currentDocID = docsEnum.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) { if (currentDocID == docID) { int _postingsFreq = docsEnum.freq(); wordFreq += similarity.tf(_postingsFreq); postingsFreq += _postingsFreq; } } String printString = "\t" + word + ": TF = " + wordFreq + " (" + postingsFreq + " times in this document)"; return printString; }
From source file:pretraga.IsolationSimilarity.java
public void test(String vec) { List<String> vector = processInput(vec); HashMap<String, Long> map = new HashMap<>(); try {/*from www. ja v a2 s .co m*/ Directory dir = FSDirectory.open(new File(indexDirectoryPath).toPath()); IndexReader reader = DirectoryReader.open(dir); IndexSearcher searcher = new IndexSearcher(reader); List<Integer> docId = getDocumentsFromVector(vector, reader, searcher); for (int i = 0; i < docId.size(); i++) { Fields ff = reader.getTermVectors(docId.get(i)); Terms terms = ff.terms(CONTENT); TermsEnum te = terms.iterator(); Object tmp = te.next(); while (tmp != null) { BytesRef by = (BytesRef) tmp; String term = by.utf8ToString(); ClassicSimilarity sim = null; if (searcher.getSimilarity(true) instanceof ClassicSimilarity) { sim = (ClassicSimilarity) searcher.getSimilarity(true); } float idf = sim.idf(te.docFreq(), reader.maxDoc()); float tf = sim.tf(te.totalTermFreq()); //System.out.println("idf = " + idf + ", tf = " + tf + ", docF: " + te.totalTermFreq()); TermStatistics ts = new TermStatistics(by, te.docFreq(), te.totalTermFreq()); CollectionStatistics s = new CollectionStatistics(CONTENT, reader.maxDoc(), terms.getDocCount(), terms.getSumTotalTermFreq(), terms.getSumDocFreq()); Document d = reader.document(docId.get(i)); if (vector.contains(term)) { float ttt = sim.simScorer(sim.computeWeight(s, ts), reader.getContext().leaves().get(0)) .score(docId.get(i), te.totalTermFreq()); System.out.println(ttt + ", " + d.get(TITLE) + ", term: " + term); } tmp = te.next(); } /*Iterator<String> ss = ff.iterator(); while (ss.hasNext()) { String fieldString = ss.next(); System.out.println(fieldString); }*/ } } catch (Exception e) { } }