Example usage for org.apache.lucene.search.similarities ClassicSimilarity tf

List of usage examples for org.apache.lucene.search.similarities ClassicSimilarity tf

Introduction

On this page you can find example usages of org.apache.lucene.search.similarities.ClassicSimilarity.tf.

Prototype

@Override
public float tf(float freq) 

Source Link

Document

Implemented as sqrt(freq).

Usage

From source file: indexer.Retriever.java

/**
 * Builds a one-line, human-readable report of how often {@code word} occurs in one document.
 *
 * <p>The raw within-document frequency is read from the postings of {@code word} in
 * {@code documentField} and converted to a TF value with {@link ClassicSimilarity#tf(float)}.
 *
 * @param reader index reader the postings are read from
 * @param docID  number of the document to inspect
 * @param word   term text; used unchanged to build the {@link Term}
 * @return a tab-prefixed line containing the word, its TF value and its raw frequency; both
 *         numbers are zero when the word does not occur in the document
 * @throws IOException if the postings cannot be read
 */
private String getTF(IndexReader reader, int docID, String word) throws IOException {
    ClassicSimilarity similarity = new ClassicSimilarity();
    int postingsFreq = 0;
    float wordFreq = 0;

    Term term = new Term(documentField, word);
    PostingsEnum docsEnum = MultiFields.getTermDocsEnum(reader, documentField, term.bytes());

    // getTermDocsEnum returns null when the field or the term is absent from the index;
    // in that case the word simply occurs zero times.
    // advance() jumps straight to the first posting >= docID instead of scanning every
    // posting; it must not be called with a target below the enum's initial position (-1).
    if (docsEnum != null && docID >= 0 && docsEnum.advance(docID) == docID) {
        postingsFreq = docsEnum.freq();
        wordFreq = similarity.tf(postingsFreq);
    }

    return "\t" + word + ": TF = " + wordFreq + " (" + postingsFreq + " times in this document)";
}

From source file: pretraga.IsolationSimilarity.java

/**
 * Prints, for every document selected by the query vector, the {@link ClassicSimilarity}
 * score of each term-vector term that also appears in the query vector.
 *
 * <p>One line is written to standard output per matching term, in the form
 * {@code <score>, <title>, term: <term>}. Documents without a term vector for
 * {@code CONTENT} are skipped. Failures are reported on standard error rather than
 * propagated, so this method never throws.
 *
 * @param vec raw query input; it is split into terms by {@code processInput}
 */
public void test(String vec) {
    List<String> vector = processInput(vec);
    // try-with-resources so the directory and reader are released on every exit path
    try (Directory dir = FSDirectory.open(new File(indexDirectoryPath).toPath());
            IndexReader reader = DirectoryReader.open(dir)) {
        IndexSearcher searcher = new IndexSearcher(reader);

        List<Integer> docId = getDocumentsFromVector(vector, reader, searcher);

        // The similarity does not change between terms, so resolve and validate it once.
        if (!(searcher.getSimilarity(true) instanceof ClassicSimilarity)) {
            System.err.println("test: searcher similarity is not a ClassicSimilarity, nothing to score");
            return;
        }
        ClassicSimilarity sim = (ClassicSimilarity) searcher.getSimilarity(true);

        for (int doc : docId) {
            Fields ff = reader.getTermVectors(doc);
            Terms terms = (ff == null) ? null : ff.terms(CONTENT);
            if (terms == null) {
                // No term vector stored for this document/field: skip it instead of aborting the run.
                continue;
            }

            // Per-document values, hoisted out of the per-term loop.
            Document d = reader.document(doc);
            CollectionStatistics s = new CollectionStatistics(CONTENT, reader.maxDoc(), terms.getDocCount(),
                    terms.getSumTotalTermFreq(), terms.getSumDocFreq());

            TermsEnum te = terms.iterator();
            BytesRef by;
            while ((by = te.next()) != null) {
                String term = by.utf8ToString();

                // tf/idf exactly as ClassicSimilarity computes them; kept as diagnostic values
                // for the optional debug print below, they do not feed into the printed score.
                float idf = sim.idf(te.docFreq(), reader.maxDoc());
                float tf = sim.tf(te.totalTermFreq());
                //System.out.println("idf = " + idf + ", tf = " + tf + ", docF: " + te.totalTermFreq());

                if (vector.contains(term)) {
                    TermStatistics ts = new TermStatistics(by, te.docFreq(), te.totalTermFreq());
                    // NOTE(review): scoring always uses the first leaf; confirm the index is single-segment.
                    float ttt = sim.simScorer(sim.computeWeight(s, ts), reader.getContext().leaves().get(0))
                            .score(doc, te.totalTermFreq());
                    System.out.println(ttt + ", " + d.get(TITLE) + ", term: " + term);
                }
            }
        }
    } catch (Exception e) {
        // Best-effort diagnostic method: keep the non-throwing contract but do not hide the failure.
        System.err.println("test: scoring failed for input \"" + vec + "\": " + e);
    }
}