Example usage for org.apache.lucene.search CollectionStatistics CollectionStatistics

List of usage examples for org.apache.lucene.search CollectionStatistics CollectionStatistics

Introduction

In this page you can find the example usage for org.apache.lucene.search CollectionStatistics CollectionStatistics.

Prototype

public CollectionStatistics(String field, long maxDoc, long docCount, long sumTotalTermFreq, long sumDocFreq) 

Source Link

Document

Creates statistics instance for a collection (field).

Usage

From source file: org.apache.solr.search.stats.CollectionStats.java

License: Apache License

/**
 * Builds a Lucene {@link CollectionStatistics} snapshot from the per-field
 * counters held by this instance.
 *
 * @return a new statistics object for {@code field}
 */
public CollectionStatistics toCollectionStatistics() {
    CollectionStatistics stats = new CollectionStatistics(this.field, this.maxDoc, this.docCount,
            this.sumTotalTermFreq, this.sumDocFreq);
    return stats;
}

From source file: org.elasticsearch.action.search.SearchPhaseController.java

License: Apache License

/**
 * Merges the DFS-phase results collected from every shard into a single
 * {@link AggregatedDfs} so that scoring can use index-wide statistics.
 *
 * Term statistics are merged per term: docFreq is summed directly, while
 * totalTermFreq is an optional statistic (-1 = not available) combined via
 * {@code optionalSum} so that a missing value on either side yields -1.
 * Field statistics are merged the same way per field name, and maxDoc is
 * summed across all shards.
 *
 * NOTE(review): assumes {@code results.asList()} yields only entries with a
 * non-null {@code value} — confirm against AtomicArray's contract.
 *
 * @param results per-shard DFS search results
 * @return the aggregated term statistics, field statistics and total maxDoc
 */
public AggregatedDfs aggregateDfs(AtomicArray<DfsSearchResult> results) {
    ObjectObjectHashMap<Term, TermStatistics> termStatistics = HppcMaps.newNoNullKeysMap();
    ObjectObjectHashMap<String, CollectionStatistics> fieldStatistics = HppcMaps.newNoNullKeysMap();
    long aggMaxDoc = 0;
    for (AtomicArray.Entry<DfsSearchResult> lEntry : results.asList()) {
        // terms[] and termStatistics[] are parallel arrays from this shard.
        final Term[] terms = lEntry.value.terms();
        final TermStatistics[] stats = lEntry.value.termStatistics();
        assert terms.length == stats.length;
        for (int i = 0; i < terms.length; i++) {
            assert terms[i] != null;
            TermStatistics existing = termStatistics.get(terms[i]);
            if (existing != null) {
                assert terms[i].bytes().equals(existing.term());
                // totalTermFrequency is an optional statistic we need to check if either one or both
                // are set to -1 which means not present and then set it globally to -1
                termStatistics.put(terms[i],
                        new TermStatistics(existing.term(), existing.docFreq() + stats[i].docFreq(),
                                optionalSum(existing.totalTermFreq(), stats[i].totalTermFreq())));
            } else {
                // First shard to report this term: take its statistics as-is.
                termStatistics.put(terms[i], stats[i]);
            }

        }

        assert !lEntry.value.fieldStatistics().containsKey(null);
        // Iterate the HPPC map's backing arrays directly; because the map is
        // null-key-hostile (see assert above), a null key marks an empty slot.
        final Object[] keys = lEntry.value.fieldStatistics().keys;
        final Object[] values = lEntry.value.fieldStatistics().values;
        for (int i = 0; i < keys.length; i++) {
            if (keys[i] != null) {
                String key = (String) keys[i];
                CollectionStatistics value = (CollectionStatistics) values[i];
                assert key != null;
                CollectionStatistics existing = fieldStatistics.get(key);
                if (existing != null) {
                    // maxDoc is always present and additive; the remaining three
                    // counters are optional and merged with the -1 convention.
                    CollectionStatistics merged = new CollectionStatistics(key,
                            existing.maxDoc() + value.maxDoc(),
                            optionalSum(existing.docCount(), value.docCount()),
                            optionalSum(existing.sumTotalTermFreq(), value.sumTotalTermFreq()),
                            optionalSum(existing.sumDocFreq(), value.sumDocFreq()));
                    fieldStatistics.put(key, merged);
                } else {
                    fieldStatistics.put(key, value);
                }
            }
        }
        aggMaxDoc += lEntry.value.maxDoc();
    }
    return new AggregatedDfs(termStatistics, fieldStatistics, aggMaxDoc);
}

From source file: org.elasticsearch.search.controller.SearchPhaseController.java

License: Apache License

/**
 * Merges the DFS-phase results collected from every shard into a single
 * {@link AggregatedDfs} so that scoring can use index-wide statistics.
 *
 * Term statistics are merged per term: docFreq is summed directly, while
 * totalTermFreq is an optional statistic (-1 = not available) combined via
 * {@code optionalSum} so that a missing value on either side yields -1.
 * Field statistics are merged the same way per field name, and maxDoc is
 * summed across all shards.
 *
 * NOTE(review): assumes {@code results.asList()} yields only entries with a
 * non-null {@code value} — confirm against AtomicArray's contract.
 *
 * @param results per-shard DFS search results
 * @return the aggregated term statistics, field statistics and total maxDoc
 */
public AggregatedDfs aggregateDfs(AtomicArray<DfsSearchResult> results) {
    ObjectObjectOpenHashMap<Term, TermStatistics> termStatistics = HppcMaps.newNoNullKeysMap();
    ObjectObjectOpenHashMap<String, CollectionStatistics> fieldStatistics = HppcMaps.newNoNullKeysMap();
    long aggMaxDoc = 0;
    for (AtomicArray.Entry<DfsSearchResult> lEntry : results.asList()) {
        // terms[] and termStatistics[] are parallel arrays from this shard.
        final Term[] terms = lEntry.value.terms();
        final TermStatistics[] stats = lEntry.value.termStatistics();
        assert terms.length == stats.length;
        for (int i = 0; i < terms.length; i++) {
            assert terms[i] != null;
            TermStatistics existing = termStatistics.get(terms[i]);
            if (existing != null) {
                assert terms[i].bytes().equals(existing.term());
                // totalTermFrequency is an optional statistic we need to check if either one or both
                // are set to -1 which means not present and then set it globally to -1
                termStatistics.put(terms[i],
                        new TermStatistics(existing.term(), existing.docFreq() + stats[i].docFreq(),
                                optionalSum(existing.totalTermFreq(), stats[i].totalTermFreq())));
            } else {
                // First shard to report this term: take its statistics as-is.
                termStatistics.put(terms[i], stats[i]);
            }

        }
        // Iterate the HPPC open-hash map's backing arrays directly; in this
        // (older) HPPC API, allocated[i] flags which slots are occupied.
        final boolean[] states = lEntry.value.fieldStatistics().allocated;
        final Object[] keys = lEntry.value.fieldStatistics().keys;
        final Object[] values = lEntry.value.fieldStatistics().values;
        for (int i = 0; i < states.length; i++) {
            if (states[i]) {
                String key = (String) keys[i];
                CollectionStatistics value = (CollectionStatistics) values[i];
                assert key != null;
                CollectionStatistics existing = fieldStatistics.get(key);
                if (existing != null) {
                    // maxDoc is always present and additive; the remaining three
                    // counters are optional and merged with the -1 convention.
                    CollectionStatistics merged = new CollectionStatistics(key,
                            existing.maxDoc() + value.maxDoc(),
                            optionalSum(existing.docCount(), value.docCount()),
                            optionalSum(existing.sumTotalTermFreq(), value.sumTotalTermFreq()),
                            optionalSum(existing.sumDocFreq(), value.sumDocFreq()));
                    fieldStatistics.put(key, merged);
                } else {
                    fieldStatistics.put(key, value);
                }
            }
        }
        aggMaxDoc += lEntry.value.maxDoc();
    }
    return new AggregatedDfs(termStatistics, fieldStatistics, aggMaxDoc);
}

From source file: org.elasticsearch.search.dfs.DfsSearchResult.java

License: Apache License

/**
 * Deserializes per-field collection statistics from the stream into the given
 * map, allocating a fresh map when none is supplied.
 *
 * NOTE(review): the three optional counters appear to be written with a +1
 * offset so that -1 ("not available") survives variable-length encoding;
 * {@code subOne} undoes that offset — confirm against the matching writer.
 *
 * @param in              source stream positioned at the field-stats section
 * @param fieldStatistics target map, or null to allocate one sized to fit
 * @return the populated map
 * @throws IOException if reading from the stream fails
 */
public static ObjectObjectOpenHashMap<String, CollectionStatistics> readFieldStats(StreamInput in,
        ObjectObjectOpenHashMap<String, CollectionStatistics> fieldStatistics) throws IOException {
    final int count = in.readVInt();
    if (fieldStatistics == null) {
        fieldStatistics = HppcMaps.newNoNullKeysMap(count);
    }
    for (int idx = 0; idx < count; idx++) {
        final String field = in.readString();
        assert field != null;
        // Argument evaluation is left-to-right, so the four readVLong() calls
        // consume the stream in the written order: maxDoc, docCount,
        // sumTotalTermFreq, sumDocFreq.
        fieldStatistics.put(field,
                new CollectionStatistics(field, in.readVLong(), subOne(in.readVLong()),
                        subOne(in.readVLong()), subOne(in.readVLong())));
    }
    return fieldStatistics;
}

From source file: pretraga.IsolationSimilarity.java

/**
 * Scores each document matching the query vector {@code vec} against the terms
 * of its stored term vector using classic TF-IDF, printing the score, title
 * and term for every term that also appears in the query vector.
 *
 * Fixes over the previous version: the {@code Directory} and
 * {@code IndexReader} are now closed via try-with-resources (they leaked),
 * exceptions are no longer silently swallowed, and a searcher whose
 * similarity is not {@link ClassicSimilarity} no longer causes an NPE.
 *
 * @param vec raw query-vector string; tokenized by {@code processInput}
 */
public void test(String vec) {
    List<String> vector = processInput(vec);
    try (Directory dir = FSDirectory.open(new File(indexDirectoryPath).toPath());
            IndexReader reader = DirectoryReader.open(dir)) {
        IndexSearcher searcher = new IndexSearcher(reader);

        // The scoring below relies on ClassicSimilarity; bail out explicitly
        // instead of dereferencing a null similarity as before.
        if (!(searcher.getSimilarity(true) instanceof ClassicSimilarity)) {
            System.err.println("test: expected ClassicSimilarity but searcher uses "
                    + searcher.getSimilarity(true).getClass().getName());
            return;
        }
        ClassicSimilarity sim = (ClassicSimilarity) searcher.getSimilarity(true);

        List<Integer> docId = getDocumentsFromVector(vector, reader, searcher);
        for (int i = 0; i < docId.size(); i++) {
            Fields ff = reader.getTermVectors(docId.get(i));
            Terms terms = ff.terms(CONTENT);

            // Field-level statistics are the same for every term of this field,
            // so build them once per document rather than once per term.
            CollectionStatistics s = new CollectionStatistics(CONTENT, reader.maxDoc(),
                    terms.getDocCount(), terms.getSumTotalTermFreq(), terms.getSumDocFreq());
            Document d = reader.document(docId.get(i));

            TermsEnum te = terms.iterator();
            for (BytesRef by = te.next(); by != null; by = te.next()) {
                String term = by.utf8ToString();
                if (!vector.contains(term)) {
                    continue;
                }
                TermStatistics ts = new TermStatistics(by, te.docFreq(), te.totalTermFreq());
                float ttt = sim.simScorer(sim.computeWeight(s, ts), reader.getContext().leaves().get(0))
                        .score(docId.get(i), te.totalTermFreq());
                System.out.println(ttt + ", " + d.get(TITLE) + ", term: " + term);
            }
        }
    } catch (Exception e) {
        // Previously swallowed silently; surface the failure for diagnosis.
        e.printStackTrace();
    }
}