Example usage for org.apache.solr.common.params FacetParams FACET_ENUM_CACHE_MINDF

List of usage examples for org.apache.solr.common.params FacetParams FACET_ENUM_CACHE_MINDF

Introduction

In this page you can find the example usage for org.apache.solr.common.params FacetParams FACET_ENUM_CACHE_MINDF.

Prototype

String FACET_ENUM_CACHE_MINDF

To view the source code for org.apache.solr.common.params FacetParams FACET_ENUM_CACHE_MINDF, click the Source Link below.

Click Source Link

Document

When faceting by enumerating the terms in a field, only use the filterCache for terms with a df >= to this parameter.

Usage

From source file:org.jahia.services.search.facets.SimpleJahiaJcrFacets.java

License:Open Source License

/**
 * Returns a list of terms in the specified field along with the corresponding count of documents
 * in the set that match that constraint. This method uses the FilterCache to get the intersection
 * count between <code>docs</code> and the DocSet for each term whose docFreq is at least
 * {@link FacetParams#FACET_ENUM_CACHE_MINDF}; below that threshold the intersection is computed
 * by iterating TermDocs directly.
 *
 * @param searcher  the index searcher used to enumerate terms and resolve documents
 * @param docs      the base document set to intersect each term's postings against
 * @param field     the logical field name used to look up per-field facet params
 * @param fieldName the indexed Lucene field name used for term enumeration
 * @param offset    number of leading facet entries to skip
 * @param limit     maximum number of facet entries to return (negative means unlimited)
 * @param mincount  minimum intersection count for a term to be included
 * @param missing   if true, append the count of documents with no value in the field
 * @param sort      "count"/"true" sorts by descending count, otherwise index order is kept
 * @param prefix    if non-null, restrict enumeration to terms starting with this prefix
 * @param locale    if non-null, translation documents are mapped to their main document
 * @param epd       property definition used to resolve the field type for readable values
 * @return facet term/count pairs (plus an optional trailing missing-count entry)
 * @throws IOException if term enumeration or document access fails
 * @see FacetParams#FACET_LIMIT
 * @see FacetParams#FACET_ZEROS
 * @see FacetParams#FACET_MISSING
 */
public NamedList<Object> getFacetTermEnumCounts(IndexSearcher searcher, OpenBitSet docs, String field,
        String fieldName, int offset, int limit, int mincount, boolean missing, String sort, String prefix,
        String locale, ExtendedPropertyDefinition epd) throws IOException {

    /*
     * :TODO: potential optimization... cache the Terms with the highest docFreq and try them first don't enum if we get our max from
     * them
     */

    // Minimum term docFreq in order to use the filterCache for that term.
    int minDfFilterCache = params.getFieldInt(field, FacetParams.FACET_ENUM_CACHE_MINDF, 0);

    IndexReader r = searcher.getIndexReader();
    FieldType ft = getType(epd);

    // Hoisted: this predicate was previously re-evaluated at every term.
    final boolean sortByCount = sort.equals("count") || sort.equals("true");

    final int maxsize = limit >= 0 ? offset + limit : Integer.MAX_VALUE - 1;
    // When sorting by count, candidates are collected in a TreeSet and trimmed afterwards;
    // in index order the results are streamed straight into 'res'.
    final TreeSet<SimpleFacets.CountPair<String, Integer>> queue = sortByCount
            ? new TreeSet<SimpleFacets.CountPair<String, Integer>>()
            : null;
    final NamedList<Object> res = new NamedList<Object>();

    int min = mincount - 1; // the smallest value in the top 'N' values
    int off = offset;
    int lim = limit >= 0 ? limit : Integer.MAX_VALUE;

    // NOTE(review): enumeration seeks on ft.toInternal(prefix) but the startsWith check below
    // uses the raw prefix — assumes the two representations agree for this field type; confirm.
    String startTerm = prefix == null ? "" : ft.toInternal(prefix);
    TermEnum te = r.terms(new Term(fieldName, startTerm));
    TermDocs td = r.termDocs();
    // FIX: te/td were previously closed only on the success path and leaked on exceptions;
    // the try/finally below guarantees both are closed.
    try {
        SolrIndexSearcher.TermDocsState tdState = new SolrIndexSearcher.TermDocsState();
        tdState.tenum = te;
        tdState.tdocs = td;

        if (docs.size() >= mincount) {
            do {
                Term t = te.term();

                // Stop once enumeration leaves the target field.
                if (null == t || !t.field().equals(fieldName))
                    break;

                // Terms are ordered, so the first non-matching term ends the prefix range.
                if (prefix != null && !t.text().startsWith(prefix))
                    break;

                int df = te.docFreq();

                // If we are sorting, we can use df>min (rather than >=) since we
                // are going in index order. For certain term distributions this can
                // make a large difference (for example, many terms with df=1).
                if (df > 0 && df > min) {
                    int c;

                    if (df >= minDfFilterCache) {
                        // use the filter cache
                        // TODO: use the new method ???
                        //                        docs.intersectionSize( searcher.getPositiveDocSet(new TermQuery(t), tdState) );
                        c = (int) OpenBitSet.intersectionCount(getDocIdSet(new TermQuery(t), locale), docs);
                    } else {
                        // iterate over TermDocs to calculate the intersection
                        td.seek(te);
                        c = 0;
                        while (td.next()) {
                            int doc = td.doc();
                            if (locale != null) {
                                // Translation docs are counted against their main document.
                                doc = getMainDocIdForTranslations(
                                        searcher.getIndexReader().document(doc, PARENT_AND_TRANSLATION_FIELDS),
                                        locale);
                            }

                            if (docs.fastGet(doc)) {
                                c++;
                            }
                        }
                    }

                    if (sortByCount) {
                        if (c > min) {
                            queue.add(new SimpleFacets.CountPair<String, Integer>(t.text(), c));
                            if (queue.size() >= maxsize) {
                                break;
                            }
                        }
                    } else {
                        // Index order: apply offset/limit while streaming.
                        if (c >= mincount && --off < 0) {
                            if (--lim < 0)
                                break;
                            res.add(ft.indexedToReadable(t.text()), c);
                        }
                    }
                }
            } while (te.next());
        }

        if (sortByCount) {
            // Drain the sorted queue, applying offset and limit.
            for (SimpleFacets.CountPair<String, Integer> p : queue) {
                if (--off >= 0)
                    continue;
                if (--lim < 0)
                    break;
                res.add(ft.indexedToReadable(p.key), p.val);
            }
        }

        if (missing) {
            res.add(null, getFieldMissingCount(searcher, docs, fieldName, locale));
        }

        return res;
    } finally {
        // Close both even if te.close() throws.
        try {
            te.close();
        } finally {
            td.close();
        }
    }
}