Example usage for org.apache.solr.schema FieldType toInternal

List of usage examples for org.apache.solr.schema FieldType toInternal

Introduction

In this page you can find the example usage for org.apache.solr.schema FieldType toInternal.

Prototype

public String toInternal(String val) 

Source Link

Document

Convert an external value (from XML update command or from query string) into the internal format for both storing and indexing (which can be modified by any analyzers).

Usage

From source file: org.jahia.services.search.facets.SimpleJahiaJcrFacets.java

License: Open Source License

/**
 * Counts, for each explicitly listed term, how many documents in the base
 * set match that term in the given field.
 *
 * @param termList comma-separated list of external (readable) term values
 * @return term/count pairs in the same order the terms were listed
 * @throws IOException if reading the index fails
 */
private NamedList<Object> getListedTermCounts(String field, ExtendedPropertyDefinition epd,
        String fieldNameInIndex, String locale, String termList) throws IOException {
    // Resolve the Solr field type so external values can be converted to
    // their indexed (internal) representation before matching.
    FieldType ft = getType(epd);
    NamedList<Object> counts = new NamedList<Object>();
    Term fieldTemplate = new Term(field);
    for (String requested : StrUtils.splitSmart(termList, ",", true)) {
        Term indexed = fieldTemplate.createTerm(ft.toInternal(requested));
        long matches = OpenBitSet.intersectionCount(getDocIdSet(new TermQuery(indexed), ""), base);
        // Report the count under the original (external) term value.
        counts.add(requested, (int) matches);
    }
    return counts;
}

From source file:org.jahia.services.search.facets.SimpleJahiaJcrFacets.java

License:Open Source License

/**
 * Returns a list of terms in the specified field along with the corresponding count of documents in the set that match that constraint.
 * This method uses the FilterCache to get the intersection count between <code>docs</code> and the DocSet for each term in the filter.
 *
 * @param searcher the index searcher used to enumerate terms and read documents
 * @param docs the base set of documents each term's postings are intersected with
 * @param field the logical facet field name (used for per-field parameter lookups)
 * @param fieldName the actual field name as stored in the Lucene index
 * @param offset number of leading term counts to skip in the result
 * @param limit maximum number of term counts to return; negative means unlimited
 * @param mincount minimum intersection count a term needs to be included
 * @param missing if true, appends (under a null key) the count of documents with no value for the field
 * @param sort "count" or "true" sorts results by descending count; any other value keeps index (term) order
 * @param prefix if non-null, enumeration is restricted to terms starting with this prefix
 * @param locale if non-null, translation documents are mapped back to their main document before counting
 *        — NOTE(review): exact mapping semantics depend on {@code getMainDocIdForTranslations}, not visible here
 * @param epd the JCR property definition used to resolve the Solr field type
 * @return facet term/count pairs (terms rendered via {@code indexedToReadable})
 * @throws IOException if reading from the index fails
 * @see FacetParams#FACET_LIMIT
 * @see FacetParams#FACET_ZEROS
 * @see FacetParams#FACET_MISSING
 */
public NamedList<Object> getFacetTermEnumCounts(IndexSearcher searcher, OpenBitSet docs, String field,
        String fieldName, int offset, int limit, int mincount, boolean missing, String sort, String prefix,
        String locale, ExtendedPropertyDefinition epd) throws IOException {

    /*
     * :TODO: potential optimization... cache the Terms with the highest docFreq and try them first don't enum if we get our max from
     * them
     */

    // Minimum term docFreq in order to use the filterCache for that term.
    int minDfFilterCache = params.getFieldInt(field, FacetParams.FACET_ENUM_CACHE_MINDF, 0);

    IndexReader r = searcher.getIndexReader();
    FieldType ft = getType(epd);

    // When sorting by count, collect up to offset+limit candidates in a TreeSet
    // (ordered by CountPair's natural ordering); otherwise results are emitted
    // directly in index order and no queue is needed.
    final int maxsize = limit >= 0 ? offset + limit : Integer.MAX_VALUE - 1;
    final TreeSet<SimpleFacets.CountPair<String, Integer>> queue = (sort.equals("count") || sort.equals("true"))
            ? new TreeSet<SimpleFacets.CountPair<String, Integer>>()
            : null;
    final NamedList<Object> res = new NamedList<Object>();

    int min = mincount - 1; // the smallest value in the top 'N' values
    int off = offset;
    int lim = limit >= 0 ? limit : Integer.MAX_VALUE;

    // Seek the term enumerator to the first term of the field (or of the prefix,
    // converted to its internal/indexed form).
    String startTerm = prefix == null ? "" : ft.toInternal(prefix);
    TermEnum te = r.terms(new Term(fieldName, startTerm));
    TermDocs td = r.termDocs();
    SolrIndexSearcher.TermDocsState tdState = new SolrIndexSearcher.TermDocsState();
    tdState.tenum = te;
    tdState.tdocs = td;

    // No term can reach mincount if the base set itself is smaller than it.
    if (docs.size() >= mincount) {
        do {
            Term t = te.term();

            // Enumeration runs past the end of this field into the next one;
            // stop as soon as we leave it.
            if (null == t || !t.field().equals(fieldName))
                break;

            // Terms are enumerated in order, so the first non-matching term
            // means no further term can match the prefix.
            if (prefix != null && !t.text().startsWith(prefix))
                break;

            int df = te.docFreq();

            // If we are sorting, we can use df>min (rather than >=) since we
            // are going in index order. For certain term distributions this can
            // make a large difference (for example, many terms with df=1).
            if (df > 0 && df > min) {
                int c;

                if (df >= minDfFilterCache) {
                    // use the filter cache
                    // TODO: use the new method ???                        
                    //                        docs.intersectionSize( searcher.getPositiveDocSet(new TermQuery(t), tdState) );
                    c = (int) OpenBitSet.intersectionCount(getDocIdSet(new TermQuery(t), locale), docs);
                } else {
                    // iterate over TermDocs to calculate the intersection
                    td.seek(te);
                    c = 0;
                    while (td.next()) {
                        int doc = td.doc();
                        if (locale != null) {
                            // Remap translation documents to their main document
                            // so the count is attributed to the main node.
                            doc = getMainDocIdForTranslations(
                                    searcher.getIndexReader().document(doc, PARENT_AND_TRANSLATION_FIELDS),
                                    locale);
                        }

                        if (docs.fastGet(doc)) {
                            c++;
                        }
                    }
                }

                if (sort.equals("count") || sort.equals("true")) {
                    if (c > min) {
                        queue.add(new SimpleFacets.CountPair<String, Integer>(t.text(), c));
                        // Once the queue holds offset+limit entries we can stop
                        // enumerating entirely.
                        if (queue.size() >= maxsize) {
                            break;
                        }
                    }
                } else {
                    // Index-order mode: apply offset/limit while streaming.
                    // (--off < 0 skips the first 'offset' qualifying terms;
                    // --lim < 0 stops after 'limit' results.)
                    if (c >= mincount && --off < 0) {
                        if (--lim < 0)
                            break;
                        res.add(ft.indexedToReadable(t.text()), c);
                    }
                }
            }
        } while (te.next());
    }

    // Count-sorted mode: drain the queue, applying offset/limit now.
    if (sort.equals("count") || sort.equals("true")) {
        for (SimpleFacets.CountPair<String, Integer> p : queue) {
            if (--off >= 0)
                continue;
            if (--lim < 0)
                break;
            res.add(ft.indexedToReadable(p.key), p.val);
        }
    }

    // Optionally append the count of documents with no value under a null key.
    if (missing) {
        res.add(null, getFieldMissingCount(searcher, docs, fieldName, locale));
    }

    te.close();
    td.close();

    return res;
}