org.pageseeder.flint.lucene.facet.FlexibleFieldFacet.java Source code

Java tutorial

Introduction

Here is the source code for org.pageseeder.flint.lucene.facet.FlexibleFieldFacet.java

Source

/*
 * Copyright 2015 Allette Systems (Australia)
 * http://www.allette.com.au
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.pageseeder.flint.lucene.facet;

import java.io.IOException;
import java.text.ParseException;
import java.util.List;

import org.apache.lucene.document.DateTools;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.WildcardQuery;
import org.pageseeder.flint.lucene.search.DocumentCounter;
import org.pageseeder.flint.lucene.search.Filter;
import org.pageseeder.flint.lucene.search.Terms;
import org.pageseeder.flint.lucene.util.Beta;
import org.pageseeder.flint.lucene.util.Bucket;
import org.pageseeder.flint.lucene.util.Bucket.Entry;
import org.pageseeder.flint.lucene.util.Dates;
import org.pageseeder.xmlwriter.XMLWriter;

/**
 * A facet implementation using a simple index field.
 *
 * <p>The facet enumerates the terms of the field it is named after and, for each term, counts
 * the documents matching the query built by {@link #termToQuery(Term)} (optionally combined with
 * a base query and filters). The counts are stored in a {@link Bucket} created with the requested
 * size and can be written out with {@link #toXML(XMLWriter)}.
 *
 * <p>The state produced by a computation (values, total number of terms, "has results" flag) is
 * kept in the instance and reset at the start of every call to one of the <code>compute</code>
 * methods.
 *
 * @author Jean-Baptiste Reure
 *
 * @version 5.1.3
 */
@Beta
public abstract class FlexibleFieldFacet extends FlexibleFacet<String> {

    /**
     * The default number of facet values if not specified.
     */
    public static final int DEFAULT_MAX_NUMBER_OF_VALUES = 10;

    /**
     * The max nb of terms: when greater than zero and the field has more terms than this,
     * the term-by-term computation is abandoned and the facet is left in its reset state.
     */
    private final int _maxTerms;

    /**
     * The facet values and their cardinality, <code>null</code> until a computation
     * with a non-zero size has completed.
     */
    private transient Bucket<String> bucket;

    /**
     * The total number of terms found in the search results,
     * or -1 when only the presence of results has been (or will be) checked.
     */
    private transient int totalTerms = -1;

    /**
     * If there are results containing the field used in this facet
     */
    private transient boolean hasResults = false;

    /**
     * Creates a new facet with the specified name;
     *
     * @param name     The name of the facet.
     * @param maxterms The maximum number of terms the field may have for the facet to be
     *                 computed term by term (zero or negative means no limit).
     */
    protected FlexibleFieldFacet(String name, int maxterms) {
        super(name);
        this._maxTerms = maxterms;
    }

    /**
     * Computes each facet option as a flexible facet.
     * All filters but the ones using the same field as this facet are applied to the base query before computing the numbers.
     *
     * <p>If <code>size</code> is zero, no values are collected: the facet only works out whether
     * at least one result contains the field (see {@link #hasResults()}).
     *
     * <p>If the maximum number of terms given at construction is greater than zero and the field
     * has more terms than that, the term-by-term computation is skipped and the facet is left
     * with no values.
     *
     * @param searcher the index search to use.
     * @param base     the base query; if <code>null</code> the facet is computed directly from the index terms.
     * @param filters  the filters applied to the base query (ignored if the base query is null)
     * @param size     the maximum number of field values to compute.
     *
     * @throws IOException              if thrown by the searcher.
     * @throws IllegalArgumentException if <code>size</code> is negative.
     */
    public void compute(IndexSearcher searcher, Query base, List<Filter> filters, int size) throws IOException {
        // Validate before branching so that both the "base" and "no base" paths share the same contract
        if (size < 0) {
            throw new IllegalArgumentException("size < 0");
        }
        // If the base is null, simply calculate for each query
        if (base == null) {
            compute(searcher, size);
            return;
        }
        // reset
        this.totalTerms = size == 0 ? -1 : 0;
        this.hasResults = false;
        this.bucket = null;
        // re-compute the query without the corresponding filter (for flexible facets)
        Query filtered = base;
        if (filters != null) {
            this.flexible = true;
            for (Filter filter : filters) {
                if (!this._name.equals(filter.name())) {
                    filtered = filter.filterQuery(filtered);
                }
            }
        }
        // try wildcard query as it's faster, but if it fails go through all terms
        if (size == 0) {
            try {
                Query anyValue = new WildcardQuery(new Term(this._name, "*"));
                TopDocs td = searcher.search(restrict(filtered, anyValue), 1);
                this.hasResults = td.totalHits > 0;
                return;
            } catch (Exception ignored) {
                // deliberate best effort: any failure of the shortcut falls back
                // to the term-by-term check below
            }
        }
        // find all terms
        List<Term> terms = Terms.terms(searcher.getIndexReader(), name());
        if (this._maxTerms > 0 && terms.size() > this._maxTerms) {
            return;
        }
        if (size == 0) {
            // we just want to know if there are results,
            // so load only one and stop when we get one
            for (Term t : terms) {
                TopDocs td = searcher.search(restrict(filtered, termToQuery(t)), 1);
                if (td.totalHits > 0) {
                    this.hasResults = true;
                    return;
                }
            }
            return;
        }
        // count results for each term
        DocumentCounter counter = new DocumentCounter();
        Bucket<String> values = new Bucket<>(size);
        for (Term t : terms) {
            searcher.search(restrict(filtered, termToQuery(t)), counter);
            int count = counter.getCount();
            values.add(t.text(), count);
            counter.reset();
            // only terms matching at least one document count towards the total
            if (count > 0) {
                this.totalTerms++;
                this.hasResults = true;
            }
        }
        this.bucket = values;
    }

    /**
     * Computes each facet option.
     *
     * <p>Same as <code>compute(searcher, base, null, size);</code>.
     *
     * @see #compute(IndexSearcher, Query, List, int)
     *
     * @param searcher the index search to use.
     * @param base     the base query.
     * @param size     the maximum number of field values to compute.
     *
     * @throws IOException              if thrown by the searcher.
     * @throws IllegalArgumentException if <code>size</code> is negative.
     */
    public void compute(IndexSearcher searcher, Query base, int size) throws IOException {
        compute(searcher, base, null, size);
    }

    /**
     * Computes each facet option.
     *
     * <p>Same as <code>compute(searcher, base, 10);</code>.
     *
     * <p>Defaults to 10.
     *
     * @see #compute(IndexSearcher, Query, int)
     *
     * @param searcher the index search to use.
     * @param base     the base query.
     *
     * @throws IOException if thrown by the searcher.
     */
    public void compute(IndexSearcher searcher, Query base) throws IOException {
        compute(searcher, base, null, DEFAULT_MAX_NUMBER_OF_VALUES);
    }

    /**
     * Computes each facet option as a flexible facet.
     *
     * <p>Same as <code>compute(searcher, base, filters, 10);</code>.
     *
     * <p>Defaults to 10.
     *
     * @see #compute(IndexSearcher, Query, List, int)
     *
     * @param searcher the index search to use.
     * @param base     the base query.
     * @param filters  the filters applied to the base query
     *
     * @throws IOException if thrown by the searcher.
     */
    public void compute(IndexSearcher searcher, Query base, List<Filter> filters) throws IOException {
        compute(searcher, base, filters, DEFAULT_MAX_NUMBER_OF_VALUES);
    }

    /**
     * Computes each facet option without a base query.
     *
     * <p>With a size of zero, only the presence of terms for the field is checked. Otherwise
     * every term of the field is counted, unless the field has more terms than the maximum
     * given at construction, in which case the facet is left with no values.
     *
     * @param searcher the index search to use.
     * @param size     the number of facet values to calculate (already validated by the caller).
     *
     * @throws IOException if thrown by the searcher.
     */
    private void compute(IndexSearcher searcher, int size) throws IOException {
        // reset
        this.bucket = null;
        if (size == 0) {
            this.totalTerms = -1;
            // check if there are terms
            this.hasResults = !Terms.terms(searcher.getIndexReader(), this._name).isEmpty();
            return;
        }
        this.totalTerms = 0;
        this.hasResults = false;
        // find all terms
        List<Term> terms = Terms.terms(searcher.getIndexReader(), this._name);
        if (this._maxTerms > 0 && terms.size() > this._maxTerms) {
            return;
        }
        Bucket<String> values = new Bucket<>(size);
        DocumentCounter counter = new DocumentCounter();
        for (Term t : terms) {
            searcher.search(termToQuery(t), counter);
            values.add(t.text(), counter.getCount());
            counter.reset();
            // without a base query every term of the field counts towards the total
            this.totalTerms++;
            this.hasResults = true;
        }
        // set bucket
        this.bucket = values;
    }

    /**
     * Builds the query matching documents that satisfy both queries.
     *
     * @param filtered   the (possibly filtered) base query
     * @param constraint the additional query the documents must match
     *
     * @return a boolean query where both queries are required
     */
    private static BooleanQuery restrict(Query filtered, Query constraint) {
        BooleanQuery query = new BooleanQuery();
        query.add(filtered, Occur.MUST);
        query.add(constraint, Occur.MUST);
        return query;
    }

    /**
     * Create a query for the term given, using the numeric type if there is one.
     *
     * @param t the term
     *
     * @return the query
     */
    protected abstract Query termToQuery(Term t);

    /**
     * Writes a single facet value as XML; called by {@link #toXML(XMLWriter)} once
     * for each entry of the computed values.
     *
     * @param term        the facet value
     * @param cardinality the count stored for that value
     * @param xml         the XML writer to write to
     *
     * @throws IOException if thrown by the XML writer.
     */
    protected abstract void termToXML(String term, int cardinality, XMLWriter xml) throws IOException;

    /**
     * Writes this facet as a <code>facet</code> element with its name, type and flexible flag,
     * followed by the computed values, if any.
     *
     * <p>The <code>total-terms</code> attribute is only written when the total number of terms
     * is known (that is not -1); <code>has-results</code> is then derived from it.
     *
     * @param xml the XML writer to write to
     *
     * @throws IOException if thrown by the XML writer.
     */
    @Override
    public void toXML(XMLWriter xml) throws IOException {
        xml.openElement("facet", true);
        xml.attribute("name", this._name);
        xml.attribute("type", getType());
        xml.attribute("flexible", String.valueOf(this.flexible));
        if (this.totalTerms == -1) {
            xml.attribute("has-results", this.hasResults ? "true" : "false");
        } else {
            xml.attribute("has-results", this.totalTerms > 0 ? "true" : "false");
            xml.attribute("total-terms", this.totalTerms);
        }
        if (this.bucket != null) {
            for (Entry<String> e : this.bucket.entrySet()) {
                termToXML(e.item(), e.count(), xml);
            }
        }
        xml.closeElement();
    }

    /**
     * @return the computed facet values, or <code>null</code> if no values were collected
     *         (nothing computed yet, computed with a size of zero, or too many terms).
     */
    public Bucket<String> getValues() {
        return this.bucket;
    }

    /**
     * @return the total number of terms found by the last computation,
     *         or -1 if it was not computed (no computation yet or size of zero).
     */
    public int getTotalTerms() {
        return this.totalTerms;
    }

    /**
     * @return <code>true</code> if the last computation found results containing the field used in this facet.
     */
    public boolean hasResults() {
        return this.hasResults;
    }

}