org.pageseeder.flint.lucene.facet.FlexibleIntervalFacet.java Source code

Java tutorial

Introduction

Here is the source code for org.pageseeder.flint.lucene.facet.FlexibleIntervalFacet.java

Source

/*
 * Copyright 2015 Allette Systems (Australia)
 * http://www.allette.com.au
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.pageseeder.flint.lucene.facet;

import java.io.IOException;
import java.time.OffsetDateTime;
import java.util.Date;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.apache.lucene.document.DateTools.Resolution;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.pageseeder.flint.lucene.search.DocumentCounter;
import org.pageseeder.flint.lucene.search.Filter;
import org.pageseeder.flint.lucene.search.Terms;
import org.pageseeder.flint.lucene.util.Beta;
import org.pageseeder.flint.lucene.util.Bucket;
import org.pageseeder.flint.lucene.util.Bucket.Entry;
import org.pageseeder.flint.lucene.util.Dates;
import org.pageseeder.xmlwriter.XMLWriter;

/**
 * A facet that groups the terms of a single index field into {@link Interval}s and counts,
 * for each interval, the number of matching documents.
 *
 * <p>Subclasses decide how a term maps to an interval ({@link #findInterval(Term)}), how a term
 * is turned into a query ({@link #termToQuery(Term)}) and how an interval is serialised
 * ({@link #intervalToXML(Interval, int, XMLWriter)}).
 *
 * @author Jean-Baptiste Reure
 *
 * @version 5.1.3
 */
@Beta
public abstract class FlexibleIntervalFacet extends FlexibleFacet<FlexibleIntervalFacet.Interval> {

    /**
     * The default number of facet values if not specified.
     */
    public static final int DEFAULT_MAX_NUMBER_OF_VALUES = 10;

    /**
     * The point of reference for intervals
     */
    private final String _start;

    /**
     * A max value not to search after
     */
    private final String _end;

    /**
     * If the lower limit of each interval is included
     */
    private final boolean _includeLower;

    /**
     * If the upper limit of the last interval is included
     */
    private final boolean _includeLastUpper;

    /**
     * The max nb of intervals: when positive, a computation that finds more distinct
     * intervals with results than this value is abandoned.
     */
    protected final int _maxIntervals;

    /**
     * The intervals found by the last successful computation, with their document counts
     * (<code>null</code> if nothing was computed or the last computation was abandoned).
     */
    protected transient Bucket<Interval> bucket;

    /**
     * The total number of intervals with results
     */
    protected transient int totalIntervals = 0;

    /**
     * Creates a new facet with the specified name;
     *
     * @param name             The name of the facet.
     * @param start            The starting point when computing intervals
     * @param end              An end value not to search past
     * @param includeLower     If the lower limit of each interval is included
     * @param includeLastUpper If the upper limit of the last interval is included
     * @param maxIntervals     The maximum number of intervals to load
     */
    protected FlexibleIntervalFacet(String name, String start, String end, boolean includeLower,
            boolean includeLastUpper, int maxIntervals) {
        super(name);
        this._start = start;
        this._end = end;
        this._includeLower = includeLower;
        this._includeLastUpper = includeLastUpper;
        this._maxIntervals = maxIntervals;
    }

    /**
     * Returns the point of reference for intervals.
     * @return the point of reference for intervals.
     */
    public String start() {
        return this._start;
    }

    /**
     * Returns the end value not to search past.
     * @return the end value not to search past.
     */
    public String end() {
        return this._end;
    }

    /**
     * Returns <code>true</code> if the lower limit of each interval is included
     * @return <code>true</code> if the lower limit of each interval is included
     */
    public boolean includeLower() {
        return this._includeLower;
    }

    /**
     * Returns <code>true</code> if the upper limit of the last interval is included
     * @return <code>true</code> if the upper limit of the last interval is included
     */
    public boolean includeLastUpper() {
        return this._includeLastUpper;
    }

    /**
     * Computes each facet option as a flexible facet.
     * All filters but the ones using the same field as this facet are applied to the base query before computing the numbers.
     *
     * <p>If the maximum number of intervals is positive and is exceeded, the computation is
     * abandoned: no values are stored and the total number of intervals is left at 0.
     *
     * @param searcher the index search to use.
     * @param base     the base query (if <code>null</code>, counts are computed on the terms alone).
     * @param filters  the filters applied to the base query (ignored if the base query is null)
     * @param size     the maximum number of field values to compute.
     *
     * @throws IOException              if thrown by the searcher.
     * @throws IllegalArgumentException if the base query is not null and the size is negative.
     */
    public void compute(IndexSearcher searcher, Query base, List<Filter> filters, int size) throws IOException {
        // If the base is null, simply calculate for each term
        if (base == null) {
            compute(searcher, size);
            return;
        }
        if (size < 0) {
            throw new IllegalArgumentException("size < 0");
        }
        // discard the results of any previous computation
        this.totalIntervals = 0;
        this.bucket = null;
        // find all terms
        List<Term> terms = Terms.terms(searcher.getIndexReader(), this._name);
        // re-compute the query without the filters targeting this facet's own field
        Query filtered = base;
        if (filters != null) {
            this.flexible = true;
            for (Filter filter : filters) {
                if (!this._name.equals(filter.name())) {
                    filtered = filter.filterQuery(filtered);
                }
            }
        }
        countIntervals(searcher, terms, filtered, size);
    }

    /**
     * Computes each facet option for the base query, without any filters.
     *
     * <p>Same as <code>compute(searcher, base, null, size);</code>.
     *
     * @see #compute(IndexSearcher, Query, List, int)
     *
     * @param searcher the index search to use.
     * @param base     the base query.
     * @param size     the maximum number of field values to compute.
     *
     * @throws IOException if thrown by the searcher.
     */
    public void compute(IndexSearcher searcher, Query base, int size) throws IOException {
        compute(searcher, base, null, size);
    }

    /**
     * Computes each facet option for the base query, without any filters.
     *
     * <p>Same as <code>compute(searcher, base, 10);</code>.
     *
     * <p>Defaults to 10.
     *
     * @see #compute(IndexSearcher, Query, int)
     *
     * @param searcher the index search to use.
     * @param base     the base query.
     *
     * @throws IOException if thrown by the searcher.
     */
    public void compute(IndexSearcher searcher, Query base) throws IOException {
        compute(searcher, base, null, DEFAULT_MAX_NUMBER_OF_VALUES);
    }

    /**
     * Computes each facet option as a flexible facet.
     *
     * <p>Same as <code>compute(searcher, base, filters, 10);</code>.
     *
     * <p>Defaults to 10.
     *
     * @see #compute(IndexSearcher, Query, List, int)
     *
     * @param searcher the index search to use.
     * @param base     the base query.
     * @param filters  the filters applied to the base query
     *
     * @throws IOException if thrown by the searcher.
     */
    public void compute(IndexSearcher searcher, Query base, List<Filter> filters) throws IOException {
        compute(searcher, base, filters, DEFAULT_MAX_NUMBER_OF_VALUES);
    }

    /**
     * Computes each facet option without a base query.
     *
     * @param searcher the index search to use.
     * @param size     the number of facet values to calculate.
     *
     * @throws IOException if thrown by the searcher.
     */
    private void compute(IndexSearcher searcher, int size) throws IOException {
        // discard the results of any previous computation
        this.totalIntervals = 0;
        this.bucket = null;
        // find all terms
        List<Term> terms = Terms.terms(searcher.getIndexReader(), this._name);
        countIntervals(searcher, terms, null, size);
    }

    /**
     * Counts the documents for each term, accumulates the counts per interval and stores the result.
     *
     * <p>On success, {@link #totalIntervals} and {@link #bucket} are updated. If the maximum
     * number of intervals is positive and exceeded, the method returns early and leaves both
     * untouched (callers reset them beforehand).
     *
     * @param searcher    the index search to use.
     * @param terms       the terms of this facet's field.
     * @param restriction a query every counted document must also match, or <code>null</code> for none.
     * @param size        the size given to the bucket holding the results.
     *
     * @throws IOException if thrown by the searcher.
     */
    private void countIntervals(IndexSearcher searcher, List<Term> terms, Query restriction, int size)
            throws IOException {
        Map<Interval, Integer> intervals = new HashMap<>();
        DocumentCounter counter = new DocumentCounter();
        for (Term t : terms) {
            // find the interval, terms without one are ignored
            Interval interval = findInterval(t);
            if (interval == null) {
                continue;
            }
            // build the query: the term alone, or the term combined with the restriction
            Query query = termToQuery(t);
            if (restriction != null) {
                BooleanQuery both = new BooleanQuery();
                both.add(restriction, Occur.MUST);
                both.add(query, Occur.MUST);
                query = both;
            }
            // find count, the counter is re-used for each term
            searcher.search(query, counter);
            int count = counter.getCount();
            counter.reset();
            if (count > 0) {
                // several terms may fall in the same interval: sum their counts
                intervals.merge(interval, count, Integer::sum);
                // check size to stop computing if too big
                if (this._maxIntervals > 0 && intervals.size() > this._maxIntervals) {
                    return;
                }
            }
        }
        // set totals
        this.totalIntervals = intervals.size();
        // add to bucket
        Bucket<Interval> b = new Bucket<>(size);
        for (Map.Entry<Interval, Integer> e : intervals.entrySet()) {
            b.add(e.getKey(), e.getValue());
        }
        this.bucket = b;
    }

    /**
     * Create a query for the term given, using the numeric type if there is one.
     * 
     * @param t the term
     * 
     * @return the query
     */
    protected abstract Query termToQuery(Term t);

    /**
     * Returns the value written as the <code>type</code> attribute of this facet's XML.
     *
     * @return the type of this facet.
     */
    public abstract String getType();

    /**
     * Writes a single interval; called by {@link #toXML(XMLWriter)} for each computed interval.
     *
     * @param interval    the interval to write.
     * @param cardinality the count stored for that interval.
     * @param xml         the XML writer to write to.
     *
     * @throws IOException if thrown by the XML writer.
     */
    protected abstract void intervalToXML(Interval interval, int cardinality, XMLWriter xml) throws IOException;

    /**
     * Finds the interval for the term given.
     *
     * @param t the term
     *
     * @return the interval, or <code>null</code> if the term must be ignored when computing.
     */
    protected abstract Interval findInterval(Term t);

    @Override
    public void toXML(XMLWriter xml) throws IOException {
        xml.openElement("facet", true);
        xml.attribute("name", this._name);
        xml.attribute("start", this._start);
        if (this._end != null) {
            xml.attribute("end", this._end);
        }
        xml.attribute("type", getType());
        xml.attribute("flexible", String.valueOf(this.flexible));
        if (!this.flexible) {
            xml.attribute("total-intervals", this.totalIntervals);
        }
        if (this.bucket != null) {
            for (Entry<Interval> e : this.bucket.entrySet()) {
                intervalToXML(e.item(), e.count(), xml);
            }
        }
        xml.closeElement();
    }

    /**
     * Returns the intervals computed with their counts.
     *
     * @return the computed intervals, or <code>null</code> if there are none.
     */
    public Bucket<Interval> getValues() {
        return this.bucket;
    }

    /**
     * Returns the total number of intervals with results.
     *
     * @return the total number of intervals with results.
     */
    public int getTotalIntervals() {
        return this.totalIntervals;
    }

    /**
     * An immutable interval defined by two string limits, each of them either included or excluded.
     *
     * <p>Limits are compared as strings. Two intervals are equal when their limits and
     * inclusion flags are equal; the natural ordering is consistent with equality.
     */
    public static class Interval implements Comparable<Interval> {
        private final String _min;
        private final String _max;
        private final boolean _includeMin;
        private final boolean _includeMax;
        /** The date resolution used to format the limits, <code>null</code> if limits are not formatted. */
        private final Resolution _resolution;

        private Interval(String min, boolean withMin, String max, boolean withMax) {
            this(min, withMin, max, withMax, null);
        }

        private Interval(String min, boolean withMin, String max, boolean withMax, Resolution resolution) {
            this._min = min;
            this._max = max;
            this._includeMin = withMin;
            this._includeMax = withMax;
            this._resolution = resolution;
        }

        /**
         * @return the lower limit as stored (may be <code>null</code>).
         */
        public String getMin() {
            return this._min;
        }

        /**
         * @return the upper limit as stored (may be <code>null</code>).
         */
        public String getMax() {
            return this._max;
        }

        /**
         * @return the lower limit, converted with <code>toDateString</code> if this interval has a resolution.
         */
        public String getFormattedMin() {
            return this._resolution != null ? toDateString(this._min, this._resolution) : this._min;
        }

        /**
         * @return the upper limit, converted with <code>toDateString</code> if this interval has a resolution.
         */
        public String getFormattedMax() {
            return this._resolution != null ? toDateString(this._max, this._resolution) : this._max;
        }

        /**
         * @return <code>true</code> if the upper limit is included.
         */
        public boolean includeMax() {
            return this._includeMax;
        }

        /**
         * @return <code>true</code> if the lower limit is included.
         */
        public boolean includeMin() {
            return this._includeMin;
        }

        @Override
        public String toString() {
            return (this._includeMin ? '[' : '{') + this._min + '-' + this._max + (this._includeMax ? ']' : '}');
        }

        @Override
        public boolean equals(Object obj) {
            if (this == obj) {
                return true;
            }
            if (!(obj instanceof Interval)) {
                return false;
            }
            Interval r = (Interval) obj;
            return sameValue(this._min, r._min)
                    && sameValue(this._max, r._max)
                    && this._includeMin == r._includeMin
                    && this._includeMax == r._includeMax;
        }

        @Override
        public int hashCode() {
            return (this._min != null ? this._min.hashCode() * 13 : 13)
                    + (this._max != null ? this._max.hashCode() * 11 : 11) + (this._includeMin ? 17 : 7)
                    + (this._includeMax ? 5 : 3);
        }

        /**
         * Orders intervals by lower limit, then upper limit (<code>null</code> limits first), then by
         * inclusion flags so that the result is 0 only for equal intervals.
         */
        @Override
        public int compareTo(Interval o) {
            int c = compareNullsFirst(this._min, o._min);
            if (c != 0) {
                return c;
            }
            c = compareNullsFirst(this._max, o._max);
            if (c != 0) {
                return c;
            }
            // an included lower limit starts earlier than an excluded one
            c = Boolean.compare(o._includeMin, this._includeMin);
            if (c != 0) {
                return c;
            }
            // an excluded upper limit ends earlier than an included one
            return Boolean.compare(this._includeMax, o._includeMax);
        }

        /** Null-safe string equality. */
        private static boolean sameValue(String a, String b) {
            return a == null ? b == null : a.equals(b);
        }

        /** Null-safe string comparison where <code>null</code> sorts before any value. */
        private static int compareNullsFirst(String a, String b) {
            if (a == null) {
                return b == null ? 0 : -1;
            }
            if (b == null) {
                return 1;
            }
            return a.compareTo(b);
        }

        /**
         * Creates an interval with the lower limit included and the upper limit excluded.
         */
        public static Interval stringInterval(String mi, String ma) {
            return stringInterval(mi, true, ma, false);
        }

        /**
         * Creates an interval from string limits.
         */
        public static Interval stringInterval(String mi, boolean withMin, String ma, boolean withMax) {
            return new Interval(mi, withMin, ma, withMax);
        }

        /**
         * Creates an interval with the lower limit included and the upper limit excluded.
         */
        public static Interval numericInterval(Number mi, Number ma) {
            return numericInterval(mi, true, ma, false);
        }

        /**
         * Creates an interval from numeric limits, stored using their <code>toString()</code>
         * value (<code>null</code> limits are kept as <code>null</code>).
         */
        public static Interval numericInterval(Number mi, boolean withMin, Number ma, boolean withMax) {
            return new Interval(mi == null ? null : mi.toString(), withMin, ma == null ? null : ma.toString(),
                    withMax);
        }

        /**
         * Creates a date interval with the lower limit included and the upper limit excluded.
         */
        public static Interval dateInterval(Date mi, Date ma, Resolution res) {
            return dateInterval(mi, true, ma, false, res);
        }

        /**
         * Creates a date interval; limits are stored as <code>Dates.toString(date, res)</code> and
         * the resolution is kept so that the formatted limits can be produced.
         */
        public static Interval dateInterval(Date mi, boolean withMin, Date ma, boolean withMax, Resolution res) {
            // the resolution must be passed on, as in the OffsetDateTime variant
            return new Interval(Dates.toString(mi, res), withMin, Dates.toString(ma, res), withMax, res);
        }

        /**
         * Creates a date interval with the lower limit included and the upper limit excluded.
         */
        public static Interval dateInterval(OffsetDateTime mi, OffsetDateTime ma, Resolution res) {
            return dateInterval(mi, true, ma, false, res);
        }

        /**
         * Creates a date interval; limits are stored as <code>Dates.toString(date, res)</code> and
         * the resolution is kept so that the formatted limits can be produced.
         */
        public static Interval dateInterval(OffsetDateTime mi, boolean withMin, OffsetDateTime ma, boolean withMax,
                Resolution res) {
            return new Interval(Dates.toString(mi, res), withMin, Dates.toString(ma, res), withMax, res);
        }
    }

}