org.emonocot.persistence.dao.hibernate.SearchableDaoImpl.java Source code

Java tutorial

Introduction

Here is the source code for org.emonocot.persistence.dao.hibernate.SearchableDaoImpl.java

Source

/*
 * This is eMonocot, a global online biodiversity information resource.
 *
 * Copyright © 2011–2015 The Board of Trustees of the Royal Botanic Gardens, Kew and The University of Oxford
 *
 * eMonocot is free software: you can redistribute it and/or modify it under the terms of the
 * GNU Affero General Public License as published by the Free Software Foundation, either version 3
 * of the License, or (at your option) any later version.
 *
 * eMonocot is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even
 * the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Affero General Public License for more details.
 *
 * The complete text of the GNU Affero General Public License is in the source repository as the file
 * COPYING.  It is also available from <http://www.gnu.org/licenses/>.
 */
package org.emonocot.persistence.dao.hibernate;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.*;
import java.util.Collections;

import org.apache.commons.lang3.StringUtils;
import org.apache.lucene.analysis.charfilter.HTMLStripCharFilter;
import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.SolrServer;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.response.FacetField;
import org.apache.solr.client.solrj.response.Group;
import org.apache.solr.client.solrj.response.GroupCommand;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.params.FacetParams;
import org.emonocot.api.autocomplete.Match;
import org.emonocot.model.Base;
import org.emonocot.pager.CellSet;
import org.emonocot.pager.Cube;
import org.emonocot.pager.DefaultPageImpl;
import org.emonocot.pager.FacetName;
import org.emonocot.pager.Level;
import org.emonocot.pager.Page;
import org.emonocot.persistence.dao.SearchableDao;
import org.hibernate.ObjectNotFoundException;
import org.springframework.beans.factory.annotation.Autowired;

/**
 *
 * @author ben
 *
 * @param <T>
 */
public abstract class SearchableDaoImpl<T extends Base> extends DaoImpl<T> implements SearchableDao<T> {

    /** Solr client that every query in this DAO is sent to; assigned through {@link #setSolrServer(SolrServer)}. */
    private SolrServer solrServer = null;

    /**
     * Sets the Solr client that this DAO sends its queries to.
     *
     * @param solrServer
     *            the Solr server to query
     */
    @Autowired
    public void setSolrServer(SolrServer solrServer) {
        this.solrServer = solrServer;
    }

    /**
     * Does this DAO search for SearchableObjects?
     *
     * <p>The returned value is appended to the {@code base.class_searchable_b:} filter query
     * built by {@code search} and {@code autocomplete}. This implementation always answers
     * {@code true}.
     *
     * @return {@code true} in this implementation
     */
    protected boolean isSearchableObject() {
        return true;
    }

    /**
     * Creates the DAO, passing the handled type straight through to the superclass constructor.
     *
     * @param newType
     *            Set the type of object handled by this class
     */
    public SearchableDaoImpl(final Class<T> newType) {
        super(newType);
    }

    /**
     * Runs a Solr search (with spellcheck and optional faceting) and loads the object behind
     * every document of the requested page.
     *
     * @param query
     *            A lucene query
     * @param spatialQuery
     *            A spatial query to filter the results by; ignored when null or blank
     * @param pageSize
     *            The maximum number of results to return
     * @param pageNumber
     *            The offset (in pageSize chunks, 0-based) from the beginning of
     *            the recordset
     * @param facets
     *            The names of the facets you want to calculate
     * @param facetPrefixes
     *            A map of facet name to the value sent as that facet's
     *            {@code facet.prefix}; may be null, and is only applied when
     *            {@code facets} is not empty
     * @param selectedFacets
     *            A map of facets which you would like to restrict the search by
     * @param sort
     *            A representation for the order results should be returned in
     * @param fetch
     *            Set the fetch profile
     * @return a Page from the resultset
     * @throws SolrServerException
     *             if the Solr query fails
     */
    public final Page<T> search(final String query, final String spatialQuery, final Integer pageSize,
            final Integer pageNumber, final String[] facets, Map<String, String> facetPrefixes,
            final Map<String, String> selectedFacets, final String sort, final String fetch)
            throws SolrServerException {
        SolrQuery solrQuery = prepareQuery(query, sort, pageSize, pageNumber, selectedFacets);
        solrQuery.set("spellcheck", "true");
        solrQuery.set("spellcheck.collate", "true");
        solrQuery.set("spellcheck.count", "1");
        solrQuery.set("bq", "base.class_s:org.emonocot.model.Taxon^2.0");

        // Filter the searchable objects out
        solrQuery.addFilterQuery("base.class_searchable_b:" + isSearchableObject());

        if (spatialQuery != null && spatialQuery.trim().length() != 0) {
            solrQuery.addFilterQuery(spatialQuery);
        }

        if (facets != null && facets.length != 0) {
            solrQuery.setFacet(true);
            solrQuery.setFacetMinCount(1);
            solrQuery.setFacetSort(FacetParams.FACET_SORT_INDEX);

            for (String facet : facets) {
                if (facet.equals("base.class_s")) {
                    // the class facet is ordered by count; every other facet keeps index order
                    solrQuery.setParam("f.base.class_s.facet.sort", FacetParams.FACET_SORT_COUNT);
                }
                if (facet.endsWith("_dt")) {
                    // Is a date facet. Once Solr 4.2 is released, we can implement variable length
                    // buckets, but for now stick with fixed buckets
                    // https://issues.apache.org/jira/browse/SOLR-2366
                    solrQuery.add("facet.range", facet);
                    solrQuery.add("f." + facet + ".facet.range.start", "NOW/DAY-1YEARS");
                    solrQuery.add("f." + facet + ".facet.range.end", "NOW/DAY");
                    solrQuery.add("f." + facet + ".facet.range.gap", "+1MONTH");
                } else {
                    solrQuery.addFacetField(facet);
                }
                includeMissing(solrQuery, facet);
            }
            if (facetPrefixes != null) {
                for (Map.Entry<String, String> facetPrefix : facetPrefixes.entrySet()) {
                    solrQuery.add("f." + facetPrefix.getKey() + ".facet.prefix", facetPrefix.getValue());
                }
            }
        }
        QueryResponse queryResponse = solrServer.query(solrQuery);

        List<T> results = new ArrayList<T>();
        for (SolrDocument solrDocument : queryResponse.getResults()) {
            // NOTE(review): loadObjectForDocument in this class returns null when the indexed
            // object no longer exists, so a null may reach enableProfilePostQuery and the
            // result list - confirm that callers cope with that.
            T object = loadObjectForDocument(solrDocument);
            enableProfilePostQuery(object, fetch);
            results.add(object);
        }

        // Narrowing cast: truncates exactly like the Long.intValue() call it replaces,
        // without going through the deprecated Long constructor.
        int totalResults = (int) queryResponse.getResults().getNumFound();
        Page<T> page = new DefaultPageImpl<T>(totalResults, pageNumber, pageSize, results,
                queryResponse);
        if (selectedFacets != null) {
            page.setSelectedFacets(selectedFacets);
        }
        page.setSort(sort);

        return page;
    }

    /**
     * Turns on the "missing" bucket for a facet when its {@link FacetName} asks for it.
     *
     * <p>An {@link IllegalArgumentException} raised while handling the facet is logged at debug
     * level and otherwise ignored, leaving the query as it was.
     *
     * @param solrQuery
     *            the query to add the {@code facet.missing} parameter to
     * @param facet
     *            the facet name to look up
     */
    private void includeMissing(SolrQuery solrQuery, String facet) {
        try {
            final FacetName facetName = FacetName.fromString(facet);
            if (facetName == null || !facetName.isIncludeMissing()) {
                return;
            }
            final String missingParam = "f." + facetName.getSolrField() + ".facet.missing";
            solrQuery.set(missingParam, true);
        } catch (IllegalArgumentException e) {
            logger.debug("Unable to find a facet for " + facet);
        }
    }

    /**
     * Suggests completions for a partially typed query.
     *
     * <p>Up to 100 candidate documents are fetched from Solr. Their HTML-stripped
     * {@code autocomplete} values are de-duplicated, and candidates whose label starts with
     * the query (ignoring case) are sorted and placed ahead of the remaining candidates, which
     * keep Solr's order. At most 10 matches are returned. Where Solr supplies a highlighted
     * snippet for a document, it replaces that match's label; the value keeps the plain text.
     *
     * @param query
     *            the text typed so far; a null or blank query yields an empty list
     * @param pageSize
     *            currently not used: the candidate and result limits are fixed (100 and 10)
     * @param selectedFacets
     *            facet name to value pairs added as filter queries; may be null
     * @return at most 10 matches, prefix matches first
     * @throws SolrServerException
     *             if the Solr query fails
     */
    public List<Match> autocomplete(final String query, Integer pageSize, Map<String, String> selectedFacets)
            throws SolrServerException {
        if (query == null || query.trim().isEmpty()) {
            return new ArrayList<Match>();
        }

        // Number of candidates requested from Solr, and number of suggestions handed back.
        final int candidateRows = 100;
        final int maxSuggestions = 10;

        SolrQuery solrQuery = new SolrQuery();
        solrQuery.setQuery(query);

        // Filter the searchable objects out
        solrQuery.addFilterQuery("base.class_searchable_b:" + isSearchableObject());
        solrQuery.setRows(candidateRows);

        if (selectedFacets != null) {
            for (Map.Entry<String, String> selected : selectedFacets.entrySet()) {
                solrQuery.addFilterQuery(selected.getKey() + ":" + selected.getValue());
            }
        }

        solrQuery.set("defType", "edismax");
        solrQuery.set("qf", "autocomplete^3 autocompleteng");
        // NOTE(review): "autocompletenge" differs from the "autocompleteng" field used in qf -
        // confirm against the Solr schema that this is not a typo.
        solrQuery.set("pf", "autocompletenge");
        solrQuery.set("fl", "autocomplete,id");
        solrQuery.setHighlight(true);
        solrQuery.set("hl.fl", "autocomplete");
        solrQuery.set("hl.snippets", 3);
        solrQuery.setHighlightSimplePre("<b>");
        solrQuery.setHighlightSimplePost("</b>");

        QueryResponse queryResponse = solrServer.query(solrQuery);

        List<Match> candidates = new ArrayList<Match>();
        Map<String, Match> matchesById = new HashMap<String, Match>();
        for (SolrDocument solrDocument : queryResponse.getResults()) {
            Match match = new Match();
            String label = filter((String) solrDocument.get("autocomplete"));
            match.setLabel(label);
            match.setValue(label);
            matchesById.put((String) solrDocument.get("id"), match);
            candidates.add(match);
        }

        // Lower-case the query once, with a fixed locale so the prefix test does not depend on
        // the JVM's default locale.
        final String lowerCaseQuery = query.toLowerCase(Locale.ROOT);
        List<Match> prefixMatches = new ArrayList<Match>();
        List<Match> otherMatches = new ArrayList<Match>();
        for (Match candidate : removeDuplicates(candidates)) {
            if (candidate.getLabel().toLowerCase(Locale.ROOT).startsWith(lowerCaseQuery)) {
                prefixMatches.add(candidate);
            } else {
                otherMatches.add(candidate);
            }
        }
        Collections.sort(prefixMatches);

        List<Match> ranked = new ArrayList<Match>(prefixMatches.size() + otherMatches.size());
        ranked.addAll(prefixMatches);
        ranked.addAll(otherMatches);

        // Copy rather than return the subList view, so the result does not pin the full
        // candidate list.
        List<Match> suggestions = ranked.size() > maxSuggestions
                ? new ArrayList<Match>(ranked.subList(0, maxSuggestions))
                : ranked;

        // Labels are swapped for their highlighted form only now, so that de-duplication and
        // ordering above worked on the plain text.
        Map<String, Map<String, List<String>>> highlighting = queryResponse.getHighlighting();
        if (highlighting != null) {
            for (Map.Entry<String, Match> entry : matchesById.entrySet()) {
                Map<String, List<String>> highlightedTerms = highlighting.get(entry.getKey());
                if (highlightedTerms == null) {
                    continue;
                }
                List<String> snippets = highlightedTerms.get("autocomplete");
                if (snippets != null && !snippets.isEmpty()) {
                    entry.getValue().setLabel(snippets.get(0));
                }
            }
        }

        return suggestions;
    }

    /**
     * Returns the matches of {@code list} in their original order, keeping only the first match
     * seen for each label.
     *
     * @param list
     *            the matches to filter
     * @return a new list without repeated labels
     */
    private static List<Match> removeDuplicates(List<Match> list) {
        final Set<String> seenLabels = new HashSet<>();
        final List<Match> unique = new ArrayList<>();
        for (Match candidate : list) {
            // Set.add answers false when the label has already been recorded.
            if (seenLabels.add(candidate.getLabel())) {
                unique.add(candidate);
            }
        }
        return unique;
    }

    /**
     * Strips HTML markup from a value by reading it through an {@link HTMLStripCharFilter}.
     *
     * @param value
     *            the text to strip; must not be null
     * @return the text as produced by the HTML-stripping reader
     * @throws RuntimeException
     *             wrapping the {@link IOException} if reading or closing the filter fails
     */
    private String filter(String value) {
        StringBuilder out = new StringBuilder();
        // try-with-resources closes the filter even when read() throws.
        try (HTMLStripCharFilter html = new HTMLStripCharFilter(new BufferedReader(new StringReader(value)))) {
            char[] cbuf = new char[1024 * 10];
            int count;
            while ((count = html.read(cbuf)) != -1) { // end of stream mark is -1
                if (count > 0) {
                    out.append(cbuf, 0, count);
                }
            }
        } catch (IOException e) {
            throw new RuntimeException("Failed stripping HTML for value: " + value, e);
        }
        return out.toString();
    }

    /**
     * Runs a search and returns the raw Solr documents of the requested page, without loading
     * the objects behind them.
     *
     * @param query
     *            A lucene query
     * @param pageSize
     *            The maximum number of results to return
     * @param pageNumber
     *            The offset (in pageSize chunks, 0-based) from the beginning of the recordset
     * @param selectedFacets
     *            A map of facets to restrict the search by; may be null
     * @param sort
     *            A representation for the order results should be returned in
     * @return a Page of Solr documents
     * @throws SolrServerException
     *             if the Solr query fails
     */
    @Override
    public Page<SolrDocument> searchForDocuments(String query, Integer pageSize, Integer pageNumber,
            Map<String, String> selectedFacets, String sort) throws SolrServerException {
        SolrQuery solrQuery = prepareQuery(query, sort, pageSize, pageNumber, selectedFacets);

        QueryResponse queryResponse = solrServer.query(solrQuery);

        // Narrowing cast: truncates exactly like the Long.intValue() call it replaces,
        // without going through the deprecated Long constructor.
        int totalResults = (int) queryResponse.getResults().getNumFound();
        Page<SolrDocument> page = new DefaultPageImpl<SolrDocument>(totalResults, pageNumber, pageSize,
                queryResponse.getResults(), queryResponse);
        if (selectedFacets != null) {
            page.setSelectedFacets(selectedFacets);
        }
        page.setSort(sort);

        return page;
    }

    /**
     * Loads the object that a Solr document refers to.
     *
     * <p>The class name is read from the document's {@code base.class_s} field and the
     * identifier from {@code base.id_l}; the object is then loaded through the current session.
     *
     * @param solrDocument
     *            the document to resolve
     * @return the loaded object, or null if an {@link ObjectNotFoundException} is raised
     * @throws RuntimeException
     *             if the class named in the document cannot be found
     */
    @Override
    public T loadObjectForDocument(SolrDocument solrDocument) {
        try {
            Class<?> clazz = Class.forName((String) solrDocument.getFieldValue("base.class_s"));
            Long id = (Long) solrDocument.getFieldValue("base.id_l");
            // The class comes from the index, so the cast cannot be checked at compile time.
            @SuppressWarnings("unchecked")
            T t = (T) getSession().load(clazz, id);
            // presumably forces the lazily loaded object to initialise so that a missing row
            // surfaces here as ObjectNotFoundException - TODO confirm
            t.getIdentifier();
            return t;
        } catch (ClassNotFoundException cnfe) {
            throw new RuntimeException("Could not instantiate search result", cnfe);
        } catch (ObjectNotFoundException onfe) {
            return null;
        }
    }

    /**
     * Builds and runs the facet queries behind a {@link CellSet}.
     *
     * <p>Two queries are sent. The main query facets on the row level (the cube's default
     * level when {@code rows} is null) and, when both {@code rows} and {@code cols} are given,
     * on the column level plus a {@code rows,cols} pivot. The second query only requests the
     * {@code totalRows} / {@code totalCols} facets. {@code cols} is ignored when {@code rows}
     * is null.
     *
     * @param rows
     *            the facet to use for rows, or null to use the cube's default level
     * @param cols
     *            the facet to use for columns, or null for none
     * @param firstCol
     *            column offset; passed on to the CellSet only
     * @param maxCols
     *            column limit; triggers the {@code totalCols} facet and is passed on to the CellSet
     * @param firstRow
     *            row facet offset; only sent to Solr when {@code maxRows} is given
     * @param maxRows
     *            row facet limit, or null for no limit
     * @param selectedFacets
     *            facet name to value pairs to filter by; an empty value excludes every document
     *            that has any value for that facet; may be null
     * @param facets
     *            additional facets to calculate; may be null
     * @param cube
     *            the cube describing the available levels
     * @return the cell set built from the main query's response
     * @throws SolrServerException
     *             if either Solr query fails
     */
    public CellSet analyse(String rows, String cols, Integer firstCol, Integer maxCols, Integer firstRow,
            Integer maxRows, Map<String, String> selectedFacets, String[] facets, Cube cube)
            throws SolrServerException {
        SolrQuery query = new SolrQuery();
        query.setQuery("*:*");
        SolrQuery totalQuery = new SolrQuery();
        totalQuery.setQuery("*:*");

        // We're not interested in the results themselves
        query.setRows(1);
        query.setStart(0);
        totalQuery.setRows(1);
        totalQuery.setStart(0);

        // Without an explicit row level the cube's default level supplies the rows.
        final String rowField = (rows != null) ? rows : cube.getDefaultLevel();
        addRowFacet(query, totalQuery, rowField, firstRow, maxRows);

        if (rows != null) {
            addSelectedParentPrefix(query, totalQuery, rows, selectedFacets, cube);
            if (cols != null) {
                query.addFacetField(cols);
                includeMissing(query, cols);
                if (maxCols != null) {
                    totalQuery.setFacet(true);
                    totalQuery.setFacetMinCount(1);
                    totalQuery.addFacetField("{!key=totalCols}" + cols);
                    // No per-column limit/mincount/offset is sent: the original note says facet
                    // pivot "does not behave the same way on columns" (the note is truncated).
                    // firstCol and maxCols are only passed on to the CellSet below.
                }
                addSelectedParentPrefix(query, totalQuery, cols, selectedFacets, cube);
                query.addFacetPivotField(rows + "," + cols);
            }
        }

        if (selectedFacets != null) {
            for (Map.Entry<String, String> selected : selectedFacets.entrySet()) {
                final String filter;
                if (StringUtils.isNotEmpty(selected.getValue())) {
                    filter = selected.getKey() + ":" + selected.getValue();
                } else {
                    // Subtract/Exclude documents with any value for the facet
                    filter = "-" + selected.getKey() + ":[* TO *]";
                }
                totalQuery.addFilterQuery(filter);
                query.addFilterQuery(filter);
            }
        }

        if (facets != null && facets.length != 0) {
            query.setFacet(true);
            query.setFacetMinCount(1);
            query.setFacetSort(FacetParams.FACET_SORT_INDEX);

            for (String facetName : facets) {
                // Facets named as the row or column axis (or equal to the default row level)
                // are skipped here.
                boolean isAxis = (rows != null && rows.equals(facetName))
                        || (cols != null && cols.equals(facetName))
                        || (rows == null && facetName.equals(cube.getDefaultLevel()));
                if (isAxis) {
                    continue;
                }
                includeMissing(query, facetName);
                query.addFacetField(facetName);
            }
        }

        QueryResponse response = solrServer.query(query);
        QueryResponse totalResponse = solrServer.query(totalQuery);
        // Either facet is null when it was not requested above.
        FacetField totalRows = totalResponse.getFacetField("totalRows");
        FacetField totalCols = totalResponse.getFacetField("totalCols");

        return new CellSet(response, selectedFacets, query, rows, cols, firstRow, maxRows, firstCol,
                maxCols, totalRows, totalCols, cube);
    }

    /**
     * Facets the main query on the row field and, when a row limit is given, requests the
     * {@code totalRows} facet on the total query and pages the row facet of the main query.
     */
    private void addRowFacet(SolrQuery query, SolrQuery totalQuery, String rowField, Integer firstRow,
            Integer maxRows) {
        query.setFacet(true);
        query.setFacetMinCount(1);
        query.setFacetSort(FacetParams.FACET_SORT_INDEX);
        query.addFacetField(rowField);
        includeMissing(query, rowField);
        includeMissing(totalQuery, rowField);
        if (maxRows != null) {
            totalQuery.setFacet(true);
            totalQuery.setFacetMinCount(1);
            totalQuery.addFacetField("{!key=totalRows}" + rowField);

            query.add("f." + rowField + ".facet.limit", maxRows.toString());
            query.add("f." + rowField + ".facet.mincount", "1");
            if (firstRow != null) {
                query.add("f." + rowField + ".facet.offset", firstRow.toString());
            }
        }
    }

    /**
     * For a multi-valued level that has a higher level, restricts the facet values of
     * {@code field} on both queries to those prefixed with the selected value of the higher
     * level followed by "_".
     */
    private void addSelectedParentPrefix(SolrQuery query, SolrQuery totalQuery, String field,
            Map<String, String> selectedFacets, Cube cube) {
        if (!cube.getLevel(field).isMultiValued()) {
            return;
        }
        Level higher = cube.getLevel(field).getHigher();
        if (higher == null) {
            return;
        }
        if (selectedFacets == null) {
            // Nothing is selected, so there is no parent value to restrict to. This used to
            // fail with a NullPointerException.
            return;
        }
        // NOTE(review): when the higher level has no selected value this yields the prefix
        // "null_", as before - confirm whether that is intended.
        String prefix = selectedFacets.get(higher.getFacet()) + "_";
        totalQuery.add("f." + field + ".facet.prefix", prefix);
        query.add("f." + field + ".facet.prefix", prefix);
    }

    /**
     * Builds the {@link SolrQuery} shared by the search methods: query text, sort order, paging
     * and facet filters.
     *
     * <p>A query containing ':' is passed through untouched. Any other non-blank query is
     * trimmed, has its spaces replaced with '+' and is run through edismax; a null or blank
     * query becomes {@code *:*}, also with the edismax settings.
     *
     * @param query
     *            the query text; may be null or blank
     * @param sort
     *            comma separated list of {@code <field>_asc} / {@code <field>_desc} entries;
     *            may be null or empty
     * @param pageSize
     *            number of rows to request, or null to leave the row count unset
     * @param pageNumber
     *            0-based page; only applied when {@code pageSize} is given
     * @param selectedFacets
     *            facet name to value pairs added as filter queries; an empty value excludes
     *            documents that have any value for that facet; may be null
     * @return A {@link SolrQuery} that can be customised before passing to a {@link SolrServer}
     */
    protected SolrQuery prepareQuery(String query, String sort, Integer pageSize, Integer pageNumber,
            Map<String, String> selectedFacets) {
        final SolrQuery prepared = new SolrQuery();

        final boolean blank = query == null || query.trim().isEmpty();
        final boolean fielded = !blank && query.contains(":");
        if (!fielded) {
            prepared.set("defType", "edismax");
            prepared.set("qf", "searchable.label_sort searchable.solrsummary_t");
        }
        if (blank) {
            prepared.setQuery("*:*");
        } else if (fielded) {
            prepared.setQuery(query);
        } else {
            // replace spaces with '+' so that we search on terms
            prepared.setQuery(query.trim().replace(" ", "+"));
        }

        if (sort != null && !sort.isEmpty()) {
            for (String sortToken : sort.split(",")) {
                if (sortToken.equals("_asc")) {
                    // a bare "_asc" names no field, so there is nothing to sort on
                    continue;
                }
                if (sortToken.endsWith("_asc")) {
                    prepared.addSortField(sortToken.substring(0, sortToken.length() - 4), SolrQuery.ORDER.asc);
                } else if (sortToken.endsWith("_desc")) {
                    prepared.addSortField(sortToken.substring(0, sortToken.length() - 5), SolrQuery.ORDER.desc);
                }
            }
        }

        if (pageSize != null) {
            prepared.setRows(pageSize);
            if (pageNumber != null) {
                prepared.setStart(pageSize * pageNumber);
            }
        }

        if (selectedFacets != null) {
            for (Map.Entry<String, String> selected : selectedFacets.entrySet()) {
                if (StringUtils.isNotEmpty(selected.getValue())) {
                    prepared.addFilterQuery(selected.getKey() + ":" + selected.getValue());
                } else {
                    // Subtract/Exclude documents with any value for the facet
                    prepared.addFilterQuery("-" + selected.getKey() + ":[* TO *]");
                }
            }
        }

        return prepared;
    }

}