cz.zcu.kiv.eegdatabase.logic.search.FulltextSearchService.java Source code

Java tutorial

Introduction

Here is the source code for cz.zcu.kiv.eegdatabase.logic.search.FulltextSearchService.java

Source

/*******************************************************************************
 * This file is part of the EEG-database project
 * 
 *   ==========================================
 *  
 *   Copyright (C) 2013 by University of West Bohemia (http://www.zcu.cz/en/)
 *  
 *  ***********************************************************************************************************************
 *  
 *   Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
 *   the License. You may obtain a copy of the License at
 *  
 *       http://www.apache.org/licenses/LICENSE-2.0
 *  
 *   Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
 *   an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
 *   specific language governing permissions and limitations under the License.
 *  
 *  ***********************************************************************************************************************
 *  
 *   FulltextSearchService.java, 2013/10/02 00:01 Jakub Rinkes
 ******************************************************************************/
package cz.zcu.kiv.eegdatabase.logic.search;

import cz.zcu.kiv.eegdatabase.logic.indexing.IndexField;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.SolrServer;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.response.FacetField;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.common.SolrDocument;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Service;

import java.io.IOException;
import java.util.*;

/**
 * The full text search service. Provides search, autocomplete and faceting methods
 * that are executed against the injected {@link SolrServer}.
 * User: Jan Koren
 * Date: 12.3.13
 */
@Service
public class FulltextSearchService {

    /** Fields whose highlighted content is collected as text fragments of a result, in display order. */
    private static final IndexField[] FRAGMENT_FIELDS = {
        IndexField.TEXT, IndexField.NAME, IndexField.CHILD_TITLE, IndexField.CHILD_TEXT
    };

    @Autowired
    private SolrServer solrServer;

    protected Log log = LogFactory.getLog(getClass());

    /**
     * Gets results for a given query string.
     * @param inputQuery The input query.
     * @param category The result category to restrict the search to; {@code null} or
     *                 {@link ResultCategory#ALL} means no restriction.
     * @param start The index of the first returned result.
     * @param count Number of retrieved results.
     * @return List of full text results; an empty list if the query could not be executed.
     */
    public List<FullTextResult> getResultsForQuery(String inputQuery, ResultCategory category, int start,
            int count) {

        SolrQuery query = configureQuery(inputQuery, category, start, count);
        // fetches a response to the query
        QueryResponse response;
        try {
            response = solrServer.query(query);
        } catch (SolrServerException e) {
            // A failed search is reported as "nothing found", but must not vanish without a trace.
            log.error("Full text query failed", e);
            return new ArrayList<FullTextResult>();
        }

        List<FullTextResult> results = new ArrayList<FullTextResult>();
        for (SolrDocument document : response.getResults()) {
            results.add(toFullTextResult(response, document));
        }
        return results;
    }

    /**
     * Converts one found document into a full text result.
     * @param response The response the document comes from; supplies the highlighted text.
     * @param document The found document.
     * @return The full text result describing the document.
     */
    private FullTextResult toFullTextResult(QueryResponse response, SolrDocument document) {
        String uuid = (String) document.getFieldValue(IndexField.UUID.getValue());
        int id = (Integer) document.getFieldValue(IndexField.ID.getValue());
        String type = (String) document.getFieldValue(IndexField.CLASS.getValue());
        Date timestamp = (Date) document.getFieldValue(IndexField.TIMESTAMP.getValue());
        String source = (String) document.getFieldValue(IndexField.SOURCE.getValue());
        if (source == null) {
            source = "";
        }

        // text with highlighted words
        String title = getHighlightedText(response, uuid, IndexField.TITLE);
        List<String> textFragments = new ArrayList<String>();
        for (IndexField fragmentField : FRAGMENT_FIELDS) {
            addNotEmpty(getHighlightedText(response, uuid, fragmentField), textFragments);
        }

        FullTextResult result = new FullTextResult();
        result.setUuid(uuid);
        result.setId(id);
        result.setTargetPage(FullTextSearchUtils.getTargetPage(type));
        result.setTimestamp(timestamp);
        result.setType(type);
        result.setSource(source);
        result.setTitle(title);
        result.setTextFragments(textFragments);
        return result;
    }

    /**
     * Adds a string element to the list if and only if the element is neither {@code null} nor empty.
     * @param text The element to add.
     * @param list The list the element is added to.
     */
    private void addNotEmpty(String text, List<String> list) {
        if (text != null && !text.isEmpty()) {
            list.add(text);
        }
    }

    /**
     * Configures the query to be processed.
     * @param inputQuery The query string.
     * @param category The result category to filter by; {@code null} or {@link ResultCategory#ALL}
     *                 adds no filter.
     * @param start Index of the first returned result.
     * @param count Number of results we wish to display.
     * @return Configured query.
     */
    private SolrQuery configureQuery(String inputQuery, ResultCategory category, int start, int count) {
        SolrQuery query = new SolrQuery();
        query.setQuery(inputQuery);
        String categoryFilter = categoryFilter(category);
        if (categoryFilter != null) {
            query.addFilterQuery(categoryFilter);
        }
        query.setStart(start);
        query.setRows(count);
        // NOTE(review): only the highlight markers are set here; highlighting itself is not switched on
        // by this method, so it is presumably enabled in the Solr request handler defaults - confirm.
        query.setHighlightSimplePre(FullTextSearchUtils.HIGHLIGHTED_TEXT_BEGIN);
        query.setHighlightSimplePost(FullTextSearchUtils.HIGHLIGHTED_TEXT_END);
        return query;
    }

    /**
     * Builds the filter query restricting results to one category.
     * @param category The requested category.
     * @return The filter query string, or {@code null} if no category filter applies.
     */
    private static String categoryFilter(ResultCategory category) {
        if (category == null || category.equals(ResultCategory.ALL)) {
            return null;
        }
        return IndexField.CLASS.getValue() + ":\"" + category.getValue() + "\"";
    }

    /**
     * Gets text that contains highlighted search words.
     * Neighbouring highlighted words are merged into one highlighted phrase.
     * @param response The response to the search query, contains search results.
     * @param uuid id of the found document.
     * @param field searched field.
     * @return The concatenated highlighted snippets of the field, or an empty string if the response
     *         carries no highlighting for the given document and field.
     */
    private String getHighlightedText(QueryResponse response, String uuid, IndexField field) {
        Map<String, Map<String, List<String>>> highlighting = response.getHighlighting();
        if (highlighting == null) {
            return "";
        }
        Map<String, List<String>> documentHighlighting = highlighting.get(uuid);
        if (documentHighlighting == null) {
            return "";
        }
        List<String> highlightedTextList = documentHighlighting.get(field.getValue());
        if (highlightedTextList == null) {
            return "";
        }
        StringBuilder sb = new StringBuilder();
        for (String highlightedText : highlightedTextList) {
            sb.append(highlightedText.replace(FullTextSearchUtils.HIGHLIGHT_MERGE_SEQUENCE, " "));
        }
        return sb.toString();
    }

    /**
     * Fetches all indexed documents.
     * @return All indexed documents.
     * @throws SolrServerException declared for callers; the delegated calls handle Solr failures
     *         themselves and report them as an empty result.
     */
    public List<FullTextResult> getAllResults() throws SolrServerException {
        int resultsFound = getTotalNumberOfDocumentsForQuery("*", ResultCategory.ALL);
        return getResultsForQuery("*", ResultCategory.ALL, 0, resultsFound);
    }

    /**
     * Gets values to autocomplete for the input string.
     * The autocomplete feature works for multivalued fields and is based on a highlighting trick.
     * See http://solr.pl/en/2013/02/25/autocomplete-on-multivalued-fields-using-highlighting/
     * @param keywordStart The beginning of the keyword typed so far.
     * @return Autocomplete values in lower case, ordered by descending frequency.
     * @throws SolrServerException if the query could not be executed.
     */
    public Set<String> getTextToAutocomplete(String keywordStart) throws SolrServerException {
        String autocompleteField = IndexField.AUTOCOMPLETE.getValue();

        SolrQuery query = new SolrQuery();
        // NOTE(review): keywordStart is placed into the query unescaped, so query syntax characters
        // and whitespace in user input are interpreted by the query parser - consider escaping.
        query.setQuery(autocompleteField + ":" + keywordStart);
        query.setFields(autocompleteField);
        query.setHighlight(true);
        query.setParam("hl.fl", autocompleteField);
        query.setHighlightSimplePre("");
        query.setHighlightSimplePost("");
        query.setRows(FullTextSearchUtils.AUTOCOMPLETE_ROWS);

        QueryResponse response = solrServer.query(query);

        // phrase -> frequency; the tree map keeps phrases with equal frequency in alphabetical order
        Map<String, Integer> suggestions = new TreeMap<String, Integer>();
        Map<String, Map<String, List<String>>> highlighting = response.getHighlighting();
        if (highlighting != null) {
            for (Map<String, List<String>> documentHighlighting : highlighting.values()) {
                List<String> resultsPerDocument =
                        documentHighlighting == null ? null : documentHighlighting.get(autocompleteField);
                if (resultsPerDocument != null) {
                    for (String result : resultsPerDocument) {
                        addSuggestion(result, suggestions);
                    }
                }

                // One document may contribute several phrases, so the limit can be overshot;
                // ">=" makes sure the early exit is still taken in that case.
                if (suggestions.size() >= FullTextSearchUtils.AUTOCOMPLETE_ROWS) {
                    break;
                }
            }
        }

        return sortByValue(suggestions).keySet();
    }

    /**
     * Parses one highlighted autocomplete entry of the form {@code phrase#frequency} and stores it.
     * An entry without the {@code #} delimiter or with an unparsable frequency gets frequency 0.
     * @param entry The highlighted entry.
     * @param suggestions The phrase-frequency map the parsed entry is put into.
     */
    private static void addSuggestion(String entry, Map<String, Integer> suggestions) {
        String phrase;
        int frequency;
        int delimiterPosition = entry.lastIndexOf('#');
        if (delimiterPosition == -1) { // autocomplete phrase was copied from title
            phrase = entry;
            frequency = 0;
        } else {
            phrase = entry.substring(0, delimiterPosition);
            try {
                frequency = Integer.parseInt(entry.substring(delimiterPosition + 1));
            } catch (NumberFormatException e) {
                frequency = 0;
            }
        }
        // Locale.ROOT keeps the lower-casing independent of the server's default locale.
        suggestions.put(phrase.toLowerCase(Locale.ROOT), frequency);
    }

    /**
     * Deletes all documents from the index.
     * NOTE(review): no commit is issued here; whether the deletion becomes visible depends on a commit
     * happening elsewhere (e.g. auto-commit) - confirm.
     * @throws IOException
     * @throws SolrServerException
     */
    public void cleanupIndex() throws IOException, SolrServerException {
        solrServer.deleteByQuery("*:*");
    }

    /**
     * Gets the number of all documents matching the query.
     * @param queryString The query string.
     * @param category The result category to filter by; {@code null} or {@link ResultCategory#ALL}
     *                 adds no filter.
     * @return The number of all documents matching the query (capped at {@link Integer#MAX_VALUE}),
     *         or 0 if the query could not be executed.
     */
    public int getTotalNumberOfDocumentsForQuery(String queryString, ResultCategory category) {
        SolrQuery q = new SolrQuery();
        q.setQuery(queryString);
        String categoryFilter = categoryFilter(category);
        if (categoryFilter != null) {
            q.addFilterQuery(categoryFilter);
        }
        q.setRows(0); // don't actually request any data
        try {
            long numFound = solrServer.query(q).getResults().getNumFound();
            // the hit count is a long; clamp instead of silently wrapping around
            return (int) Math.min(numFound, (long) Integer.MAX_VALUE);
        } catch (SolrServerException e) {
            // the exception may have no cause, so log the exception itself
            log.error("Counting documents for the query failed", e);
            return 0;
        }
    }

    /**
     * Helper method for sorting a map by its map values in descending order.
     * Entries with equal values keep the iteration order of the input map.
     * @param map The map to be sorted.
     * @param <K> the map key type
     * @param <V> the map value type
     * @return A new map whose iteration order follows the descending values.
     */
    public static <K, V extends Comparable<? super V>> Map<K, V> sortByValue(Map<K, V> map) {
        List<Map.Entry<K, V>> entries = new ArrayList<Map.Entry<K, V>>(map.entrySet());
        Collections.sort(entries, new Comparator<Map.Entry<K, V>>() {
            @Override
            public int compare(Map.Entry<K, V> o1, Map.Entry<K, V> o2) {
                // reversed operands: higher values come first
                return o2.getValue().compareTo(o1.getValue());
            }
        });

        Map<K, V> result = new LinkedHashMap<K, V>();
        for (Map.Entry<K, V> entry : entries) {
            result.put(entry.getKey(), entry.getValue());
        }
        return result;
    }

    /**
     * Given the input query, finds out the total count of full text results for each full text result type.
     * @param solrQuery The search query.
     * @return Map containing category-count pairs plus an entry for {@link ResultCategory#ALL} holding
     *         the sum of all counts. If the query could not be executed, only the
     *         {@link ResultCategory#ALL} entry with count 0 is returned.
     */
    public Map<String, Long> getCategoryFacets(String solrQuery) {
        SolrQuery query = new SolrQuery(solrQuery);
        query.setParam("fl", IndexField.UUID.getValue());
        query.setHighlight(false);
        query.setFacet(true);
        query.addFacetField(IndexField.CLASS.getValue());

        Map<String, Long> results = new HashMap<String, Long>();
        long totalCount = 0;

        List<FacetField> facets = null;
        try {
            facets = solrServer.query(query).getFacetFields();
        } catch (SolrServerException e) {
            // fall through with no facets instead of dereferencing a missing response
            log.error("Facet query failed", e);
        }

        if (facets != null) {
            for (FacetField field : facets) {
                log.info("count: " + field.getValueCount());
                List<FacetField.Count> facetEntries = field.getValues();
                if (facetEntries == null) {
                    continue;
                }
                for (FacetField.Count count : facetEntries) {
                    long countValue = count.getCount();
                    results.put(count.getName(), countValue);
                    log.info(count.getName() + ", " + countValue);
                    totalCount += countValue;
                }
            }
        }

        // add a "facet" for all results
        results.put(ResultCategory.ALL.getValue(), totalCount);

        return results;
    }
}