de.ingrid.admin.elasticsearch.IndexImpl.java Source code

Java tutorial

Introduction

Here is the source code for de.ingrid.admin.elasticsearch.IndexImpl.java

Source

/*
 * **************************************************-
 * ingrid-iplug-se-iplug
 * ==================================================
 * Copyright (C) 2014 - 2018 wemove digital solutions GmbH
 * ==================================================
 * Licensed under the EUPL, Version 1.1 or  as soon they will be
 * approved by the European Commission - subsequent versions of the
 * EUPL (the "Licence");
 * 
 * You may not use this work except in compliance with the Licence.
 * You may obtain a copy of the Licence at:
 * 
 * http://ec.europa.eu/idabc/eupl5
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the Licence is distributed on an "AS IS" basis,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the Licence for the specific language governing permissions and
 * limitations under the Licence.
 * **************************************************#
 */
package de.ingrid.admin.elasticsearch;

import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
import java.util.Map;

import org.apache.commons.lang.ArrayUtils;
import org.apache.commons.lang.StringUtils;
import org.apache.log4j.Logger;
import org.elasticsearch.action.search.SearchPhaseExecutionException;
import org.elasticsearch.action.search.SearchRequestBuilder;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.action.search.SearchType;
import org.elasticsearch.action.search.ShardSearchFailure;
import org.elasticsearch.index.query.FilterBuilders;
import org.elasticsearch.index.query.QueryBuilder;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.search.SearchHitField;
import org.elasticsearch.search.SearchHits;
import org.elasticsearch.search.aggregations.AbstractAggregationBuilder;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Component;

import de.ingrid.admin.Config;
import de.ingrid.admin.JettyStarter;
import de.ingrid.admin.elasticsearch.converter.QueryConverter;
import de.ingrid.utils.ElasticDocument;
import de.ingrid.utils.IDetailer;
import de.ingrid.utils.IRecordLoader;
import de.ingrid.utils.ISearcher;
import de.ingrid.utils.IngridDocument;
import de.ingrid.utils.IngridHit;
import de.ingrid.utils.IngridHitDetail;
import de.ingrid.utils.IngridHits;
import de.ingrid.utils.PlugDescription;
import de.ingrid.utils.dsc.Column;
import de.ingrid.utils.dsc.Record;
import de.ingrid.utils.query.IngridQuery;

@Component
public class IndexImpl implements ISearcher, IDetailer, IRecordLoader {

    public static final String DETAIL_URL = "url";

    private static Logger log = Logger.getLogger(IndexImpl.class);

    private QueryConverter queryConverter;

    private FacetConverter facetConverter;

    private static final String ELASTIC_SEARCH_INDEX = "es_index";

    private static final String ELASTIC_SEARCH_INDEX_TYPE = "es_type";

    // SearchType see:
    // http://www.elasticsearch.org/guide/en/elasticsearch/reference/current/search-request-search-type.html
    private SearchType searchType = null;

    private Config config;

    private String[] detailFields;

    private IndexManager indexManager;

    @Autowired
    public IndexImpl(IndexManager indexManager, QueryConverter qc, FacetConverter fc) {
        this.config = JettyStarter.getInstance().config;
        this.indexManager = indexManager;
        this.searchType = ElasticSearchUtils.getSearchTypeFromString(config.searchType);
        this.detailFields = (String[]) ArrayUtils.addAll(
                new String[] { config.indexFieldTitle, config.indexFieldSummary },
                config.additionalSearchDetailFields);

        try {
            this.queryConverter = qc;
            this.facetConverter = fc;

            log.info("Elastic Search Settings: " + indexManager.printSettings());

        } catch (Exception e) {
            log.error("Error during initialization of ElasticSearch-Client!");
            e.printStackTrace();
        }

    }

    @Override
    public IngridHits search(IngridQuery ingridQuery, int startHit, int num) {

        // convert InGrid-query to QueryBuilder
        QueryBuilder query = queryConverter.convert(ingridQuery);

        QueryBuilder funcScoreQuery = null;
        if (config.indexEnableBoost) {
            funcScoreQuery = queryConverter.addScoreModifier(query);
        }

        boolean isLocationSearch = containsBoundingBox(ingridQuery);
        boolean hasFacets = ingridQuery.containsKey("FACETS");
        String[] instances = getSearchInstances(ingridQuery);

        // request grouping information from index if necessary
        // see IndexImpl.getHitsFromResponse for usage
        String groupedBy = ingridQuery.getGrouped();
        String[] fields = null;
        if (IngridQuery.GROUPED_BY_PARTNER.equalsIgnoreCase(groupedBy)) {
            fields = new String[] { IngridQuery.PARTNER };
        } else if (IngridQuery.GROUPED_BY_ORGANISATION.equalsIgnoreCase(groupedBy)) {
            fields = new String[] { IngridQuery.PROVIDER };
        } else if (IngridQuery.GROUPED_BY_DATASOURCE.equalsIgnoreCase(groupedBy)) {
            // the necessary value id the results ID
        }

        String[] indexNames = JettyStarter.getInstance().config.docProducerIndices;
        // search prepare
        SearchRequestBuilder srb = indexManager.getClient().prepareSearch(indexNames).setSearchType(searchType)
                .setQuery(config.indexEnableBoost ? funcScoreQuery : query) // Query
                .setFrom(startHit).setSize(num).setExplain(false);

        // search only in defined types within the index, if defined
        if (instances.length > 0) {
            srb.setTypes(instances);
        }

        if (fields == null) {
            srb = srb.setNoFields();
        } else {
            srb = srb.addFields(fields);
        }

        // Filter for results only with location information
        if (isLocationSearch) {
            srb.setPostFilter(FilterBuilders.existsFilter("x1"));
        }

        // pre-processing: add facets/aggregations to the query
        if (hasFacets) {
            List<AbstractAggregationBuilder> aggregations = facetConverter.getAggregations(ingridQuery);
            for (AbstractAggregationBuilder aggregation : aggregations) {
                srb.addAggregation(aggregation);
            }
        }

        if (log.isDebugEnabled()) {
            log.debug("Final Elastic Search Query: \n" + srb);
        }

        // search!
        try {
            SearchResponse searchResponse = srb.execute().actionGet();

            // convert to IngridHits
            IngridHits hits = getHitsFromResponse(searchResponse, ingridQuery);

            // post-processing: extract and convert facets to InGrid-Document
            if (hasFacets) {
                // add facets from response
                IngridDocument facets = facetConverter.convertFacetResultsToDoc(searchResponse);
                hits.put("FACETS", facets);
            }

            return hits;
        } catch (SearchPhaseExecutionException ex) {
            log.error("Search failed on index: " + indexNames, ex);
            return new IngridHits(0, new IngridHit[0]);
        }
    }

    private boolean containsBoundingBox(IngridQuery ingridQuery) {
        boolean found = ingridQuery.containsField("x1");

        // also try to look in clauses 
        if (!found) {
            for (IngridQuery clause : ingridQuery.getAllClauses()) {
                if (clause.containsField("x1")) {
                    return true;
                }
            }
        }
        return found;
    }

    /**
     * Check first the query for a hidden field which contains the information of the instances to search in for. If there's none, then use
     * the defined one in the configuration. The parameter in the query should be only used for an internal search within the iPlug.
     * 
     * @param ingridQuery
     * @return
     */
    private String[] getSearchInstances(IngridQuery ingridQuery) {
        String[] instances = (String[]) ingridQuery.getArray("searchInInstances");
        if (instances == null || instances.length == 0) {
            instances = JettyStarter.getInstance().config.indexSearchInTypes.toArray(new String[0]);
        }
        return instances;
    }

    /**
     * Create InGrid hits from ES hits. Add grouping information.
     * 
     * @param searchResponse
     * @param ingridQuery
     * @return
     */
    private IngridHits getHitsFromResponse(SearchResponse searchResponse, IngridQuery ingridQuery) {
        for (ShardSearchFailure failure : searchResponse.getShardFailures()) {
            log.error("Error searching in index: " + failure.reason());
        }

        SearchHits hits = searchResponse.getHits();

        // the size will not be bigger than it was requested in the query with
        // 'num'
        // so we can convert from long to int here!
        int length = (int) hits.getHits().length;
        int totalHits = (int) hits.getTotalHits();
        IngridHit[] hitArray = new IngridHit[length];
        int pos = 0;

        if (log.isDebugEnabled()) {
            log.debug("Received " + length + " from " + totalHits + " hits.");
        }

        String groupBy = ingridQuery.getGrouped();
        for (SearchHit hit : hits.hits()) {
            IngridHit ingridHit = new IngridHit(config.communicationProxyUrl, hit.getId(), -1, hit.getScore());
            ingridHit.put(ELASTIC_SEARCH_INDEX, hit.getIndex());
            ingridHit.put(ELASTIC_SEARCH_INDEX_TYPE, hit.getType());

            // get grouing information, add if exist
            String groupValue = null;
            if (IngridQuery.GROUPED_BY_PARTNER.equalsIgnoreCase(groupBy)) {
                SearchHitField field = hit.field(IngridQuery.PARTNER);
                if (field != null) {
                    groupValue = field.getValue().toString();
                }
            } else if (IngridQuery.GROUPED_BY_ORGANISATION.equalsIgnoreCase(groupBy)) {
                SearchHitField field = hit.field(IngridQuery.PROVIDER);
                if (field != null) {
                    groupValue = field.getValue().toString();
                }
            } else if (IngridQuery.GROUPED_BY_DATASOURCE.equalsIgnoreCase(groupBy)) {
                groupValue = config.communicationProxyUrl;
                if (config.groupByUrl) {
                    try {
                        groupValue = new URL(hit.getId()).getHost();
                    } catch (MalformedURLException e) {
                        log.warn("can not group url: " + groupValue, e);
                    }
                }
            }
            if (groupValue != null) {
                ingridHit.addGroupedField(groupValue);
            }

            hitArray[pos] = ingridHit;
            pos++;
        }

        IngridHits ingridHits = new IngridHits(totalHits, hitArray);

        return ingridHits;
    }

    @Override
    public IngridHitDetail getDetail(IngridHit hit, IngridQuery ingridQuery, String[] requestedFields) {
        for (int i = 0; i < requestedFields.length; i++) {
            requestedFields[i] = requestedFields[i].toLowerCase();
        }
        String documentId = hit.getDocumentId();
        String fromIndex = hit.getString(ELASTIC_SEARCH_INDEX);
        String fromType = hit.getString(ELASTIC_SEARCH_INDEX_TYPE);
        String[] allFields = (String[]) ArrayUtils.addAll(detailFields, requestedFields);

        // We have to search here again, to get a highlighted summary of the result!
        QueryBuilder query = QueryBuilders.boolQuery()
                .must(QueryBuilders.matchQuery(IngridDocument.DOCUMENT_UID, documentId))
                .must(queryConverter.convert(ingridQuery));

        // search prepare
        SearchRequestBuilder srb = indexManager.getClient().prepareSearch(fromIndex).setTypes(fromType)
                .setSearchType(searchType).setQuery(query) // Query
                .setFrom(0).setSize(1).addHighlightedField(config.indexFieldSummary).addFields(allFields)
                .setExplain(false);

        SearchResponse searchResponse = srb.execute().actionGet();

        SearchHits dHits = searchResponse.getHits();
        SearchHit dHit = dHits.getAt(0);

        String title = "untitled";
        if (dHit.field(config.indexFieldTitle) != null) {
            title = (String) dHit.field(config.indexFieldTitle).getValue();
        }
        String summary = "";
        // try to get the summary first from the highlighted fields
        if (dHit.getHighlightFields().containsKey(config.indexFieldSummary)) {
            summary = StringUtils.join(dHit.getHighlightFields().get(config.indexFieldSummary).fragments(),
                    " ... ");
            // otherwise get it from the original field
        } else if (dHit.field(config.indexFieldSummary) != null) {
            summary = (String) dHit.field(config.indexFieldSummary).getValue();
        }

        IngridHitDetail detail = new IngridHitDetail(hit, title, summary);

        addPlugDescriptionInformations(detail, requestedFields);

        detail.setDocumentId(documentId);
        if (requestedFields != null) {
            for (String field : requestedFields) {
                if (dHit.field(field) != null) {
                    if (dHit.field(field).getValues() instanceof List) {
                        if (dHit.field(field).getValues().size() > 1) {
                            detail.put(field, dHit.field(field).getValues());
                        } else {
                            if (dHit.field(field).getValue() instanceof String) {
                                detail.put(field, new String[] { dHit.field(field).getValue() });
                            } else {
                                detail.put(field, dHit.field(field).getValue());
                            }
                        }
                    } else if (dHit.field(field).getValue() instanceof String) {
                        detail.put(field, new String[] { dHit.field(field).getValue() });
                    } else {
                        detail.put(field, dHit.field(field).getValue());
                    }
                }
            }
        }

        // add additional fields to detail object (such as url for iPlugSE)
        for (String extraDetail : config.additionalSearchDetailFields) {
            SearchHitField field = dHit.getFields().get(extraDetail);
            if (field != null) {
                detail.put(extraDetail, field.getValue());
            }
        }

        return detail;
    }

    private void addPlugDescriptionInformations(IngridHitDetail detail, String[] fields) {
        for (int i = 0; i < fields.length; i++) {
            if (fields[i].equals(PlugDescription.PARTNER)) {
                detail.setArray(PlugDescription.PARTNER, config.partner);
            } else if (fields[i].equals(PlugDescription.PROVIDER)) {
                detail.setArray(PlugDescription.PROVIDER, config.provider);
            }
        }
    }

    @Override
    public IngridHitDetail[] getDetails(IngridHit[] hits, IngridQuery ingridQuery, String[] requestedFields) {
        for (int i = 0; i < requestedFields.length; i++) {
            requestedFields[i] = requestedFields[i].toLowerCase();
        }
        List<IngridHitDetail> details = new ArrayList<IngridHitDetail>();
        for (IngridHit hit : hits) {
            details.add(getDetail(hit, ingridQuery, requestedFields));
        }
        return details.toArray(new IngridHitDetail[0]);
    }

    @Override
    // FIXME: is destroyed automatically via the BEAN!!!
    public void close() {
        try {
            indexManager.shutdown();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    public ElasticDocument getDocById(Object id) {
        String idAsString = String.valueOf(id);
        String[] indexNames = JettyStarter.getInstance().config.docProducerIndices;
        // itereate over all indices until document was found
        for (String indexName : indexNames) {
            Map<String, Object> source = indexManager.getClient().prepareGet(indexName, null, idAsString)
                    .setFetchSource(config.indexFieldsIncluded, config.indexFieldsExcluded).execute().actionGet()
                    .getSource();

            if (source != null) {
                return new ElasticDocument(source);
            }
        }

        return null;
    }

    @SuppressWarnings("unchecked")
    @Override
    public Record getRecord(IngridHit hit) throws Exception {
        String documentId = hit.getDocumentId();
        ElasticDocument document = getDocById(documentId);
        String[] fields = document.keySet().toArray(new String[0]);
        Record record = new Record();
        for (String name : fields) {
            Object stringValue = document.get(name);
            if (stringValue instanceof List) {
                for (String item : (List<String>) stringValue) {
                    Column column = new Column(null, name, null, true);
                    column.setTargetName(name);
                    record.addColumn(column, item);
                }
            } else {
                Column column = new Column(null, name, null, true);
                column.setTargetName(name);
                record.addColumn(column, stringValue);
            }
        }
        return record;
    }

}