org.intermine.api.searchengine.solr.SolrIndexHandler.java Source code

Introduction

Here is the source code for org.intermine.api.searchengine.solr.SolrIndexHandler.java, the Solr implementation of InterMine's IndexHandler interface, which builds the keyword search index.

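Before the listing, here is a minimal, hypothetical usage sketch (not part of the original file). The ObjectStore and the class-keys map are assumed to be supplied by the surrounding InterMine build code, and the helper class name KeywordIndexBuilder is illustrative only; the sketch relies solely on the createIndex(ObjectStore, Map) method shown in the source below.

import java.io.IOException;
import java.util.List;
import java.util.Map;

import org.intermine.api.searchengine.solr.SolrIndexHandler;
import org.intermine.metadata.FieldDescriptor;
import org.intermine.objectstore.ObjectStore;

public class KeywordIndexBuilder {

    // Builds the Solr keyword search index. The ObjectStore and the class-keys
    // map are assumed to come from the caller (e.g. InterMine's build tasks).
    public static void buildIndex(ObjectStore os,
            Map<String, List<FieldDescriptor>> classKeys) throws IOException {
        SolrIndexHandler indexHandler = new SolrIndexHandler();
        indexHandler.createIndex(os, classKeys);
    }
}
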
Source

package org.intermine.api.searchengine.solr;

/*
 * Copyright (C) 2002-2018 FlyMine
 *
 * This code may be freely distributed and modified under the
 * terms of the GNU Lesser General Public Licence.  This should
 * be distributed with the code.  See the LICENSE file for more
 * information or http://www.gnu.org/copyleft/lesser.html.
 *
 */

import org.apache.log4j.Logger;
import org.apache.solr.client.solrj.SolrClient;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.request.schema.AnalyzerDefinition;
import org.apache.solr.client.solrj.request.schema.FieldTypeDefinition;
import org.apache.solr.client.solrj.request.schema.SchemaRequest;
import org.apache.solr.client.solrj.response.UpdateResponse;
import org.apache.solr.client.solrj.response.schema.SchemaResponse;
import org.apache.solr.common.SolrInputDocument;
import org.intermine.api.searchengine.IndexHandler;
import org.intermine.api.searchengine.KeywordSearchFacetData;
import org.intermine.api.searchengine.KeywordSearchPropertiesManager;
import org.intermine.metadata.FieldDescriptor;
import org.intermine.objectstore.ObjectStore;
import org.intermine.util.ObjectPipe;

import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

/**
 * Solr Implementation of IndexHandler
 *
 * @author arunans23
 */
public final class SolrIndexHandler implements IndexHandler {
    private static final Logger LOG = Logger.getLogger(SolrIndexHandler.class);

    //this field type is analyzed
    private static final String ANALYZED_FIELD_TYPE_NAME = "analyzed_string";

    //this field type is not analyzed
    private static final String RAW_FIELD_TYPE_NAME = "raw_string";

    private ObjectPipe<SolrInputDocument> indexingQueue = new ObjectPipe<SolrInputDocument>(100000);

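    /**
     * Builds the keyword search index: registers the custom field types and
     * fields through the Solr Schema API, clears any previously indexed
     * documents, then streams SolrInputDocuments from a fetcher thread and
     * adds them to Solr in batches, committing (and optionally optimizing)
     * at the end.
     */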
    @Override
    public void createIndex(ObjectStore os, Map<String, List<FieldDescriptor>> classKeys) throws IOException {
        long time = System.currentTimeMillis();
        LOG.debug("Creating keyword search index...");

        SolrClient solrClient = SolrClientManager.getClientInstance(os);

        // (re)register the custom field types, then delete any previous documents in Solr

        LOG.debug("Deleting previous index");
        long deleteStartTime = System.currentTimeMillis();

        createFieldTypeDefinitions(solrClient);

        try {
            solrClient.deleteByQuery("*:*");
            solrClient.commit();

        } catch (SolrServerException e) {
            LOG.error("Deleting old index failed", e);
        }

        LOG.debug("Deleting previous index finished; took "
                + (System.currentTimeMillis() - deleteStartTime) + "ms");

        KeywordSearchPropertiesManager keywordSearchPropertiesManager = KeywordSearchPropertiesManager
                .getInstance(os);

        addFieldNameToSchema("classname", ANALYZED_FIELD_TYPE_NAME, false, true, solrClient);
        addFieldNameToSchema("Category", "string", false, true, solrClient);

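        // each facet field gets an analyzed copy for searching plus a raw
        // "facet_*" string field (filled via a copy-field rule) for faceting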
        for (KeywordSearchFacetData facetData : keywordSearchPropertiesManager.getFacets()) {
            for (String field : facetData.getFields()) {
                addFieldNameToSchema(field, ANALYZED_FIELD_TYPE_NAME, false, true, solrClient);
                addFieldNameToSchema("facet_" + field, "string", false, true, solrClient);
                addCopyFieldToSchema(field, "facet_" + field, solrClient);
            }
        }

        LOG.info("Starting fetcher thread...");
        SolrObjectHandler fetchThread = new SolrObjectHandler(os, keywordSearchPropertiesManager.getClassKeys(),
                indexingQueue, keywordSearchPropertiesManager.getIgnoredClasses(),
                keywordSearchPropertiesManager.getIgnoredFields(),
                keywordSearchPropertiesManager.getSpecialReferences(),
                keywordSearchPropertiesManager.getClassBoost(), keywordSearchPropertiesManager.getFacets(),
                keywordSearchPropertiesManager.getAttributePrefixes(), solrClient);
        fetchThread.start();

        int indexed = 0;

        List<SolrInputDocument> solrInputDocuments = new ArrayList<SolrInputDocument>();

        // loop and index while we still have fetchers running
        LOG.debug("Starting to index...");

        long indexStartTime = System.currentTimeMillis();

        while (indexingQueue.hasNext()) {
            SolrInputDocument doc = indexingQueue.next();

            solrInputDocuments.add(doc);

            indexed++;

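            // once a full batch has accumulated, flush it to Solr and log throughput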
            if (solrInputDocuments.size() == keywordSearchPropertiesManager.getIndexBatchSize()) {

                long batchStartTime = System.currentTimeMillis();

                addSolrDocuments(solrClient, solrInputDocuments);

                // number of documents written in this batch (always one full batch here)
                int batchDocs = solrInputDocuments.size();

                LOG.info("docs indexed=" + indexed + "; thread state=" + fetchThread.getState() + "; docs/ms="
                        + batchDocs * 1.0F / (System.currentTimeMillis() - batchStartTime) + "; memory="
                        + Runtime.getRuntime().freeMemory() / 1024 + "k/" + Runtime.getRuntime().maxMemory() / 1024
                        + "k" + "; time=" + (System.currentTimeMillis() - time) + "ms");

                solrInputDocuments.clear();
            }

        }

        addSolrDocuments(solrClient, solrInputDocuments);

        commit(solrClient);

        if (keywordSearchPropertiesManager.getEnableOptimize()) {
            optimize(solrClient);
        }

        LOG.debug("Solr indexing ends and it took " + (System.currentTimeMillis() - indexStartTime) + "ms");

        if (fetchThread.getException() != null) {

            throw new RuntimeException("Indexing failed.", fetchThread.getException());
        }

        time = System.currentTimeMillis() - time;
        long seconds = time / 1000;
        LOG.info("Indexing of " + indexed + " documents finished in "
                + String.format("%02d:%02d.%03d", seconds / 60, seconds % 60, time % 1000)
                + " (mm:ss.SSS)");
    }

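    /**
     * Adds a batch of documents to Solr (the commit is issued separately).
     * Solr server errors are logged rather than rethrown.
     */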
    private void addSolrDocuments(SolrClient solrClient, List<SolrInputDocument> solrDocumentList)
            throws IOException {
        // add the current batch of documents to Solr; the commit is issued separately

        if (!solrDocumentList.isEmpty()) {

            LOG.debug("Adding a batch of " + solrDocumentList.size() + " documents to Solr");

            try {
                UpdateResponse response = solrClient.add(solrDocumentList, 30000);

            } catch (SolrServerException e) {

                LOG.error("Error while commiting the SolrInputdocuments to the Solrclient. "
                        + "Make sure the Solr instance is up", e);

                e.printStackTrace();
            }
        }

    }

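    /**
     * Adds a multi-valued, non-required field of the given type to the Solr
     * schema via the Schema API.
     */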
    private void addFieldNameToSchema(String fieldName, String fieldType, boolean stored, boolean indexed,
            SolrClient solrClient) throws IOException {

        Map<String, Object> fieldAttributes = new HashMap<String, Object>();
        fieldAttributes.put("name", fieldName);
        fieldAttributes.put("type", fieldType);
        fieldAttributes.put("stored", stored);
        fieldAttributes.put("indexed", indexed);
        fieldAttributes.put("multiValued", true);
        fieldAttributes.put("required", false);

        try {
            SchemaRequest.AddField schemaRequest = new SchemaRequest.AddField(fieldAttributes);
            SchemaResponse.UpdateResponse response = schemaRequest.process(solrClient);

        } catch (SolrServerException e) {
            LOG.error("Error while adding fields to the solrclient.", e);

            e.printStackTrace();
        }

    }

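    /**
     * Replaces the copy-field rule from source to dest in the Solr schema:
     * any existing rule is deleted before the new one is added.
     */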
    private void addCopyFieldToSchema(String source, String dest, SolrClient solrClient) throws IOException {

        try {

            List<String> copyFieldAttributes = new ArrayList<String>();
            copyFieldAttributes.add(dest);

            SchemaRequest.DeleteCopyField deleteCopyField = new SchemaRequest.DeleteCopyField(source,
                    copyFieldAttributes);

            SchemaResponse.UpdateResponse deleteCopyFieldRes = deleteCopyField.process(solrClient);

            SchemaRequest.AddCopyField schemaCopyRequest = new SchemaRequest.AddCopyField(source,
                    copyFieldAttributes);

            SchemaResponse.UpdateResponse copyFieldResponse = schemaCopyRequest.process(solrClient);

        } catch (SolrServerException e) {
            LOG.error("Error while adding copyfields to the solrclient.", e);
            e.printStackTrace();
        }
    }

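    /**
     * Commits pending changes to Solr, logging (but not rethrowing) Solr server errors.
     */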
    private void commit(SolrClient solrClient) throws IOException {
        try {
            solrClient.commit();

        } catch (SolrServerException e) {
            LOG.error("Error while commiting.", e);
            e.printStackTrace();
        }
    }

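    /**
     * Optimizes the Solr index and logs how long the optimize took.
     */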
    private void optimize(SolrClient solrClient) throws IOException {

        long startTime = System.currentTimeMillis();

        try {
            solrClient.optimize();

            LOG.info("Optimizing Solr Index finished in " + (System.currentTimeMillis() - startTime) + "ms");

        } catch (SolrServerException e) {
            LOG.error("Error while optimizing", e);
            e.printStackTrace();
        }

    }

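    /**
     * Registers the two custom field types used by the index: an analyzed
     * "analyzed_string" type (whitespace tokenizer + lowercase filter) and a
     * "raw_string" type (keyword tokenizer + lowercase filter).
     */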
    private void createFieldTypeDefinitions(SolrClient solrClient) throws IOException {
        FieldTypeDefinition analyzedFieldTypeDefinition = new FieldTypeDefinition();

        Map<String, Object> analyzedFieldTypeAttributes = new HashMap<String, Object>();
        analyzedFieldTypeAttributes.put("name", ANALYZED_FIELD_TYPE_NAME);
        analyzedFieldTypeAttributes.put("class", "solr.TextField");
        analyzedFieldTypeAttributes.put("positionIncrementGap", 100);
        analyzedFieldTypeAttributes.put("multiValued", true);

        AnalyzerDefinition indexAnalyzerDefinition1 = new AnalyzerDefinition();
        Map<String, Object> indexTokenizerAttributes1 = new HashMap<String, Object>();
        indexTokenizerAttributes1.put("class", "solr.WhitespaceTokenizerFactory");
        indexAnalyzerDefinition1.setTokenizer(indexTokenizerAttributes1);
        Map<String, Object> indexLowerCaseFilterAttributes1 = new HashMap<String, Object>();
        indexLowerCaseFilterAttributes1.put("class", "solr.LowerCaseFilterFactory");
        List<Map<String, Object>> indexFilterAttributes1 = new ArrayList<Map<String, Object>>();
        indexFilterAttributes1.add(indexLowerCaseFilterAttributes1);
        indexAnalyzerDefinition1.setFilters(indexFilterAttributes1);

        AnalyzerDefinition queryAnalyzerDefinition1 = new AnalyzerDefinition();
        Map<String, Object> queryTokenizerAttributes1 = new HashMap<String, Object>();
        queryTokenizerAttributes1.put("class", "solr.WhitespaceTokenizerFactory");
        queryAnalyzerDefinition1.setTokenizer(queryTokenizerAttributes1);
        Map<String, Object> queryLowerCaseFilterAttributes1 = new HashMap<String, Object>();
        queryLowerCaseFilterAttributes1.put("class", "solr.LowerCaseFilterFactory");
        List<Map<String, Object>> queryFilterAttributes1 = new ArrayList<Map<String, Object>>();
        queryFilterAttributes1.add(queryLowerCaseFilterAttributes1);
        queryAnalyzerDefinition1.setFilters(queryFilterAttributes1);

        analyzedFieldTypeDefinition.setAttributes(analyzedFieldTypeAttributes);
        analyzedFieldTypeDefinition.setIndexAnalyzer(indexAnalyzerDefinition1);
        analyzedFieldTypeDefinition.setQueryAnalyzer(queryAnalyzerDefinition1);

        try {
            SchemaRequest.AddFieldType schemaRequest = new SchemaRequest.AddFieldType(analyzedFieldTypeDefinition);

            SchemaResponse.UpdateResponse response = schemaRequest.process(solrClient);

        } catch (SolrServerException e) {
            LOG.error("Error while adding fieldtype '" + ANALYZED_FIELD_TYPE_NAME + "' to the solrclient.", e);

            e.printStackTrace();
        }

        FieldTypeDefinition rawFieldTypeDefinition = new FieldTypeDefinition();

        Map<String, Object> rawFieldTypeAttributes = new HashMap<String, Object>();
        rawFieldTypeAttributes.put("name", RAW_FIELD_TYPE_NAME);
        rawFieldTypeAttributes.put("class", "solr.TextField");
        rawFieldTypeAttributes.put("positionIncrementGap", 100);
        rawFieldTypeAttributes.put("multiValued", true);

        AnalyzerDefinition indexAnalyzerDefinition2 = new AnalyzerDefinition();
        Map<String, Object> indexTokenizerAttributes2 = new HashMap<String, Object>();
        indexTokenizerAttributes2.put("class", "solr.KeywordTokenizerFactory");
        indexAnalyzerDefinition2.setTokenizer(indexTokenizerAttributes2);
        Map<String, Object> indexLowerCaseFilterAttributes2 = new HashMap<String, Object>();
        indexLowerCaseFilterAttributes2.put("class", "solr.LowerCaseFilterFactory");
        List<Map<String, Object>> indexFilterAttributes2 = new ArrayList<Map<String, Object>>();
        indexFilterAttributes2.add(indexLowerCaseFilterAttributes2);
        indexAnalyzerDefinition2.setFilters(indexFilterAttributes2);

        AnalyzerDefinition queryAnalyzerDefinition2 = new AnalyzerDefinition();
        Map<String, Object> queryTokenizerAttributes2 = new HashMap<String, Object>();
        queryTokenizerAttributes2.put("class", "solr.KeywordTokenizerFactory");
        queryAnalyzerDefinition2.setTokenizer(queryTokenizerAttributes2);
        Map<String, Object> queryLowerCaseFilterAttributes2 = new HashMap<String, Object>();
        queryLowerCaseFilterAttributes2.put("class", "solr.LowerCaseFilterFactory");
        List<Map<String, Object>> queryFilterAttributes2 = new ArrayList<Map<String, Object>>();
        queryFilterAttributes2.add(queryLowerCaseFilterAttributes2);
        queryAnalyzerDefinition2.setFilters(queryFilterAttributes2);

        rawFieldTypeDefinition.setAttributes(rawFieldTypeAttributes);
        rawFieldTypeDefinition.setIndexAnalyzer(indexAnalyzerDefinition2);
        rawFieldTypeDefinition.setQueryAnalyzer(queryAnalyzerDefinition2);

        try {
            SchemaRequest.AddFieldType schemaRequest = new SchemaRequest.AddFieldType(rawFieldTypeDefinition);

            SchemaResponse.UpdateResponse response = schemaRequest.process(solrClient);

        } catch (SolrServerException e) {
            LOG.error("Error while adding fieldtype '" + RAW_FIELD_TYPE_NAME + "' to the solrclient.", e);

            e.printStackTrace();
        }
    }

}