org.elasticsearch.river.solr.SolrRiver.java Source code

Introduction

Here is the source code for org.elasticsearch.river.solr.SolrRiver.java, the main class of the Solr river plugin for Elasticsearch. The river queries a running Solr instance page by page over HTTP and bulk-indexes the returned documents into an Elasticsearch index, optionally transforming each document with a script and deleting itself once the import completes.
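
Before reading the source, it may help to see how such a river would be created. The sketch below is illustrative only: it assumes the plugin registers the river under the type "solr", that an Elasticsearch node of the same era is reachable on localhost:9300, and the river and index names ("my_solr_river", "solr_import") are made up. Rivers are created by indexing a "_meta" document into the "_river" index; the settings keys used here mirror the ones parsed in the SolrRiver constructor below.

import org.elasticsearch.client.Client;
import org.elasticsearch.client.transport.TransportClient;
import org.elasticsearch.common.transport.InetSocketTransportAddress;
import org.elasticsearch.common.xcontent.XContentFactory;

public class RegisterSolrRiver {
    public static void main(String[] args) throws Exception {
        // Connect to a local node (hypothetical address)
        Client client = new TransportClient()
                .addTransportAddress(new InetSocketTransportAddress("localhost", 9300));
        try {
            // A river is created by indexing a "_meta" document into the "_river" index.
            // The "solr" and "index" objects mirror the settings parsed in the
            // SolrRiver constructor; "solr" as the river type is an assumption.
            client.prepareIndex("_river", "my_solr_river", "_meta")
                    .setSource(XContentFactory.jsonBuilder()
                            .startObject()
                                .field("type", "solr")
                                .startObject("solr")
                                    .field("url", "http://localhost:8983/solr/")
                                    .field("q", "*:*")
                                    .field("rows", 100)
                                .endObject()
                                .startObject("index")
                                    .field("index", "solr_import")
                                    .field("type", "import")
                                .endObject()
                            .endObject())
                    .execute().actionGet();
        } finally {
            client.close();
        }
    }
}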

Source

/*
 * Licensed to Luca Cavanna (the "Author") under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. Elastic Search licenses this
 * file to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.elasticsearch.river.solr;

import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.SerializationFeature;
import com.fasterxml.jackson.databind.node.ObjectNode;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.util.EntityUtils;
import org.elasticsearch.ElasticsearchIllegalArgumentException;
import org.elasticsearch.action.admin.indices.create.CreateIndexRequestBuilder;
import org.elasticsearch.action.bulk.BulkProcessor;
import org.elasticsearch.action.bulk.BulkRequest;
import org.elasticsearch.action.bulk.BulkResponse;
import org.elasticsearch.action.index.IndexRequest;
import org.elasticsearch.client.Client;
import org.elasticsearch.client.Requests;
import org.elasticsearch.common.Strings;
import org.elasticsearch.common.collect.Maps;
import org.elasticsearch.common.collect.Tuple;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.xcontent.XContentHelper;
import org.elasticsearch.common.xcontent.XContentType;
import org.elasticsearch.common.xcontent.support.XContentMapValues;
import org.elasticsearch.river.AbstractRiverComponent;
import org.elasticsearch.river.River;
import org.elasticsearch.river.RiverName;
import org.elasticsearch.river.RiverSettings;
import org.elasticsearch.script.ExecutableScript;
import org.elasticsearch.script.ScriptService;

import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.net.URLEncoder;
import java.nio.charset.Charset;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.concurrent.atomic.AtomicInteger;

/**
 * Solr river that indexes data taken from a running Solr instance.
 */
public class SolrRiver extends AbstractRiverComponent implements River {

    private final Client client;
    private final ScriptService scriptService;

    private final String solrUrl;

    private final String query;
    private final String[] filterQueries;
    private final String[] fields;
    private final String requestHandler;
    private final String uniqueKey;
    private final int rows;

    private final String indexName;
    private final String typeName;
    private final String settings;
    private final String mapping;
    private final boolean closeOnCompletion;

    private final String script;
    private final Map<String, Object> scriptParams;
    private final String scriptLang;

    private volatile BulkProcessor bulkProcessor;
    private final AtomicInteger start = new AtomicInteger(0);

    private final ObjectMapper objectMapper;
    private final CloseableHttpClient httpClient;

    static final String DEFAULT_UNIQUE_KEY = "id";

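    /**
     * Reads the river configuration: the "solr" object holds the query and connection
     * parameters, the "index" object the target index, type and bulk settings, and the
     * optional "transform" object a script to apply to each document before indexing.
     */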
    @Inject
    @SuppressWarnings("unchecked")
    protected SolrRiver(RiverName riverName, RiverSettings riverSettings, Client client,
            ScriptService scriptService) {
        super(riverName, riverSettings);
        this.client = client;
        this.scriptService = scriptService;

        this.closeOnCompletion = XContentMapValues
                .nodeBooleanValue(riverSettings.settings().get("close_on_completion"), true);

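        // Defaults for the Solr request; each of them can be overridden
        // through the "solr" object of the river settings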
        String url = "http://localhost:8983/solr/";
        String q = "*:*";
        String uniqueKey = DEFAULT_UNIQUE_KEY;
        int rows = 10;
        String qt = "select";
        String[] fq = null;
        String[] fl = null;
        if (riverSettings.settings().containsKey("solr")) {
            Map<String, Object> solrSettings = (Map<String, Object>) riverSettings.settings().get("solr");
            url = XContentMapValues.nodeStringValue(solrSettings.get("url"), url);
            q = XContentMapValues.nodeStringValue(solrSettings.get("q"), q);
            rows = XContentMapValues.nodeIntegerValue(solrSettings.get("rows"), rows);
            fq = readArrayOrString(solrSettings.get("fq"));
            fl = readArrayOrString(solrSettings.get("fl"));
            qt = XContentMapValues.nodeStringValue(solrSettings.get("qt"), qt);
            uniqueKey = XContentMapValues.nodeStringValue(solrSettings.get("uniqueKey"), uniqueKey);
        }
        this.solrUrl = url;
        this.query = q;
        this.rows = rows;
        this.uniqueKey = uniqueKey;
        this.filterQueries = fq;
        this.fields = fl;
        this.requestHandler = qt;

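        // Target index configuration ("index" object); the index name defaults to the river type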
        String index = riverName.type();
        String type = "import";
        int maxConcurrentBulk = 10;
        int bulkSize = 100;
        String mapping = null;
        String settings = null;
        if (riverSettings.settings().containsKey("index")) {
            Map<String, Object> indexSettings = (Map<String, Object>) riverSettings.settings().get("index");
            index = XContentMapValues.nodeStringValue(indexSettings.get("index"), index);
            type = XContentMapValues.nodeStringValue(indexSettings.get("type"), type);
            bulkSize = XContentMapValues.nodeIntegerValue(indexSettings.get("bulk_size"), bulkSize);
            maxConcurrentBulk = XContentMapValues.nodeIntegerValue(indexSettings.get("max_concurrent_bulk"),
                    maxConcurrentBulk);
            settings = XContentMapValues.nodeStringValue(indexSettings.get("settings"), settings);
            mapping = XContentMapValues.nodeStringValue(indexSettings.get("mapping"), mapping);
        }
        this.settings = settings;
        this.mapping = mapping;
        this.indexName = index;
        this.typeName = type;

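        // Optional transform script ("transform" object): script source, language and parameters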
        String script = null;
        Map<String, Object> scriptParams = Maps.newHashMap();
        String scriptLang = null;
        if (riverSettings.settings().containsKey("transform")) {
            Map<String, Object> transformSettings = (Map<String, Object>) riverSettings.settings().get("transform");
            script = XContentMapValues.nodeStringValue(transformSettings.get("script"), null);
            scriptLang = XContentMapValues.nodeStringValue(transformSettings.get("lang"), null);
            scriptParams = (Map<String, Object>) transformSettings.get("params");
        }
        this.script = script;
        this.scriptParams = scriptParams;
        this.scriptLang = scriptLang;

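        // Bulk processor that batches index requests into bulks of bulkSize actions,
        // executing at most maxConcurrentBulk bulks concurrently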
        this.bulkProcessor = BulkProcessor.builder(client, new BulkProcessor.Listener() {
            @Override
            public void beforeBulk(long executionId, BulkRequest request) {
                logger.info("Going to execute new bulk composed of {} actions", request.numberOfActions());
            }

            @Override
            public void afterBulk(long executionId, BulkRequest request, BulkResponse response) {
                logger.info("Executed bulk composed of {} actions", request.numberOfActions());
            }

            @Override
            public void afterBulk(long executionId, BulkRequest request, Throwable failure) {
                logger.warn("Error executing bulk", failure);
            }
        }).setBulkActions(bulkSize).setConcurrentRequests(maxConcurrentBulk).build();

        this.objectMapper = new ObjectMapper();
        this.objectMapper.configure(SerializationFeature.WRITE_DATES_AS_TIMESTAMPS, false);

        this.httpClient = HttpClients.createDefault();
    }

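    /**
     * Reads a settings value that can be either a single string or an array of strings
     * (used for the fq and fl parameters); returns null when the value is missing.
     */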
    private String[] readArrayOrString(Object node) {
        if (XContentMapValues.isArray(node)) {
            List<?> list = (List<?>) node;
            String[] array = new String[list.size()];
            for (int i = 0; i < list.size(); i++) {
                array[i] = XContentMapValues.nodeStringValue(list.get(i), null);
            }
            return array;
        }

        String value = XContentMapValues.nodeStringValue(node, null);
        if (value != null) {
            return new String[] { value };
        }
        return null;
    }

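    /**
     * Creates the target index if it doesn't exist, then pages through the Solr results
     * using the start and rows parameters, feeding every returned document to the bulk
     * processor. Once the import completes, the river optionally deletes itself.
     */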
    @Override
    public void start() {

        if (!client.admin().indices().prepareExists(indexName).execute().actionGet().isExists()) {

            CreateIndexRequestBuilder createIndexRequest = client.admin().indices().prepareCreate(indexName);

            if (settings != null) {
                createIndexRequest.setSettings(settings);
            }
            if (mapping != null) {
                createIndexRequest.addMapping(typeName, mapping);
            }

            createIndexRequest.execute().actionGet();
        }

        StringBuilder baseSolrQuery = createSolrQuery();

        Long numFound = null;
        int startParam;
        // numFound is null until the first page has been parsed successfully,
        // so guard against unboxing it when the first request failed
        while ((startParam = start.getAndAdd(rows)) == 0
                || (numFound != null && startParam < numFound)) {
            String solrQuery = baseSolrQuery.toString() + "&start=" + startParam;
            CloseableHttpResponse httpResponse = null;
            try {
                logger.info("Sending query to Solr: {}", solrQuery);
                httpResponse = httpClient.execute(new HttpGet(solrQuery));

                if (httpResponse.getStatusLine().getStatusCode() != 200) {
                    logger.error("Solr returned non ok status code: {}",
                            httpResponse.getStatusLine().getReasonPhrase());
                    EntityUtils.consume(httpResponse.getEntity());
                    continue;
                }

                JsonNode jsonNode = objectMapper.readTree(EntityUtils.toString(httpResponse.getEntity()));
                JsonNode response = jsonNode.get("response");
                JsonNode numFoundNode = response.get("numFound");
                numFound = numFoundNode.asLong();
                if (logger.isWarnEnabled() && numFound == 0) {
                    logger.warn("The solr query {} returned 0 documents", solrQuery);
                }

                Iterator<JsonNode> docsIterator = response.get("docs").iterator();

                while (docsIterator.hasNext()) {
                    JsonNode docNode = docsIterator.next();

                    try {
                        JsonNode uniqueKeyNode = docNode.get(uniqueKey);

                        if (uniqueKeyNode == null) {
                            logger.error("The uniqueKey value is null");
                        } else {
                            String id = uniqueKeyNode.asText();
                            ((ObjectNode) docNode).remove(uniqueKey);

                            IndexRequest indexRequest = Requests.indexRequest(indexName).type(typeName).id(id);
                            String source = objectMapper.writeValueAsString(docNode);
                            if (this.script == null) {
                                indexRequest.source(source);
                            } else {
                                Tuple<XContentType, Map<String, Object>> newSourceAndContent = transformDocument(
                                        source);
                                indexRequest.source(newSourceAndContent.v2(), newSourceAndContent.v1());
                            }

                            bulkProcessor.add(indexRequest);

                        }
                    } catch (IOException e) {
                        logger.warn("Error while importing documents from solr to elasticsearch", e);
                    }
                }
            } catch (IOException e) {
                logger.error("Error while executing the solr query [" + solrQuery + "]", e);
                bulkProcessor.close();
                try {
                    httpClient.close();
                } catch (IOException ioe) {
                    logger.warn(ioe.getMessage(), ioe);
                }
                //if a query fails the next ones are most likely going to fail too
                return;
            } finally {
                if (httpResponse != null) {
                    try {
                        httpResponse.close();
                    } catch (IOException e) {
                        logger.error(e.getMessage(), e);
                    }
                }
            }
        }

        bulkProcessor.close();
        try {
            httpClient.close();
        } catch (IOException e) {
            logger.warn(e.getMessage(), e);
        }

        logger.info("Data import from solr to elasticsearch completed");

        if (closeOnCompletion) {
            logger.info("Deleting river");
            client.admin().indices().prepareDeleteMapping("_river").setType(riverName.name()).execute();
        }
    }

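    /**
     * Runs the configured script against the document: the script receives the parsed
     * source as ctx._source and can modify it in place before it gets indexed.
     */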
    @SuppressWarnings("unchecked")
    protected Tuple<XContentType, Map<String, Object>> transformDocument(String jsonDocument) {
        Tuple<XContentType, Map<String, Object>> sourceAndContent = XContentHelper
                .convertToMap(jsonDocument.getBytes(Charset.forName("utf-8")), true);

        Map<String, Object> ctx = new HashMap<String, Object>(2);
        ctx.put("_source", sourceAndContent.v2());

        try {
            ExecutableScript executableScript = scriptService.executable(scriptLang, script,
                    ScriptService.ScriptType.INLINE, scriptParams);
            executableScript.setNextVar("ctx", ctx);
            executableScript.run();
            ctx = (Map<String, Object>) executableScript.unwrap(ctx);
        } catch (Exception e) {
            throw new ElasticsearchIllegalArgumentException("failed to execute script", e);
        }

        final Map<String, Object> updatedSourceAsMap = (Map<String, Object>) ctx.get("_source");
        return new Tuple<XContentType, Map<String, Object>>(sourceAndContent.v1(), updatedSourceAsMap);
    }

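    /**
     * Builds the base Solr query URL from the configured request handler, query,
     * filter queries, field list and page size; the start parameter is appended per page.
     */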
    protected StringBuilder createSolrQuery() {
        StringBuilder queryBuilder = new StringBuilder(solrUrl);

        if (Strings.hasLength(requestHandler)) {
            if (queryBuilder.charAt(queryBuilder.length() - 1) != '/') {
                queryBuilder.append("/");
            }
            queryBuilder.append(requestHandler);
        }

        queryBuilder.append("?q=").append(encode(query)).append("&wt=json");
        if (filterQueries != null) {
            for (String filterQuery : filterQueries) {
                queryBuilder.append("&fq=").append(encode(filterQuery));
            }
        }
        if (fields != null) {
            queryBuilder.append("&fl=");
            for (int i = 0; i < fields.length; i++) {
                if (i > 0) {
                    queryBuilder.append(encode(" "));
                }
                queryBuilder.append(encode(fields[i]));
            }
        }

        queryBuilder.append("&rows=").append(rows);
        return queryBuilder;
    }

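    // URL-encodes a query string parameter as UTF-8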
    private static String encode(String value) {
        try {
            return URLEncoder.encode(value, "utf-8");
        } catch (UnsupportedEncodingException e) {
            throw new RuntimeException(e);
        }
    }

    @Override
    public void close() {
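        // Nothing to do here: the bulk processor and the http client are closed
        // by start() itself once the import completes or fails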

    }
}