org.apache.metron.elasticsearch.bulk.ElasticsearchBulkDocumentWriter.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.metron.elasticsearch.bulk.ElasticsearchBulkDocumentWriter.java

Source

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.metron.elasticsearch.bulk;

import org.apache.commons.lang3.exception.ExceptionUtils;
import org.apache.metron.elasticsearch.client.ElasticsearchClient;
import org.apache.metron.indexing.dao.update.Document;
import org.elasticsearch.action.DocWriteRequest;
import org.elasticsearch.action.bulk.BulkItemResponse;
import org.elasticsearch.action.bulk.BulkRequest;
import org.elasticsearch.action.bulk.BulkResponse;
import org.elasticsearch.action.index.IndexRequest;
import org.elasticsearch.action.support.WriteRequest;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.lang.invoke.MethodHandles;
import java.util.ArrayList;
import java.util.List;

/**
 * Writes documents to an Elasticsearch index in bulk.
 *
 * <p>Documents are queued with {@link #addDocument(Document, String)} and submitted
 * together as a single bulk request by {@link #write()}. The queue is emptied by every
 * call to {@link #write()}, whether the individual documents succeeded or failed.
 *
 * <p>NOTE(review): the pending batch is held in an unsynchronized list; presumably each
 * writer instance is confined to a single thread - confirm against callers.
 *
 * @param <D> The type of document to write.
 */
public class ElasticsearchBulkDocumentWriter<D extends Document> implements BulkDocumentWriter<D> {

    /**
     * A {@link Document} along with the index it will be written to.
     *
     * @param <T> The type of document.
     */
    private static class Indexable<T extends Document> {
        final T document;
        final String index;

        Indexable(T document, String index) {
            this.document = document;
            this.index = index;
        }
    }

    private static final Logger LOG = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());

    /** The client used to submit bulk requests. */
    private final ElasticsearchClient client;

    /** The documents queued since the last call to {@link #write()}. */
    private final List<Indexable<D>> documents;

    /** The refresh policy applied to each bulk request; {@code NONE} unless overridden. */
    private WriteRequest.RefreshPolicy refreshPolicy;

    /**
     * @param client The client used to submit bulk requests to Elasticsearch.
     */
    public ElasticsearchBulkDocumentWriter(ElasticsearchClient client) {
        this.client = client;
        this.documents = new ArrayList<>();
        this.refreshPolicy = WriteRequest.RefreshPolicy.NONE;
    }

    /**
     * Queues a document to be written by the next call to {@link #write()}.
     *
     * @param document The document to write.
     * @param indexName The name of the index the document will be written to.
     */
    @Override
    public void addDocument(D document, String indexName) {
        documents.add(new Indexable<>(document, indexName));
        LOG.debug("Adding document to batch; document={}, index={}", document, indexName);
    }

    /**
     * Submits all queued documents as a single bulk request and clears the queue.
     *
     * <p>If the request itself fails with an {@link IOException}, every queued document is
     * reported as a failure. Otherwise each document is reported as a success or a failure
     * according to its item in the bulk response.
     *
     * @return The successes and failures of this batch; empty if no documents were queued.
     * @throws IllegalArgumentException If a queued document has no timestamp. The queue is
     *     still cleared in that case.
     */
    @Override
    public BulkDocumentWriterResults<D> write() {
        BulkDocumentWriterResults<D> results = new BulkDocumentWriterResults<>();

        // captured up front; the batch is cleared below before the summary is logged
        final int batchSize = documents.size();
        if (batchSize == 0) {
            // nothing is queued, so there is nothing to submit
            LOG.debug("No documents to write; skipping bulk request");
            return results;
        }

        try {
            // create an index request for each document
            BulkRequest bulkRequest = new BulkRequest();
            bulkRequest.setRefreshPolicy(refreshPolicy);
            for (Indexable<D> doc : documents) {
                DocWriteRequest<?> request = createRequest(doc.document, doc.index);
                bulkRequest.add(request);
            }

            // submit the request and handle the response
            BulkResponse bulkResponse = client.getHighLevelClient().bulk(bulkRequest);
            handleBulkResponse(bulkResponse, documents, results);

        } catch (IOException e) {
            // assume all documents have failed
            String message = ExceptionUtils.getRootCauseMessage(e);
            for (Indexable<D> indexable : documents) {
                results.addFailure(indexable.document, e, message);
            }
            LOG.error("Failed to submit bulk request; all documents failed", e);

        } finally {
            // flush all documents no matter which ones succeeded or failed
            documents.clear();
        }

        LOG.debug("Wrote document(s) to Elasticsearch; batchSize={}, success={}, failed={}", batchSize,
                results.getSuccesses().size(), results.getFailures().size());
        return results;
    }

    /**
     * @return The number of documents currently queued.
     */
    @Override
    public int size() {
        return documents.size();
    }

    /**
     * Sets the refresh policy applied to subsequent bulk requests.
     *
     * @param refreshPolicy The refresh policy to use.
     * @return This writer, to allow call chaining.
     */
    public ElasticsearchBulkDocumentWriter<D> withRefreshPolicy(WriteRequest.RefreshPolicy refreshPolicy) {
        this.refreshPolicy = refreshPolicy;
        return this;
    }

    /**
     * Builds the index request for a single document.
     *
     * @param document The document to index.
     * @param index The name of the index to write to.
     * @return The index request.
     * @throws IllegalArgumentException If the document has no timestamp.
     */
    private IndexRequest createRequest(D document, String index) {
        if (document.getTimestamp() == null) {
            throw new IllegalArgumentException("Document must contain the timestamp");
        }

        // if updating an existing document, the doc ID should be defined.
        // if creating a new document, set the doc ID to null to allow Elasticsearch to generate one.
        String docId = document.getDocumentID().orElse(null);
        if (LOG.isDebugEnabled()) {
            if (docId != null) {
                LOG.debug("Updating existing document with known doc ID; docID={}, guid={}, sensorType={}", docId,
                        document.getGuid(), document.getSensorType());
            } else {
                LOG.debug("Creating a new document, doc ID not yet known; guid={}, sensorType={}", document.getGuid(),
                        document.getSensorType());
            }
        }

        return new IndexRequest()
                .source(document.getDocument())
                .type(document.getSensorType() + "_doc")
                .index(index)
                .id(docId)
                .timestamp(document.getTimestamp().toString());
    }

    /**
     * Handles the {@link BulkResponse} received from Elasticsearch.
     *
     * <p>Each item response is matched to its document by item ID, which is used as the
     * position of the document in {@code documents}. Successful documents have their
     * document ID set from the response, whether or not other items in the batch failed.
     *
     * @param bulkResponse The response received from Elasticsearch.
     * @param documents The documents included in the bulk request.
     * @param results The writer results.
     */
    private void handleBulkResponse(BulkResponse bulkResponse, List<Indexable<D>> documents,
            BulkDocumentWriterResults<D> results) {
        for (BulkItemResponse response : bulkResponse) {
            D document = documents.get(response.getItemId()).document;
            if (response.isFailed()) {
                // request failed
                Exception cause = response.getFailure().getCause();
                String message = response.getFailureMessage();
                results.addFailure(document, cause, message);

            } else {
                // request succeeded; record the doc ID reported by Elasticsearch
                document.setDocumentID(response.getResponse().getId());
                results.addSuccess(document);
            }
        }
    }
}