lumbermill.internal.elasticsearch.ElasticSearchOkHttpClientImpl.java Source code

Introduction

Here is the source code for lumbermill.internal.elasticsearch.ElasticSearchOkHttpClientImpl.java
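Before the full listing, here is a minimal usage sketch of the client. The host, index prefix, type, credentials and retry settings below are illustrative placeholders, and the event batch is assumed to be produced elsewhere in a lumbermill pipeline; the sketch only shows the fluent configuration and how to subscribe to the resulting Observable.

import java.util.List;

import lumbermill.api.JsonEvent;
import lumbermill.api.Observables;
import lumbermill.internal.elasticsearch.ElasticSearchOkHttpClientImpl;

public class BulkIndexSketch {

    // Illustrative configuration: host, index prefix, type and credentials are placeholders.
    private final ElasticSearchOkHttpClientImpl client =
            new ElasticSearchOkHttpClientImpl("http://localhost:9200", "lumbermill-", "log", true)
                    .withBasicAuth("elastic", "changeme")
                    .withRetryTimer(Observables.fixedTimer(2000), 5);

    // The event batch is assumed to be produced elsewhere in the pipeline.
    public void index(List<JsonEvent> events) {
        client.post(events).subscribe(
                response -> System.out.println("Bulk request completed"),
                error -> System.err.println("Bulk request failed: " + error.getMessage()));
    }
}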

Source

/*
 * Copyright 2016 Sony Mobile Communications, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License").
 * You may not use this file except in compliance with the License.
 * A copy of the License is located at
 *
 *  http://aws.amazon.com/apache2.0
 *
 * or in the "license" file accompanying this file. This file is distributed
 * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
 * express or implied. See the License for the specific language governing
 * permissions and limitations under the License.
 */
package lumbermill.internal.elasticsearch;

import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.node.ObjectNode;
import com.squareup.okhttp.*;
import lumbermill.api.JsonEvent;
import lumbermill.api.Observables;
import lumbermill.api.Timer;
import lumbermill.elasticsearch.ElasticSearchBulkRequestEvent;
import lumbermill.elasticsearch.ElasticSearchBulkResponseEvent;
import lumbermill.elasticsearch.FatalIndexException;
import lumbermill.elasticsearch.IndexFailedException;
import lumbermill.internal.MapWrap;
import lumbermill.internal.StringTemplate;
import org.apache.commons.lang3.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import rx.Observable;
import rx.subjects.ReplaySubject;

import java.io.IOException;
import java.net.InetSocketAddress;
import java.net.MalformedURLException;
import java.net.Proxy;
import java.net.URI;
import java.net.URL;
import java.time.Duration;
import java.time.LocalDate;
import java.time.format.DateTimeFormatter;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicLong;

import static java.util.stream.Collectors.toList;

/**
 * Elasticsearch BULK API client based on OkHttpClient. This client attempts to be Logstash compatible.
 *
 * Currently the bulk API supports the /_bulk endpoint only, not per-index or per-type endpoints.
 *
 * With index prefix:
 * <pre><code>
 *    new ElasticSearchOkHttpClientImpl("http://host:port", "lumbermill-", "atype", true);
 *</code></pre>
 * No index prefix:
 * <pre><code>
 *    new ElasticSearchOkHttpClientImpl("http://host:port", "lumbermill", "atype", false);
 * </code></pre>
 *
 * It supports both "normal" Elasticsearch and AWS Elasticsearch identity-based access control.
 * To use AWS, the lumbermill-aws RequestSigner implementation must be used.
 *
 *
 */
@SuppressWarnings("unused")
public class ElasticSearchOkHttpClientImpl {

    public static final Logger LOGGER = LoggerFactory.getLogger(ElasticSearchOkHttpClientImpl.class);

    public static final MediaType TEXT = MediaType.parse("application/text; charset=utf-8");
    public static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();

    private static final String AUTHORIZATION_HEADER_NAME = "Authorization";

    /**
     * The HTTP client instance is shared across all requests made by this client.
     */
    private final OkHttpClient client = new OkHttpClient();
    {
        String https_proxy = System.getenv("https_proxy");
        // Support for empty value as well as null
        if (StringUtils.isNotEmpty(https_proxy)) {
            URI proxy = URI.create(https_proxy);
            LOGGER.info("Using proxy {}", proxy);
            client.setProxy(new Proxy(Proxy.Type.HTTP, new InetSocketAddress(proxy.getHost(), proxy.getPort())));
        }
        client.setRetryOnConnectionFailure(true);
    }

    private final boolean indexIsPrefix;

    private String timestampField = "@timestamp";

    private final URL url;
    private final StringTemplate index;
    private final StringTemplate type;
    private Optional<String> basicAuthHeader = Optional.empty();

    private Optional<StringTemplate> documentId = Optional.empty();

    private Optional<RequestSigner> signer = Optional.empty();

    private Timer.Factory timerFactory = Observables.fixedTimer(2000);
    private int retryAttempts = 20;

    public ElasticSearchOkHttpClientImpl(String esUrl, String index, String type, boolean isPrefix) {
        this.indexIsPrefix = isPrefix;
        try {
            this.url = new URL(esUrl + (esUrl.endsWith("/") ? "_bulk" : "/_bulk"));
        } catch (MalformedURLException e) {
            throw new IllegalStateException(e);
        }

        this.index = StringTemplate.compile(index);
        this.type = StringTemplate.compile(type);
    }

    /**
     * Provides access to the underlying http client, useful in tests and
     * to customize things we have not thought about.
     */
    public OkHttpClient client() {
        return client;
    }

    /**
     * Sets the AWS request signer when using the AWS Elasticsearch service with identity-based access control.
     */
    public ElasticSearchOkHttpClientImpl withSigner(RequestSigner signer) {
        this.signer = Optional.of(signer);
        return this;
    }

    /**
     * Changes the timestamp field if you are not using @timestamp. The correct field name is currently
     * required in order to create correctly timestamped indices.
     */
    public ElasticSearchOkHttpClientImpl withTimestampField(String field) {
        this.timestampField = field;
        return this;
    }

    public ElasticSearchOkHttpClientImpl withRetryTimer(Timer.Factory timer, int attempts) {
        this.timerFactory = timer;
        this.retryAttempts = attempts;
        return this;
    }

    public ElasticSearchOkHttpClientImpl withDispatcher(Dispatcher dispatcher) {
        this.client.setDispatcher(dispatcher);
        return this;
    }

    public ElasticSearchOkHttpClientImpl withBasicAuth(String user, String passwd) {
        this.basicAuthHeader = Optional.of(Credentials.basic(user, passwd));
        return this;
    }

    /**
     * Sets the _id of each document in the bulk request, supports StringTemplate.
     * @param documentId a pattern for extracting the _id to use
     * @return this instance, for chaining
     */
    public ElasticSearchOkHttpClientImpl withDocumentId(String documentId) {
        this.documentId = Optional.of(StringTemplate.compile(documentId));
        return this;
    }

    public Observable<ElasticSearchBulkResponseEvent> postEvent(ElasticSearchBulkRequestEvent requestEvent)
            throws IndexFailedException {

        // TODO - Not sure how we really want this to work...
        List<JsonEvent> batch = requestEvent.indexRequests().stream()
                .map(tuple -> tuple.getSecond()).collect(toList());
        LOGGER.debug("Sending batch of {} events", batch.size());
        RequestContext request = new RequestContext(batch, new ElasticSearchRequest(batch, url));
        post(request);
        return request.subject;

    }

    public Observable<ElasticSearchBulkResponseEvent> post(List<JsonEvent> batch) throws IndexFailedException {
        if (batch.isEmpty()) {
            LOGGER.info("Event batch is empty, skipping");
            // Nothing to index, return an empty Observable instead of sending an empty bulk request
            return Observable.empty();
        }
        LOGGER.debug("Sending batch of {} events", batch.size());
        RequestContext request = new RequestContext(batch, new ElasticSearchRequest(batch, url));
        post(request);
        return request.subject;

    }

    public void post(RequestContext request) throws IndexFailedException {

        if (signer.isPresent()) {
            LOGGER.trace("Found RequestSigner, signing request");
            signer.get().sign(request.signableRequest);
        }

        doOkHttpPost(request);
    }

    /**
     * Handles the response from the OkHttp client to determine if the response was ok, retryable or fatal.
     * @param request used for logging purposes if the request was a 400 Bad Request
     * @param response the HTTP response to evaluate
     */
    private void handleResponse(RequestContext request, Response response) {

        if (response.code() == 200) {
            ElasticSearchBulkResponse bulkResponse = ElasticSearchBulkResponse.parse(request.signableRequest,
                    response);
            if (bulkResponse.hasErrors()) {
                Optional<Observable<RequestContext>> requestContextObservable = request.nextAttempt(bulkResponse);
                if (!requestContextObservable.isPresent()) {
                    request.done(bulkResponse);
                } else {
                    requestContextObservable.get().doOnNext(requestContext -> post(requestContext))
                            .doOnError(throwable -> request.error(throwable)).subscribe();
                }
            } else {
                request.done(bulkResponse);
            }
            return;
        }

        if (response.code() == 400) {
            request.error(createFatalIndexException(request.signableRequest, response));
            return;
        }

        request.error(IndexFailedException.of(response));
    }

    /**
     * Can be overridden to perform post-success operations.
     */
    protected void success(RequestSigner.SignableRequest request, Response response) {

    }

    /**
     * Converts the events to a BulkApi request
     */
    protected StringBuilder toBulkApiRequest(List<JsonEvent> batch) {

        StringBuilder builder = new StringBuilder();

        batch.forEach(event -> builder.append(indexRowWithDateAndType(event)).append("\n")
                .append(event.toString(false)).append("\n"));
        return builder;
    }

    private String indexRowWithDateAndType(JsonEvent event) {

        Optional<String> formattedType = type.format(event);
        if (!formattedType.isPresent()) {
            throw new IllegalStateException(
                    "Issue with type, could not extract field from event " + type.original());
        }

        Optional<String> formattedIndex = index.format(event);
        if (!formattedIndex.isPresent()) {
            throw new IllegalStateException(
                    "Issue with index, could not extract field from event: " + index.original());
        }

        Optional<String> formattedDocumentId = Optional.empty();
        if (documentId.isPresent()) {
            formattedDocumentId = documentId.get().format(event);
            if (!formattedDocumentId.isPresent()) {
                throw new IllegalStateException(
                        "Issue with documentId, could not extract field from event: " + documentId.get().original());
            }
        }

        ObjectNode objectNode = OBJECT_MAPPER.createObjectNode();
        ObjectNode data = OBJECT_MAPPER.createObjectNode();

        // Prepare for adding day to index for each event
        if (indexIsPrefix) {
            LocalDate indexDate;
            // TODO: Not sure how to handle this... what should be the behaviour if the specified timestamp field
            //       does not exist
            if (event.has(this.timestampField)) {
                indexDate = LocalDate.parse(event.valueAsString(this.timestampField).substring(0, 10),
                        DateTimeFormatter.ISO_DATE);
            } else {
                indexDate = LocalDate.now();
            }
            data.put("_index", formattedIndex.get() + indexDate.format(DateTimeFormatter.ofPattern("yyyy.MM.dd")));
        } else {
            data.put("_index", formattedIndex.get());
        }
        data.put("_type", formattedType.get());

        if (formattedDocumentId.isPresent()) {
            data.put("_id", formattedDocumentId.get());
        }
        objectNode.set("index", data);

        return objectNode.toString();
    }

    protected void doOkHttpPost(RequestContext requestCtx) {
        RequestBody body = RequestBody.create(TEXT, requestCtx.signableRequest.payload().get());
        Request request = new Request.Builder().url(url).post(body)
                .headers(Headers.of(requestCtx.signableRequest.headers())).build();

        // Add some sanity logging to be able to figure out the load
        if (LOGGER.isDebugEnabled()) {
            int requestsInQueue = client.getDispatcher().getQueuedCallCount();
            int requestsInProgress = client.getDispatcher().getRunningCallCount();
            if (requestsInQueue > 0) {
                LOGGER.debug("There are {} requests waiting to be processed", requestsInQueue);
            }
            if (requestsInProgress > 0) {
                LOGGER.debug("There are {} requests currently executing", requestsInProgress);
            }
        }

        client.newCall(request).enqueue(new Callback() {
            @Override
            public void onFailure(Request request, IOException e) {
                requestCtx.error(IndexFailedException.ofIOException(e));
            }

            @Override
            public void onResponse(Response response) throws IOException {
                handleResponse(requestCtx, response);
            }
        });
    }

    public static FatalIndexException createFatalIndexException(RequestSigner.SignableRequest request,
            Response response) {
        try {
            return new FatalIndexException(
                    response.code() + ", message:" + response.message() + ", body: " + response.body().string());
        } catch (IOException e) {
            LOGGER.warn("Failed to extract body from error message: " + e.getMessage());
            return new FatalIndexException(response.code() + ", message:" + response.message());
        }
    }

    private class ElasticSearchRequest implements RequestSigner.SignableRequest {

        private final List<JsonEvent> events;
        private final Optional<byte[]> payload;
        private final URL url;
        public final Map<String, String> headers;

        public ElasticSearchRequest(List<JsonEvent> events, URL url) {
            this.events = events;
            this.url = url;
            headers = MapWrap.of("host", url.getHost()).toMap();
            payload = Optional.of(toBulkApiRequest(events).toString().getBytes());
        }

        @Override
        public String uri() {
            return "/_bulk";
        }

        @Override
        public String method() {
            return "POST";
        }

        @Override
        public Map<String, String> queryParams() {
            return Collections.emptyMap();
        }

        @Override
        public Map<String, String> headers() {
            // It is not possible to create a client with both a signer and basic auth, so we do not have to worry about a conflict here.
            if (basicAuthHeader.isPresent()) {
                headers.put(AUTHORIZATION_HEADER_NAME, basicAuthHeader.get());
            }
            return headers;
        }

        @Override
        public Optional<byte[]> payload() {
            return payload;
        }

        @Override
        public List<JsonEvent> original() {
            return events;
        }

        @Override
        public void addSignedHeaders(Map<String, String> headers) {
            this.headers.putAll(headers);
        }
    }

    /**
     * Contains state used to track retries and to return the response to the pipeline.
     */
    private class RequestContext {

        public final ReplaySubject<ElasticSearchBulkResponseEvent> subject = ReplaySubject.createWithSize(1);

        /**
         * Keep them here until we are done, then return them
         */
        public final List<JsonEvent> events;

        public ElasticSearchBulkResponseEvent response;

        /**
         * Request to execute
         */
        public RequestSigner.SignableRequest signableRequest;

        /**
         * Attempt count
         */
        public AtomicInteger attempt = new AtomicInteger(1);

        /**
         * Start time of the request
         */
        public long start = System.currentTimeMillis();

        /**
         * The timer for each request
         */
        public Timer timer = ElasticSearchOkHttpClientImpl.this.timerFactory.create();

        public RequestContext(List<JsonEvent> events, RequestSigner.SignableRequest signableRequest) {
            this.events = events;
            this.signableRequest = signableRequest;

        }

        private boolean hasNextAttempt() {
            return attempt.get() <= ElasticSearchOkHttpClientImpl.this.retryAttempts;
        }

        public Optional<Observable<RequestContext>> nextAttempt(ElasticSearchBulkResponse result) {
            updateResponseEvent(result);
            Optional<RequestSigner.SignableRequest> signableRequest = failedRecords(result);
            if (!signableRequest.isPresent()) {
                return Optional.empty();
            }
            this.signableRequest = signableRequest.get();
            this.attempt.incrementAndGet();
            if (!hasNextAttempt()) {
                FatalIndexException ex = FatalIndexException.of("Too many retries");
                error(ex);
                throw ex;
            }

            return Optional.of(Observables.just(this).withDelay(timer));
        }

        private synchronized void updateResponseEvent(ElasticSearchBulkResponse bulkResponse) {
            if (this.response == null) {
                response = ElasticSearchBulkResponseEvent.of(bulkResponse);
            } else {
                response = response.nextAttempt(bulkResponse);
            }
        }

        private Optional<RequestSigner.SignableRequest> failedRecords(ElasticSearchBulkResponse result) {
            List<JsonEvent> retryableItems = result.getRetryableItems(this.signableRequest);

            if (retryableItems.isEmpty()) {
                // This occurs if we got errors but they were all 400 BAD REQUEST; these will not be retried
                return Optional.empty();
            }
            return Optional.of(new ElasticSearchRequest(retryableItems, url));
        }

        public void done(ElasticSearchBulkResponse bulkResponse) {
            // Fix return ALL + ES response
            LOGGER.debug("Request took {} seconds / {} attempts / events {}",
                    Duration.ofMillis(System.currentTimeMillis() - start).getSeconds(), attempt.get(),
                    events.size());
            updateResponseEvent(bulkResponse);
            this.subject.onNext(response);
            this.subject.onCompleted();
        }

        public void error(Throwable t) {
            LOGGER.debug("Request took {} seconds / {} attempts and ended with error {}:{}",
                    Duration.ofMillis(System.currentTimeMillis() - start).getSeconds(), attempt.get(),
                    t.getClass().getSimpleName(), t.getMessage());
            this.subject.onError(t);
        }
    }
}
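
For reference, toBulkApiRequest builds a newline-delimited bulk payload of the shape sketched below. The index name, type and event body are illustrative; when the index is used as a prefix, the event's date is appended in the yyyy.MM.dd format, and an "_id" entry is added only when withDocumentId is configured.

{"index":{"_index":"lumbermill-2016.06.01","_type":"log"}}
{"@timestamp":"2016-06-01T12:34:56.000Z","message":"an example event"}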