com.b2international.index.es.EsDocumentWriter.java Source code

Java tutorial

Introduction

Here is the source code for com.b2international.index.es.EsDocumentWriter.java

Source

/*
 * Copyright 2017-2018 B2i Healthcare Pte Ltd, http://b2i.sg
 * 
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.b2international.index.es;

import static com.google.common.base.Preconditions.checkState;
import static com.google.common.collect.Lists.newArrayList;
import static com.google.common.collect.Sets.newHashSet;

import java.io.IOException;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Random;
import java.util.Set;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;

import org.elasticsearch.action.DocWriteRequest.OpType;
import org.elasticsearch.action.bulk.BulkItemResponse;
import org.elasticsearch.action.bulk.BulkItemResponse.Failure;
import org.elasticsearch.action.bulk.BulkProcessor;
import org.elasticsearch.action.bulk.BulkRequest;
import org.elasticsearch.action.bulk.BulkResponse;
import org.elasticsearch.action.delete.DeleteRequest;
import org.elasticsearch.action.index.IndexRequest;
import org.elasticsearch.common.unit.ByteSizeUnit;
import org.elasticsearch.common.unit.ByteSizeValue;
import org.elasticsearch.common.xcontent.XContentType;
import org.elasticsearch.index.query.QueryBuilder;
import org.elasticsearch.index.reindex.BulkByScrollResponse;
import org.elasticsearch.rest.RestStatus;
import org.elasticsearch.script.ScriptType;

import com.b2international.index.BulkUpdate;
import com.b2international.index.DocSearcher;
import com.b2international.index.IndexClientFactory;
import com.b2international.index.IndexException;
import com.b2international.index.Writer;
import com.b2international.index.es.admin.EsIndexAdmin;
import com.b2international.index.es.client.EsClient;
import com.b2international.index.es.query.EsQueryBuilder;
import com.b2international.index.mapping.DocumentMapping;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.node.ObjectNode;
import com.google.common.collect.HashBasedTable;
import com.google.common.collect.HashMultimap;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Iterables;
import com.google.common.collect.Multimap;
import com.google.common.collect.Table;
import com.google.common.hash.HashCode;
import com.google.common.hash.Hashing;
import com.google.common.util.concurrent.Futures;
import com.google.common.util.concurrent.ListenableFuture;
import com.google.common.util.concurrent.ListeningExecutorService;
import com.google.common.util.concurrent.MoreExecutors;

/**
 * @since 5.10 
 */
public class EsDocumentWriter implements Writer {

    private static final int DEFAULT_MAX_NUMBER_OF_VERSION_CONFLICT_RETRIES = 5;

    private static final int BATCHS_SIZE = 10_000;

    private final EsIndexAdmin admin;
    private final DocSearcher searcher;

    private final Random random = new Random();
    private final Table<Class<?>, String, Object> indexOperations = HashBasedTable.create();
    private final Multimap<Class<?>, String> deleteOperations = HashMultimap.create();
    private final ObjectMapper mapper;
    private List<BulkUpdate<?>> updateOperations = newArrayList();

    public EsDocumentWriter(EsIndexAdmin admin, DocSearcher searcher, ObjectMapper mapper) {
        this.admin = admin;
        this.searcher = searcher;
        this.mapper = mapper;
    }

    @Override
    public void put(String key, Object object) {
        indexOperations.put(object.getClass(), key, object);
    }

    @Override
    public <T> void putAll(Map<String, T> objectsByKey) {
        objectsByKey.forEach(this::put);
    }

    @Override
    public <T> void bulkUpdate(BulkUpdate<T> update) throws IOException {
        updateOperations.add(update);
    }

    @Override
    public void remove(Class<?> type, String key) throws IOException {
        removeAll(Collections.singletonMap(type, ImmutableSet.of(key)));
    }

    @Override
    public void removeAll(Map<Class<?>, Set<String>> keysByType) throws IOException {
        for (Class<?> type : keysByType.keySet()) {
            deleteOperations.putAll(type, keysByType.get(type));
        }
    }

    @Override
    public void commit() throws IOException {
        if (indexOperations.isEmpty() && deleteOperations.isEmpty() && updateOperations.isEmpty()) {
            return;
        }

        final Set<DocumentMapping> mappingsToRefresh = Collections.synchronizedSet(newHashSet());
        final EsClient client = admin.client();
        // apply bulk updates first
        final ListeningExecutorService executor;
        if (updateOperations.size() > 1) {
            executor = MoreExecutors
                    .listeningDecorator(Executors.newFixedThreadPool(Math.min(4, updateOperations.size())));
        } else {
            executor = MoreExecutors.newDirectExecutorService();
        }
        final List<ListenableFuture<?>> updateFutures = newArrayList();
        for (BulkUpdate<?> update : updateOperations) {
            updateFutures.add(executor.submit(() -> bulkUpdate(client, update, mappingsToRefresh)));
        }
        try {
            executor.shutdown();
            Futures.allAsList(updateFutures).get();
            executor.awaitTermination(10, TimeUnit.SECONDS);
        } catch (InterruptedException | ExecutionException e) {
            throw new IndexException("Couldn't execute bulk updates", e);
        }

        // then bulk indexes/deletes
        if (!indexOperations.isEmpty() || !deleteOperations.isEmpty()) {
            final BulkProcessor processor = client.bulk(new BulkProcessor.Listener() {
                @Override
                public void beforeBulk(long executionId, BulkRequest request) {
                    admin.log().debug("Sending bulk request {}", request.numberOfActions());
                }

                @Override
                public void afterBulk(long executionId, BulkRequest request, Throwable failure) {
                    admin.log().error("Failed bulk request", failure);
                }

                @Override
                public void afterBulk(long executionId, BulkRequest request, BulkResponse response) {
                    admin.log().debug("Successfully processed bulk request ({}) in {}.", request.numberOfActions(),
                            response.getTook());
                    if (response.hasFailures()) {
                        for (BulkItemResponse itemResponse : response.getItems()) {
                            checkState(!itemResponse.isFailed(), "Failed to commit bulk request in index '%s', %s",
                                    admin.name(), itemResponse.getFailureMessage());
                        }
                    }
                }
            }).setConcurrentRequests(getConcurrencyLevel()).setBulkActions(10_000)
                    .setBulkSize(new ByteSizeValue(10L, ByteSizeUnit.MB)).build();

            for (Class<?> type : ImmutableSet.copyOf(indexOperations.rowKeySet())) {
                final Map<String, Object> indexOperationsForType = indexOperations.row(type);

                final DocumentMapping mapping = admin.mappings().getMapping(type);
                final String typeString = mapping.typeAsString();
                final String typeIndex = admin.getTypeIndex(mapping);

                mappingsToRefresh.add(mapping);

                for (Entry<String, Object> entry : Iterables.consumingIterable(indexOperationsForType.entrySet())) {
                    final String id = entry.getKey();
                    if (!deleteOperations.containsValue(id)) {
                        final Object obj = entry.getValue();
                        final Set<String> hashedFields = mapping.getHashedFields();
                        final byte[] _source;

                        if (!hashedFields.isEmpty()) {
                            final ObjectNode objNode = mapper.valueToTree(obj);
                            final ObjectNode hashedNode = mapper.createObjectNode();

                            // Preserve property order, share references with objNode
                            for (String hashedField : hashedFields) {
                                JsonNode value = objNode.get(hashedField);
                                if (value != null && !value.isNull()) {
                                    hashedNode.set(hashedField, value);
                                }
                            }

                            final byte[] hashedBytes = mapper.writeValueAsBytes(hashedNode);
                            final HashCode hashCode = Hashing.sha1().hashBytes(hashedBytes);

                            // Inject the result as an extra field into the to-be-indexed JSON content
                            objNode.put(DocumentMapping._HASH, hashCode.toString());
                            _source = mapper.writeValueAsBytes(objNode);

                        } else {
                            _source = mapper.writeValueAsBytes(obj);
                        }

                        processor.add(new IndexRequest(typeIndex, typeString, id).opType(OpType.INDEX)
                                .source(_source, XContentType.JSON));
                    }
                }

                for (String id : deleteOperations.removeAll(type)) {
                    processor.add(new DeleteRequest(typeIndex, typeString, id));
                }

                // Flush processor between index boundaries
                processor.flush();
            }

            // Remaining delete operations can be executed on their own
            for (Class<?> type : ImmutableSet.copyOf(deleteOperations.keySet())) {
                final DocumentMapping mapping = admin.mappings().getMapping(type);
                final String typeString = mapping.typeAsString();
                final String typeIndex = admin.getTypeIndex(mapping);

                mappingsToRefresh.add(mapping);

                for (String id : deleteOperations.removeAll(type)) {
                    processor.add(new DeleteRequest(typeIndex, typeString, id));
                }

                // Flush processor between index boundaries
                processor.flush();
            }

            try {
                processor.awaitClose(5, TimeUnit.MINUTES);
            } catch (InterruptedException e) {
                throw new IndexException("Interrupted bulk processing part of the commit", e);
            }
        }

        // refresh the index if there were only updates
        admin.refresh(mappingsToRefresh);
    }

    private void bulkUpdate(final EsClient client, final BulkUpdate<?> update,
            Set<DocumentMapping> mappingsToRefresh) {
        final DocumentMapping mapping = admin.mappings().getMapping(update.getType());
        final QueryBuilder query = new EsQueryBuilder(mapping).build(update.getFilter());
        final String rawScript = mapping.getScript(update.getScript()).script();
        org.elasticsearch.script.Script script = new org.elasticsearch.script.Script(ScriptType.INLINE, "painless",
                rawScript, ImmutableMap.copyOf(update.getParams()));

        long versionConflicts = 0;
        int attempts = DEFAULT_MAX_NUMBER_OF_VERSION_CONFLICT_RETRIES;
        final String index = admin.getTypeIndex(mapping);
        final String type = mapping.typeAsString();

        do {

            try {

                BulkByScrollResponse response = client.updateByQuery(index, type, BATCHS_SIZE, script,
                        getConcurrencyLevel(), query);

                final long updateCount = response.getUpdated();
                final long deleteCount = response.getDeleted();
                final long noops = response.getNoops();
                final List<Failure> failures = response.getBulkFailures();

                versionConflicts = response.getVersionConflicts();

                boolean updated = updateCount > 0;
                if (updated) {
                    mappingsToRefresh.add(mapping);
                    admin.log().info("Updated {} {} documents with script '{}'", updateCount, type,
                            update.getScript());
                }

                boolean deleted = deleteCount > 0;
                if (deleted) {
                    mappingsToRefresh.add(mapping);
                    admin.log().info("Deleted {} {} documents with script '{}'", deleteCount, type,
                            update.getScript());
                }

                if (!updated && !deleted) {
                    admin.log().warn(
                            "Couldn't bulk update '{}' documents with script '{}', no-ops ({}), conflicts ({})",
                            type, update.getScript(), noops, versionConflicts);
                }

                if (failures.size() > 0) {
                    boolean versionConflictsOnly = true;
                    for (Failure failure : failures) {
                        final String failureMessage = failure.getCause().getMessage();
                        final int failureStatus = failure.getStatus().getStatus();

                        if (failureStatus != RestStatus.CONFLICT.getStatus()) {
                            versionConflictsOnly = false;
                            admin.log().error("Index failure during bulk update: {}", failureMessage);
                        } else {
                            admin.log().warn("Version conflict reason: {}", failureMessage);
                        }
                    }
                    if (!versionConflictsOnly) {
                        throw new IllegalStateException(
                                "There were indexing failures during bulk updates. See logs for all failures.");
                    }
                }

                if (attempts <= 0) {
                    throw new IndexException(
                            "There were indexing failures during bulk updates. See logs for all failures.", null);
                }

                if (versionConflicts > 0) {
                    --attempts;
                    try {
                        Thread.sleep(100 + random.nextInt(900));
                        admin.refresh(Collections.singleton(mapping));
                    } catch (InterruptedException e) {
                        throw new IndexException("Interrupted", e);
                    }
                }
            } catch (IOException e) {
                throw new IndexException("Could not execute bulk update.", e);
            }
        } while (versionConflicts > 0);
    }

    private int getConcurrencyLevel() {
        return (int) admin.settings().get(IndexClientFactory.COMMIT_CONCURRENCY_LEVEL);
    }

    /*
     * Testing only, dumps a text representation of all operations to the console
     */
    private void dumpOps() throws IOException {
        System.err.println("Added documents:");
        for (Entry<Class<?>, Map<String, Object>> indexOperationsByType : indexOperations.rowMap().entrySet()) {
            for (Entry<String, Object> entry : indexOperationsByType.getValue().entrySet()) {
                System.err.format("\t%s -> %s\n", entry.getKey(), mapper.writeValueAsString(entry.getValue()));
            }
        }
        System.err.println("Deleted documents: ");
        for (Class<?> type : deleteOperations.keySet()) {
            System.err.format("\t%s -> %s\n", admin.mappings().getMapping(type).typeAsString(),
                    deleteOperations.get(type));
        }
        System.err.println("Bulk updates: ");
        for (BulkUpdate<?> update : updateOperations) {
            System.err.format("\t%s -> %s, %s, %s\n", admin.mappings().getMapping(update.getType()).typeAsString(),
                    update.getFilter(), update.getScript(), update.getParams());
        }
    }

    @Override
    public DocSearcher searcher() {
        return searcher;
    }

    @Override
    public void close() throws Exception {
    }
}