org.apache.streams.elasticsearch.processor.PercolateTagProcessor.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.streams.elasticsearch.processor.PercolateTagProcessor.java

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.streams.elasticsearch.processor;

import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.node.ArrayNode;
import com.fasterxml.jackson.databind.node.JsonNodeFactory;
import com.fasterxml.jackson.databind.node.ObjectNode;
import com.google.common.base.Preconditions;
import com.google.common.collect.Lists;
import org.apache.streams.core.StreamsDatum;
import org.apache.streams.core.StreamsProcessor;
import org.apache.streams.data.util.ActivityUtil;
import org.apache.streams.pojo.extensions.ExtensionUtil;
import org.apache.streams.elasticsearch.ElasticsearchClientManager;
import org.apache.streams.elasticsearch.ElasticsearchConfiguration;
import org.apache.streams.elasticsearch.ElasticsearchWriterConfiguration;
import org.apache.streams.jackson.StreamsJacksonMapper;
import org.apache.streams.pojo.json.Activity;
import org.elasticsearch.action.admin.indices.create.CreateIndexRequest;
import org.elasticsearch.action.admin.indices.create.CreateIndexResponse;
import org.elasticsearch.action.admin.indices.exists.indices.IndicesExistsRequest;
import org.elasticsearch.action.bulk.BulkItemResponse;
import org.elasticsearch.action.bulk.BulkRequestBuilder;
import org.elasticsearch.action.bulk.BulkResponse;
import org.elasticsearch.action.percolate.PercolateRequestBuilder;
import org.elasticsearch.action.percolate.PercolateResponse;
import org.elasticsearch.action.search.SearchRequestBuilder;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.index.query.QueryStringQueryBuilder;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.search.SearchHits;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.util.*;

/**
 * Tags each incoming document with the ids of the percolator queries it matches.
 *
 * <p>{@link #prepare(Object)} registers one query-string percolator rule per configured tag
 * (rule id = tag name) under the {@code .percolator} type of the configured index.
 * {@link #process(StreamsDatum)} then percolates the datum's document against that index,
 * converts the document to an {@link Activity} and stores the ids of the matching rules in
 * the activity's extensions under {@link #TAGS_EXTENSION}.
 */

public class PercolateTagProcessor implements StreamsProcessor {

    public static final String STREAMS_ID = "PercolateTagProcessor";
    private final static Logger LOGGER = LoggerFactory.getLogger(PercolateTagProcessor.class);
    private final static String DEFAULT_PERCOLATE_FIELD = "_all";

    /** Document type under which the percolator rules are indexed, searched and deleted. */
    private final static String PERCOLATOR_TYPE = ".percolator";

    /**
     * Page size used when listing the registered rules.
     * NOTE(review): rules beyond this count are not returned by
     * {@link #getActivePercolateTags(String)} and therefore not deleted either.
     */
    private final static int MAX_ACTIVE_TAGS = 1000;

    private ObjectMapper mapper;

    // Never assigned inside this class; kept for subclasses that may rely on them.
    protected Queue<StreamsDatum> inQueue;
    protected Queue<StreamsDatum> outQueue;

    /** Key under which the matched tag ids are stored in the activity extensions. */
    public String TAGS_EXTENSION = "tags";

    private ElasticsearchWriterConfiguration config;
    private ElasticsearchClientManager manager;
    private BulkRequestBuilder bulkBuilder;
    protected String usePercolateField;

    /**
     * Creates a processor whose rules query the {@code _all} field by default.
     *
     * @param config writer configuration providing index, type and tags
     */
    public PercolateTagProcessor(ElasticsearchWriterConfiguration config) {
        this(config, DEFAULT_PERCOLATE_FIELD);
    }

    /**
     * Creates a processor whose rules query the given field by default.
     *
     * @param config writer configuration providing index, type and tags
     * @param defaultPercolateField default field of the generated query-string queries
     */
    public PercolateTagProcessor(ElasticsearchWriterConfiguration config, String defaultPercolateField) {
        this.config = config;
        this.usePercolateField = defaultPercolateField;
    }

    public ElasticsearchClientManager getManager() {
        return manager;
    }

    /**
     * Sets the client manager. Note that {@link #prepare(Object)} always replaces it with a
     * manager built from the current configuration.
     */
    public void setManager(ElasticsearchClientManager manager) {
        this.manager = manager;
    }

    public ElasticsearchConfiguration getConfig() {
        return config;
    }

    public void setConfig(ElasticsearchWriterConfiguration config) {
        this.config = config;
    }

    /**
     * @return the {@code outQueue} field; this class never assigns it, so the result is
     *         {@code null} unless a subclass sets the field
     */
    public Queue<StreamsDatum> getProcessorOutputQueue() {
        return outQueue;
    }

    /**
     * Percolates the datum's document and attaches the ids of all matching rules as tags.
     *
     * <p>The document must be either a JSON string holding a JSON object or an
     * {@link ObjectNode}. On success the datum's document is replaced by an {@link Activity}
     * carrying the tags.
     *
     * @param entry datum to tag
     * @return a single-element list holding the updated datum, or {@code null} when the
     *         document is missing, of an unsupported type, not a JSON object, or when the
     *         percolate request fails
     */
    @Override
    public List<StreamsDatum> process(StreamsDatum entry) {

        if (entry == null || entry.getDocument() == null) {
            LOGGER.warn("Ignoring datum without a document");
            return null;
        }

        Object document = entry.getDocument();
        String json;
        ObjectNode node;
        if (document instanceof String) {
            json = (String) document;
            try {
                // Valid JSON is not necessarily an object (arrays, scalars): check before casting.
                Object parsed = mapper.readTree(json);
                if (!(parsed instanceof ObjectNode)) {
                    LOGGER.warn("Invalid datum, document is not a JSON object: {}", json);
                    return null;
                }
                node = (ObjectNode) parsed;
            } catch (IOException e) {
                LOGGER.warn("Invalid datum, document is not valid JSON: {}", json, e);
                return null;
            }
        } else if (document instanceof ObjectNode) {
            node = (ObjectNode) document;
            try {
                json = mapper.writeValueAsString(node);
            } catch (JsonProcessingException e) {
                LOGGER.warn("Invalid datum: {}", node, e);
                return null;
            }
        } else {
            LOGGER.warn("Incompatible document type: {}", document.getClass());
            return null;
        }

        String percolateRequestJson = "{ \"doc\": " + json + "}";

        PercolateResponse response;
        try {
            LOGGER.trace("Percolate request json: {}", percolateRequestJson);
            PercolateRequestBuilder request = manager.getClient().preparePercolate()
                    .setIndices(config.getIndex())
                    .setDocumentType(config.getType())
                    .setSource(percolateRequestJson);
            traceRequest(request);
            response = request.execute().actionGet();
            LOGGER.trace("Percolate response: {} matches", response.getMatches().length);
        } catch (Exception e) {
            LOGGER.warn("Percolate exception: {}", e.getMessage(), e);
            return null;
        }

        ArrayNode tagArray = JsonNodeFactory.instance.arrayNode();
        for (PercolateResponse.Match match : response) {
            tagArray.add(match.getId().string());
        }

        LOGGER.trace("Percolate matches: {}", tagArray);

        Activity activity = mapper.convertValue(node, Activity.class);
        appendMatches(tagArray, activity);
        entry.setDocument(activity);

        List<StreamsDatum> result = Lists.newArrayList();
        result.add(entry);
        return result;

    }

    /**
     * Logs the serialized percolate request at trace level. Serialization only happens when
     * trace logging is enabled, and a failure here never aborts the percolate call itself.
     */
    private void traceRequest(PercolateRequestBuilder request) {
        if (!LOGGER.isTraceEnabled()) {
            return;
        }
        try {
            LOGGER.trace("Percolate request: {}", mapper.writeValueAsString(request.request()));
        } catch (Exception e) {
            LOGGER.trace("Percolate request could not be serialized for logging", e);
        }
    }

    /**
     * Stores the matched tag ids in the activity's extensions under {@link #TAGS_EXTENSION}.
     *
     * @param tagArray ids of the matching percolator rules
     * @param activity activity to annotate
     */
    protected void appendMatches(ArrayNode tagArray, Activity activity) {
        Map<String, Object> extensions = ExtensionUtil.ensureExtensions(activity);

        extensions.put(TAGS_EXTENSION, tagArray);

        activity.setAdditionalProperty(ActivityUtil.EXTENSION_PROPERTY, extensions);
    }

    /**
     * Connects to Elasticsearch and registers one percolator rule per configured tag.
     *
     * <p>Creates the index if it is missing, optionally deletes the previously registered
     * rules (when {@code replaceTags} is set) and bulk-writes the new rules. A failed bulk
     * write is logged but does not throw.
     *
     * @param o unused
     */
    @Override
    public void prepare(Object o) {

        Preconditions.checkNotNull(config);
        Preconditions.checkNotNull(config.getTags());
        Preconditions.checkArgument(config.getTags().getAdditionalProperties().size() > 0);

        // consider using mapping to figure out what fields are included in _all

        mapper = StreamsJacksonMapper.getInstance();
        manager = new ElasticsearchClientManager(config);
        bulkBuilder = manager.getClient().prepareBulk();

        String index = config.getIndex();
        createIndexIfMissing(index);
        // Boolean.TRUE.equals tolerates an unset (null) flag instead of failing on unboxing.
        if (Boolean.TRUE.equals(config.getReplaceTags())) {
            deleteOldQueries(index);
        }

        Map<String, ?> tags = config.getTags().getAdditionalProperties();
        for (Map.Entry<String, ?> rule : tags.entrySet()) {
            String query = (String) rule.getValue();
            addPercolateRule(new PercolateQueryBuilder(rule.getKey(), query, this.usePercolateField), index);
        }

        if (writePercolateRules()) {
            LOGGER.info("wrote {} tags to {} _percolator", bulkBuilder.numberOfActions(), index);
        } else {
            LOGGER.error("FAILED writing {} tags to {} _percolator", bulkBuilder.numberOfActions(), index);
        }

    }

    /**
     * Optionally deletes the registered rules (when {@code cleanupTags} is set) and closes
     * the client. The client is closed even if the deletion throws; nothing happens when no
     * manager is available.
     */
    @Override
    public void cleanUp() {
        if (manager == null) {
            return;
        }
        try {
            if (Boolean.TRUE.equals(config.getCleanupTags())) {
                deleteOldQueries(config.getIndex());
            }
        } finally {
            manager.getClient().close();
        }
    }

    /**
     * @return number of rules queued in the current bulk request, {@code 0} if no bulk
     *         request has been created yet
     */
    public int numOfPercolateRules() {
        return this.bulkBuilder == null ? 0 : this.bulkBuilder.numberOfActions();
    }

    /**
     * Creates the index when it does not exist yet.
     *
     * @param indexName index to check and create
     * @throws RuntimeException if the index creation is not acknowledged
     */
    public void createIndexIfMissing(String indexName) {
        boolean exists = this.manager.getClient().admin().indices()
                .exists(new IndicesExistsRequest(indexName)).actionGet().isExists();
        if (exists) {
            return;
        }

        // It does not exist... So we are going to need to create the index.
        // we are going to assume that the 'templates' that we have loaded into
        // elasticsearch are sufficient to ensure the index is being created properly.
        CreateIndexResponse response = this.manager.getClient().admin().indices()
                .create(new CreateIndexRequest(indexName)).actionGet();

        if (response.isAcknowledged()) {
            LOGGER.info(
                    "Index {} did not exist. The index was automatically created from the stored ElasticSearch Templates.",
                    indexName);
        } else {
            LOGGER.error(
                    "Index {} did not exist. While attempting to create the index from stored ElasticSearch Templates we were unable to get an acknowledgement.",
                    indexName);
            LOGGER.error("Error Message: {}", response.toString());
            throw new RuntimeException("Unable to create index " + indexName);
        }
    }

    /**
     * Queues a rule for the next {@link #writePercolateRules()} call.
     *
     * @param builder rule id and query source
     * @param index index the rule is registered in
     */
    public void addPercolateRule(PercolateQueryBuilder builder, String index) {
        this.bulkBuilder.add(manager.getClient().prepareIndex(index, PERCOLATOR_TYPE, builder.getId())
                .setSource(builder.getSource()));
    }

    /**
     * Writes all queued rules in one bulk request and logs every failed item.
     *
     * @return true if all rules were added. False indicates one or more rules have failed.
     * @throws RuntimeException if no rule has been queued
     */
    public boolean writePercolateRules() {
        // numberOfActions() is never negative, so the guard has to reject the empty case.
        if (this.numOfPercolateRules() < 1) {
            throw new RuntimeException("No Rules Have been added!");
        }
        BulkResponse response = this.bulkBuilder.execute().actionGet();
        for (BulkItemResponse item : response.getItems()) {
            if (item.isFailed()) {
                LOGGER.error("Failed to write percolate rule {}: {}", item.getId(), item.getFailureMessage());
            }
        }
        return !response.hasFailures();
    }

    /**
     * Deletes the given rules from the index.
     *
     * @param ids ids of the rules to delete
     * @param index index the rules are registered in
     * @return true if all of the old tags were removed. False indicates that one or more
     *         tags were not removed, or that no ids were given.
     */
    public boolean removeOldTags(Set<String> ids, String index) {
        if (ids == null || ids.isEmpty()) {
            return false;
        }
        BulkRequestBuilder bulk = manager.getClient().prepareBulk();
        for (String id : ids) {
            // Rules live under the percolator type of the index, matching addPercolateRule.
            bulk.add(manager.getClient().prepareDelete(index, PERCOLATOR_TYPE, id));
        }
        return !bulk.execute().actionGet().hasFailures();
    }

    /**
     * Lists the ids of the rules currently registered in the index.
     *
     * @param index index to inspect
     * @return ids of the registered rules, at most {@value #MAX_ACTIVE_TAGS}
     */
    public Set<String> getActivePercolateTags(String index) {
        Set<String> tags = new HashSet<String>();
        SearchRequestBuilder searchBuilder = manager.getClient().prepareSearch(index)
                .setTypes(PERCOLATOR_TYPE).setSize(MAX_ACTIVE_TAGS);
        SearchResponse response = searchBuilder.setQuery(QueryBuilders.matchAllQuery()).execute().actionGet();
        SearchHits hits = response.getHits();
        for (SearchHit hit : hits.getHits()) {
            tags.add(hit.id());
        }
        return tags;
    }

    /**
     * Deletes every rule returned by {@link #getActivePercolateTags(String)}.
     *
     * @param index index whose rules are deleted
     * @return true if all found rules were deleted; false if none were found or at least one
     *         deletion failed
     */
    public boolean deleteOldQueries(String index) {
        Set<String> tags = getActivePercolateTags(index);
        if (tags.isEmpty()) {
            LOGGER.warn("No active tags were found in _percolator for index : {}", index);
            return false;
        }
        LOGGER.info("Deleting {} tags.", tags.size());
        return removeOldTags(tags, index);
    }

    /**
     * Pairs a rule id with a query-string query and renders the document source that is
     * indexed as a percolator rule.
     */
    public static class PercolateQueryBuilder {

        private final QueryStringQueryBuilder queryBuilder;
        private final String id;

        /**
         * @param id rule id (the tag name)
         * @param query query-string expression
         * @param defaultPercolateField default field of the query
         */
        public PercolateQueryBuilder(String id, String query, String defaultPercolateField) {
            this.id = id;
            this.queryBuilder = QueryBuilders.queryString(query);
            this.queryBuilder.defaultField(defaultPercolateField);
        }

        public String getId() {
            return this.id;
        }

        /**
         * @return JSON document of the form {@code { "query" : ... }} wrapping the query
         */
        public String getSource() {
            return "{ \n\"query\" : " + this.queryBuilder.toString() + "\n}";
        }

    }

    /** Not referenced within this class. */
    public enum FilterLevel {
        MUST, SHOULD, MUST_NOT
    }
}