com.ottogroup.bi.asap.operator.twitter.consumer.TwitterStreamConsumer.java Source code

Java tutorial

Introduction

Here is the source code for com.ottogroup.bi.asap.operator.twitter.consumer.TwitterStreamConsumer.java

Source

/**
 * Copyright 2014 Otto (GmbH & Co KG)
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.ottogroup.bi.asap.operator.twitter.consumer;

import java.io.IOException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import java.util.Properties;
import java.util.TimeZone;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.TimeUnit;

import org.apache.commons.lang3.StringUtils;
import org.apache.log4j.Logger;

import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.SerializationFeature;
import com.fasterxml.jackson.databind.node.ObjectNode;
import com.ottogroup.bi.asap.component.ComponentConfiguration;
import com.ottogroup.bi.asap.component.ComponentType;
import com.ottogroup.bi.asap.component.annotation.AsapComponent;
import com.ottogroup.bi.asap.component.source.Source;
import com.ottogroup.bi.asap.component.strategy.config.MessageWaitStrategyConfiguration;
import com.ottogroup.bi.asap.exception.RequiredInputMissingException;
import com.ottogroup.bi.asap.exception.handler.ExceptionHandlerType;
import com.ottogroup.bi.asap.exception.handler.config.ExceptionHandlerConfiguration;
import com.ottogroup.bi.asap.mailbox.Mailbox;
import com.ottogroup.bi.asap.mailbox.config.MailboxConfiguration;
import com.ottogroup.bi.asap.message.StreamingDataMessage;
import com.twitter.hbc.ClientBuilder;
import com.twitter.hbc.core.Constants;
import com.twitter.hbc.core.endpoint.Location;
import com.twitter.hbc.core.endpoint.StatusesFilterEndpoint;
import com.twitter.hbc.core.processor.StringDelimitedProcessor;
import com.twitter.hbc.httpclient.BasicClient;
import com.twitter.hbc.httpclient.auth.Authentication;
import com.twitter.hbc.httpclient.auth.OAuth1;

/**
 * Source component that reads status updates from the
 * <a href="http://twitter.com">twitter.com</a> stream and inserts them into the pipeline.
 * <p>
 * {@link #init(Properties)} validates the configuration and connects the twitter client which
 * buffers raw messages in an internal queue. {@link #run()} drains that queue, adds an
 * <code>@timestamp</code> attribute to each message (see {@link #timing(String)}) and hands the
 * result over to the configured {@link Mailbox}.
 *
 * @author mnxfst
 * @since Nov 28, 2014
 */
@AsapComponent(type = ComponentType.SOURCE, name = "twitterStreamConsumer", version = "0.0.1", description = "Reads data from twitter stream")
public class TwitterStreamConsumer implements Source {

    /** our faithful logging service ;-) */
    private static final Logger logger = Logger.getLogger(TwitterStreamConsumer.class);

    /** maximum number of raw messages buffered between the twitter client and this consumer */
    private static final int STREAM_MESSAGE_QUEUE_CAPACITY = 100000;
    /** time the consumer waits for a new message before re-checking its running state */
    private static final long QUEUE_POLL_TIMEOUT_MILLIS = 100;
    /** JSON attribute read as the epoch millisecond timestamp of a message */
    private static final String JSON_FIELD_TIMESTAMP_MS = "timestamp_ms";
    /** JSON attribute the formatted timestamp is written to */
    private static final String JSON_FIELD_TIMESTAMP = "@timestamp";

    /**
     * formats timestamps like 2014-09-11T08:01:53.000Z - pinned to UTC as the pattern emits a literal 'Z'.
     * SimpleDateFormat is not thread-safe: the instance must only be used through {@link #timing(String)}
     */
    private final SimpleDateFormat timestampFormat = newUtcTimestampFormat();
    private final ObjectMapper jsonMapper = new ObjectMapper();

    ///////////////////////////////////////////////////////////////////////////////////
    // configuration options 
    public static final String CFG_COMPONENT_ID = "twitter.component.id";
    public static final String CFG_TWITTER_CONSUMER_KEY = "twitter.consumer.key";
    public static final String CFG_TWITTER_CONSUMER_SECRET = "twitter.consumer.secret";
    public static final String CFG_TWITTER_TOKEN_KEY = "twitter.token.key";
    public static final String CFG_TWITTER_TOKEN_SECRET = "twitter.token.secret";
    public static final String CFG_TWITTER_TWEET_SEARCH_TERMS = "twitter.tweet.terms";
    public static final String CFG_TWITTER_TWEET_LANGUAGES = "twitter.tweet.languages";
    public static final String CFG_TWITTER_PROFILES = "twitter.profiles";
    //
    ///////////////////////////////////////////////////////////////////////////////////

    ///////////////////////////////////////////////////////////////////////////////////
    // required stream settings
    /** component identifier */
    private String id = null;
    /** consumer key issued by twitter.com */
    private String consumerKey = null;
    /** consumer secret issued by twitter.com */
    private String consumerSecret = null;
    /** token key issued by twitter.com */
    private String tokenKey = null;
    /** token secret issued by twitter.com */
    private String tokenSecret = null;
    /** terms to search for in twitter status stream - applied across all status updates */
    private final List<String> searchTerms = new ArrayList<>();
    /** twitter profiles to retrieve data from - applied in addition to search terms, data will be merged */
    private final List<Long> profiles = new ArrayList<>();
    /** languages to filter twitter stream for */
    private final List<String> languages = new ArrayList<>();
    /** locations to filter twitter stream for - not read from the configuration, see {@link #getLocations()} */
    private final List<Location> locations = new ArrayList<>();
    /** internal message queue used for buffering before data is being handed over to publisher */
    private final BlockingQueue<String> streamMessageQueue = new LinkedBlockingQueue<>(STREAM_MESSAGE_QUEUE_CAPACITY);
    //
    ///////////////////////////////////////////////////////////////////////////////////

    /** mailbox */
    private Mailbox mailbox = null;
    /** client handling communication with stream.twitter.com */
    private BasicClient twitterClient = null;
    /** state - volatile since shutdown() is presumably invoked from another thread than run() */
    private volatile boolean isRunning = false;
    /** message counter - written by the thread executing run() only, volatile to publish updates to readers */
    private volatile long numProcessedMessages = 0;

    /**
     * Reads the twitter credentials and filter settings from the provided properties, validates
     * them and establishes the connection with stream.twitter.com. If a client has already been
     * assigned through {@link #setTwitterClient(BasicClient)} no new client is created or connected.
     *
     * @param props configuration, see the <code>CFG_*</code> constants for supported keys
     * @throws RequiredInputMissingException if the properties are missing or empty, a required
     *         setting is blank or no filter criteria (terms, languages, profiles, locations) is given
     * @see com.ottogroup.bi.asap.component.Component#init(java.util.Properties)
     */
    public void init(Properties props) throws RequiredInputMissingException {

        if (props == null || props.isEmpty()) {
            throw new RequiredInputMissingException("Missing required configuration");
        }

        //////////////////////////////////////////////////////////
        // extract required configurational data 
        this.consumerKey = props.getProperty(CFG_TWITTER_CONSUMER_KEY);
        this.consumerSecret = props.getProperty(CFG_TWITTER_CONSUMER_SECRET);
        this.tokenKey = props.getProperty(CFG_TWITTER_TOKEN_KEY);
        this.tokenSecret = props.getProperty(CFG_TWITTER_TOKEN_SECRET);
        this.id = props.getProperty(CFG_COMPONENT_ID);

        // blank entries (eg. "a,,b") are dropped for all three lists: a blank value is no valid filter
        this.searchTerms.addAll(splitAndTrim(props.getProperty(CFG_TWITTER_TWEET_SEARCH_TERMS)));
        this.languages.addAll(splitAndTrim(props.getProperty(CFG_TWITTER_TWEET_LANGUAGES)));
        for (String profile : splitAndTrim(props.getProperty(CFG_TWITTER_PROFILES))) {
            try {
                this.profiles.add(Long.parseLong(profile));
            } catch (NumberFormatException e) {
                // invalid identifiers are reported and skipped, remaining profiles are still applied
                logger.error("Failed to parse profile identifier from input '" + profile + "'");
            }
        }
        //
        //////////////////////////////////////////////////////////

        ////////////////////////////////////////////////////////////////////////////////////////////
        // validate provided input before attempting to establish connection with stream.twitter.com
        if (StringUtils.isBlank(this.id)) {
            throw new RequiredInputMissingException("Missing required component identifier");
        }
        if (StringUtils.isBlank(this.consumerKey)) {
            throw new RequiredInputMissingException(
                    "Missing required consumer key to establish connection with stream.twitter.com");
        }
        if (StringUtils.isBlank(this.consumerSecret)) {
            throw new RequiredInputMissingException(
                    "Missing required consumer secrect to establish connection with stream.twitter.com");
        }
        if (StringUtils.isBlank(this.tokenKey)) {
            throw new RequiredInputMissingException(
                    "Missing required token key to establish connection with stream.twitter.com");
        }
        if (StringUtils.isBlank(this.tokenSecret)) {
            throw new RequiredInputMissingException(
                    "Missing required token secret to establish connection with stream.twitter.com");
        }

        boolean isFilterTermsEmpty = this.searchTerms.isEmpty();
        boolean isLanguagesEmpty = this.languages.isEmpty();
        boolean isUserAccountEmpty = this.profiles.isEmpty();
        boolean isLocationsEmpty = this.locations.isEmpty();

        if (isFilterTermsEmpty && isLanguagesEmpty && isUserAccountEmpty && isLocationsEmpty) {
            throw new RequiredInputMissingException(
                    "Missing information what to filter twitter stream for: terms, languages, user accounts or locations");
        }
        //
        ////////////////////////////////////////////////////////////////////////////////////////////

        //////////////////////////////////////////////////////////
        // establish connection with stream.twitter.com
        Authentication auth = new OAuth1(this.consumerKey, this.consumerSecret, this.tokenKey, this.tokenSecret);
        StatusesFilterEndpoint filterEndpoint = new StatusesFilterEndpoint();
        if (!isFilterTermsEmpty) {
            filterEndpoint.trackTerms(this.searchTerms);
        }
        if (!isLanguagesEmpty) {
            filterEndpoint.languages(this.languages);
        }
        if (!isUserAccountEmpty) {
            filterEndpoint.followings(this.profiles);
        }
        if (!isLocationsEmpty) {
            filterEndpoint.locations(this.locations);
        }

        // a client may have been injected beforehand (see setTwitterClient) - keep it untouched
        if (this.twitterClient == null) {
            this.twitterClient = new ClientBuilder().name(this.id).hosts(Constants.STREAM_HOST)
                    .endpoint(filterEndpoint).authentication(auth)
                    .processor(new StringDelimitedProcessor(this.streamMessageQueue)).build();
            this.twitterClient.connect();
        }
        //
        //////////////////////////////////////////////////////////

        this.isRunning = true;

    }

    /**
     * Signals the consumer to stop. The twitter client itself is stopped by {@link #run()} once
     * it notices the changed state.
     *
     * @return always <code>true</code>
     * @see com.ottogroup.bi.asap.node.pipeline.component.DataComponent#shutdown()
     */
    public boolean shutdown() {
        this.isRunning = false;
        return true;
    }

    /**
     * Moves messages from the internal queue into the mailbox until the consumer is shut down,
     * the twitter client is done or the executing thread is interrupted. Afterwards the twitter
     * client is stopped if it is still active.
     *
     * @see java.lang.Runnable#run()
     */
    public void run() {

        if (logger.isDebugEnabled()) {
            logger.debug("twitter stream consumer initialized [id=" + id + "]");
        }

        // keep on consuming until either the consumer or the client is interrupted  
        while (this.isRunning && !this.twitterClient.isDone()) {
            try {
                // bounded wait ensures that a shutdown request is noticed even if no messages arrive
                String msg = streamMessageQueue.poll(QUEUE_POLL_TIMEOUT_MILLIS, TimeUnit.MILLISECONDS);
                if (msg != null) {
                    this.mailbox.insert(new StreamingDataMessage(this.id, timing(msg), System.currentTimeMillis()));
                    this.numProcessedMessages++;
                    // TODO implement back pressure handling
                }
            } catch (InterruptedException e) {
                // restore the interrupt flag for the caller and stop consuming: continuing
                // the loop would either ignore the signal or fail again on the next poll
                Thread.currentThread().interrupt();
                break;
            }
        }

        // stop the twitter client in case the consumer has been interrupted by external signal
        if (this.twitterClient != null && !this.twitterClient.isDone()) {
            this.twitterClient.stop();
        }

        logger.info("twitter stream consumer received " + this.numProcessedMessages + " messages");
    }

    /**
     * Adds an <code>@timestamp</code> attribute to the provided JSON message. Its value is the
     * content of the <code>timestamp_ms</code> attribute, interpreted as epoch milliseconds and
     * formatted as <code>yyyy-MM-dd'T'HH:mm:ss.SSS'Z'</code> in UTC.
     * <p>
     * Messages that are blank, cannot be parsed, are not JSON objects or do not carry a
     * <code>timestamp_ms</code> value are returned unchanged instead of failing the consumer.
     *
     * @param msg raw message as received from the stream
     * @return message extended by <code>@timestamp</code> or the unmodified input
     */
    protected String timing(String msg) {
        if (StringUtils.isBlank(msg)) {
            return msg;
        }
        try {
            Object tree = jsonMapper.readTree(msg);
            if (!(tree instanceof ObjectNode)) {
                // only JSON objects can be extended by an additional attribute
                return msg;
            }
            ObjectNode node = (ObjectNode) tree;
            if (!node.hasNonNull(JSON_FIELD_TIMESTAMP_MS)) {
                // message carries no timestamp to derive the attribute from
                return msg;
            }
            long timestampMillis = node.get(JSON_FIELD_TIMESTAMP_MS).asLong();
            node.put(JSON_FIELD_TIMESTAMP, timestampFormat.format(new Date(timestampMillis)));
            return jsonMapper.writeValueAsString(node);
        } catch (IOException e) {
            // best effort: content which is not valid JSON is forwarded as received
            if (logger.isDebugEnabled()) {
                logger.debug("Failed to add timestamp to message received from twitter stream [id=" + id + "]", e);
            }
        }
        return msg;
    }

    /**
     * Creates the formatter used for the <code>@timestamp</code> attribute. The time zone is
     * set to UTC explicitly: the pattern ends with a literal 'Z' and would otherwise label
     * timestamps rendered in the JVM default time zone as UTC.
     */
    private static SimpleDateFormat newUtcTimestampFormat() {
        SimpleDateFormat format = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSS'Z'");
        format.setTimeZone(TimeZone.getTimeZone("UTC"));
        return format;
    }

    /**
     * Splits a comma separated string into its trimmed elements and drops blank ones.
     *
     * @param commaSeparated input to split, may be <code>null</code>
     * @return trimmed non-blank elements, empty list for <code>null</code> input
     */
    private static List<String> splitAndTrim(String commaSeparated) {
        List<String> values = new ArrayList<>();
        if (commaSeparated != null) {
            for (String token : commaSeparated.split(",")) {
                String trimmed = StringUtils.trim(token);
                if (StringUtils.isNotBlank(trimmed)) {
                    values.add(trimmed);
                }
            }
        }
        return values;
    }

    /**
     * @see com.ottogroup.bi.asap.component.Component#getId()
     */
    public String getId() {
        return this.id;
    }

    /**
     * @see com.ottogroup.bi.asap.component.Component#setId(java.lang.String)
     */
    public void setId(String id) {
        this.id = id;
    }

    /**
     * Returns the number of messages handed over to the mailbox so far.
     *
     * @see com.ottogroup.bi.asap.component.Component#getTotalNumOfMessages()
     */
    public long getTotalNumOfMessages() {
        return numProcessedMessages;
    }

    /**
     * @see com.ottogroup.bi.asap.component.source.Source#setMailbox(com.ottogroup.bi.asap.mailbox.Mailbox)
     */
    public void setMailbox(Mailbox mailbox) {
        this.mailbox = mailbox;
    }

    ///////////////////////////////////////////////////////////////////////
    // REQUIRED FOR TESTING ONLY

    protected String getConsumerKey() {
        return consumerKey;
    }

    protected String getConsumerSecret() {
        return consumerSecret;
    }

    protected String getTokenKey() {
        return tokenKey;
    }

    protected String getTokenSecret() {
        return tokenSecret;
    }

    protected List<Long> getProfiles() {
        return profiles;
    }

    protected List<String> getLanguages() {
        return languages;
    }

    protected List<Location> getLocations() {
        return locations;
    }

    protected boolean isRunning() {
        return isRunning;
    }

    protected List<String> getSearchTerms() {
        return searchTerms;
    }

    protected BlockingQueue<String> getStreamMessageQueue() {
        return streamMessageQueue;
    }

    protected void setTwitterClient(BasicClient twitterClient) {
        this.twitterClient = twitterClient;
    }

    protected void setRunning(boolean isRunning) {
        this.isRunning = isRunning;
    }

    //
    ///////////////////////////////////////////////////////////////////////

    /**
     * Prints a sample component configuration for this consumer as indented JSON to stdout.
     * All credentials are placeholders which must be replaced before the configuration is used.
     *
     * @param args not evaluated
     * @throws Exception if the configuration cannot be serialized
     */
    public static void main(String[] args) throws Exception {
        ObjectMapper m = new ObjectMapper();
        m.enable(SerializationFeature.INDENT_OUTPUT);

        ComponentConfiguration cfg = new ComponentConfiguration("twitter-stream-reader", ComponentType.SOURCE,
                "twitterStreamConsumer", "0.0.1");
        cfg.setComponentExceptionHandler(
                new ExceptionHandlerConfiguration(ExceptionHandlerType.COMPONENT_EXCEPTION_HANDLER,
                        "twitterComponentExceptionHandler", "log4jExceptionHandler", "0.0.1"));
        cfg.setExecutorExceptionHandler(
                new ExceptionHandlerConfiguration(ExceptionHandlerType.EXECUTOR_EXCEPTION_HANDLER,
                        "twitterExecutorExceptionHandler", "log4jExceptionHandler", "0.0.1"));
        cfg.setMessageWaitStrategy(new MessageWaitStrategyConfiguration("twitterExecutorWaitStrategy",
                "sleepingMessageWaitStrategy", "0.0.1"));
        cfg.setMailbox(
                new MailboxConfiguration("twitterConsumerMailbox", "oneToOneConcurrentArrayQueueMailbox", "0.0.1"));
        cfg.addSubscription("simpleFilteringOperator");

        cfg.addSetting(TwitterStreamConsumer.CFG_TWITTER_CONSUMER_KEY, "<consumer_id>");
        cfg.addSetting(TwitterStreamConsumer.CFG_TWITTER_CONSUMER_SECRET, "<consumer_secret>");
        cfg.addSetting(TwitterStreamConsumer.CFG_TWITTER_PROFILES, "1,2,3");
        cfg.addSetting(TwitterStreamConsumer.CFG_TWITTER_TOKEN_KEY, "<token_key>");
        cfg.addSetting(TwitterStreamConsumer.CFG_TWITTER_TOKEN_SECRET, "<token_secret>");
        cfg.addSetting(TwitterStreamConsumer.CFG_TWITTER_TWEET_LANGUAGES, "DE,FR,EN");
        cfg.addSetting(TwitterStreamConsumer.CFG_TWITTER_TWEET_SEARCH_TERMS, "SOCCER,FOOTBALL,FUSSBALL");

        System.out.println(m.writeValueAsString(cfg));
    }

}