edu.isi.mtandao.twitter.StreamCrawler.java Source code

Java tutorial

Introduction

Here is the source code for edu.isi.mtandao.twitter.StreamCrawler.java

Source

/*
 * Mtandao: A Social Media Toolkit
 * 
 * Licensed under the Apache License, Version 2.0 (the "License"); you
 * may not use this file except in compliance with the License. You may
 * obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0 
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
 * implied. See the License for the specific language governing
 * permissions and limitations under the License.
 */

package edu.isi.mtandao.twitter;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.nio.charset.Charset;

import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.CommandLineParser;
import org.apache.commons.cli.HelpFormatter;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.ParseException;
import org.apache.commons.cli.PosixParser;
import org.scribe.builder.ServiceBuilder;
import org.scribe.builder.api.TwitterApi;
import org.scribe.model.OAuthRequest;
import org.scribe.model.Response;
import org.scribe.model.Token;
import org.scribe.model.Verb;
import org.scribe.oauth.OAuthService;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import edu.isi.mtandao.handle.Handler;

/**
 * Base class for crawlers that consume a line-oriented stream from a Twitter
 * API URL and pass every non-empty line to a {@link Handler}.
 *
 * <p>Subclasses supply the URL through {@link #getRequestURL()}. A crawler is
 * configured from the command line via {@link #initialize(CommandLine)} and
 * started with {@link #crawl()}, which signs its requests with OAuth, verifies
 * the supplied credentials once, and then reads the stream in a loop,
 * reconnecting with a back-off delay after HTTP or I/O errors.
 *
 * <p>{@link #main(String[])} instantiates the class named by the
 * {@code crawlerClass} option reflectively through its no-argument
 * constructor.
 *
 * @author metzler
 */
public abstract class StreamCrawler {

    // logging
    private static final Logger LOGGER = LoggerFactory.getLogger(StreamCrawler.class);

    // HTTP "STATUS OK" code
    private static final int HTTP_STATUS_OK = 200;

    // default handler class
    private static final String DEFAULT_HANDLER = "edu.isi.mtandao.handle.StdoutHandler";

    // Twitter API URL to verify a user credentials
    // NOTE(review): this targets version 1 of the REST API -- confirm the endpoint is still served.
    private static final String VERIFY_CREDENTIALS_URL = "https://api.twitter.com/1/account/verify_credentials.json";

    // the stream is decoded explicitly as UTF-8 instead of with the platform default charset
    private static final Charset UTF_8 = Charset.forName("UTF-8");

    // back-off after a non-200 HTTP status: start at 10s, double on each failure, cap at 240s
    private static final int HTTP_BACKOFF_INITIAL_MS = 10000;
    private static final int HTTP_BACKOFF_MAX_MS = 240000;

    // back-off after an I/O error on the stream: grow by 250ms per failure, cap at 16s
    private static final int IO_BACKOFF_STEP_MS = 250;
    private static final int IO_BACKOFF_MAX_MS = 16000;

    // set up command line options
    protected static Options CLI_OPTIONS = new Options();

    static {
        // crawler class
        CLI_OPTIONS.addOption("crawlerClass", true, "Crawler class name.");

        // api key, secret options
        CLI_OPTIONS.addOption("apiKey", true, "Twitter API key.");
        CLI_OPTIONS.addOption("apiSecret", true, "Twitter API secret.");

        // access token key, secret options
        CLI_OPTIONS.addOption("accessToken", true, "Twitter access token.");
        CLI_OPTIONS.addOption("accessSecret", true, "Twitter access token secret.");

        // handler options
        CLI_OPTIONS.addOption("handlerClass", true, "Handler class name (optional).");
        CLI_OPTIONS.addOption("handlerArgs", true, "Handler argument string (optional).");
    }

    // Twitter OAuth service
    private OAuthService mService = null;

    // handler
    private Handler mHandler = null;

    // api key and secret
    private String mApiKey = null;
    private String mApiSecret = null;

    // access token
    private Token mAccessToken = null;

    /**
     * Creates an unconfigured crawler; {@link #initialize(CommandLine)} must be
     * called before {@link #crawl()}.
     */
    public StreamCrawler() {
        /* do nothing */
    }

    /**
     * Configures this crawler from parsed command line options.
     *
     * <p>The {@code apiKey}, {@code apiSecret}, {@code accessToken} and
     * {@code accessSecret} options are required. If {@code handlerClass} is
     * absent, the default handler is used with an empty argument string. If
     * only {@code handlerArgs} is absent, the requested handler is created
     * with an empty argument string.
     *
     * @param cmd the parsed command line
     * @throws IllegalArgumentException if any required option is missing
     * @throws InstantiationException declared for handler creation (presumably
     *         raised when the handler class cannot be instantiated)
     * @throws IllegalAccessException declared for handler creation
     * @throws ClassNotFoundException declared for handler creation
     */
    protected void initialize(CommandLine cmd)
            throws InstantiationException, IllegalAccessException, ClassNotFoundException {
        // api key and secret
        mApiKey = cmd.getOptionValue("apiKey");
        mApiSecret = cmd.getOptionValue("apiSecret");

        // access token and secret
        String accessToken = cmd.getOptionValue("accessToken");
        String accessSecret = cmd.getOptionValue("accessSecret");

        // check the required arguments before they are used to build anything
        if (mApiKey == null || mApiSecret == null || accessToken == null || accessSecret == null) {
            throw new IllegalArgumentException(
                    "Missing required arguments! apiKey, apiSecret, accessToken and accessSecret must all be specified.");
        }

        // access token
        mAccessToken = new Token(accessToken, accessSecret);

        // handler class name and argument string
        String handlerClass = cmd.getOptionValue("handlerClass");
        String handlerArgs = cmd.getOptionValue("handlerArgs");

        if (handlerClass == null) {
            if (handlerArgs != null) {
                // arguments without a class have no handler to apply to
                LOGGER.warn("Handler arguments were specified without a handler class and will be ignored.");
            }
            LOGGER.warn("No handler class specified. Using default handler: " + DEFAULT_HANDLER);

            handlerClass = DEFAULT_HANDLER;
            handlerArgs = "";
        } else if (handlerArgs == null) {
            // keep the handler the user asked for; only the argument string is defaulted
            LOGGER.info("No handler arguments specified. Using an empty argument string.");
            handlerArgs = "";
        }

        mHandler = Handler.createHandler(handlerClass, handlerArgs);
    }

    /**
     * Crawls the Twitter Streaming API: builds the OAuth service, verifies the
     * configured credentials and then processes the stream.
     *
     * @throws RuntimeException if the credentials cannot be verified
     */
    public void crawl() {
        // establish OAuth service
        establishService();

        // verify provided account credentials
        if (!verifyCredentials()) {
            throw new RuntimeException("Unable to verify credentials -- check your access token and secret.");
        }

        // process tweets from the stream
        processStream();
    }

    // establish the Twitter OAuth service
    private void establishService() {
        mService = new ServiceBuilder().provider(TwitterApi.class).apiKey(mApiKey).apiSecret(mApiSecret).build();
    }

    // verify user credentials; true only when the verification request returns HTTP 200
    private boolean verifyCredentials() {
        // create the OAuth signed request to verify our credentials with the Twitter API
        OAuthRequest request = new OAuthRequest(Verb.GET, VERIFY_CREDENTIALS_URL);
        mService.signRequest(mAccessToken, request);
        Response response = request.send();

        return response.getCode() == HTTP_STATUS_OK;
    }

    /**
     * Creates an OAuth request for the given URL, signed with this crawler's
     * access token.
     *
     * @param verb the HTTP verb to use
     * @param url the URL to request
     * @param keepAlive whether the connection should be kept alive
     * @return the signed request, not yet sent
     */
    protected OAuthRequest getOAuthRequest(Verb verb, String url, boolean keepAlive) {
        OAuthRequest request = new OAuthRequest(verb, url);
        request.setConnectionKeepAlive(keepAlive);
        mService.signRequest(mAccessToken, request);
        return request;
    }

    /**
     * Reads the stream returned by {@link #getRequestURL()} and passes every
     * non-empty line to the handler, reconnecting whenever the stream ends or
     * fails.
     *
     * <p>A response with a status other than 200 starts a 10 second wait that
     * doubles on each further failure, up to 240 seconds. An
     * {@link IOException} while processing the stream adds 250 milliseconds to
     * the wait, up to 16 seconds. The wait is cleared as soon as a line is
     * read from the stream.
     *
     * <p>The loop runs until the thread is interrupted while waiting to
     * reconnect; the interrupt flag is then restored and the method returns.
     */
    protected void processStream() {
        // current status code
        int status = HTTP_STATUS_OK;

        // current delay (if any)
        int delay = 0;

        while (true) { // purposeful infinite loop
            // any non-200 status backs off, including codes below 200
            if (status != HTTP_STATUS_OK) {
                LOGGER.warn("The Twitter API returned the following HTTP status code: " + status);
                delay = (delay == 0) ? HTTP_BACKOFF_INITIAL_MS
                        : Math.min(HTTP_BACKOFF_MAX_MS, 2 * delay); // per twitter's suggestion
            }

            if (delay != 0) {
                LOGGER.warn("Waiting " + delay + " milliseconds before reconnecting...");
                try {
                    Thread.sleep(delay);
                } catch (InterruptedException e) {
                    // an interrupt is a request to stop: restore the flag and leave the loop
                    Thread.currentThread().interrupt();
                    LOGGER.warn("Interrupted while waiting to reconnect. Stopping the crawl.");
                    return;
                }
            }

            // create the OAuth signed request to access the Twitter Streaming API
            String requestURL = getRequestURL();
            LOGGER.info("Sending request: " + requestURL);
            OAuthRequest request = getOAuthRequest(Verb.GET, requestURL, true);
            Response response = request.send();

            status = response.getCode();
            if (status != HTTP_STATUS_OK) {
                continue;
            }

            // read from the stream
            BufferedReader reader = new BufferedReader(new InputStreamReader(response.getStream(), UTF_8));

            try {
                String line;
                while ((line = reader.readLine()) != null) {
                    // data is flowing, so earlier failures no longer justify a delay
                    delay = 0;

                    if (!line.isEmpty()) {
                        mHandler.handle(line);
                    }
                }
            } catch (IOException e) {
                LOGGER.warn("An IOException was caught while processing the stream.", e);
                delay = (delay == 0) ? IO_BACKOFF_STEP_MS
                        : Math.min(IO_BACKOFF_MAX_MS, delay + IO_BACKOFF_STEP_MS); // per twitter's suggestion
            } finally {
                // close the stream even if the handler throws an unchecked exception
                closeQuietly(reader);
            }
        }
    }

    // closes the reader; a failure to close is only logged because the connection is being dropped anyway
    private static void closeQuietly(BufferedReader reader) {
        try {
            reader.close();
        } catch (IOException e) {
            LOGGER.debug("Ignoring IOException while closing the stream.", e);
        }
    }

    /**
     * Returns the Twitter API URL that will be processed as a stream. Called
     * once per connection attempt.
     *
     * @return the URL to request
     */
    protected abstract String getRequestURL();

    /**
     * Generic main method that can be used by all stream crawlers. Parses the
     * command line, instantiates the class named by the {@code crawlerClass}
     * option, initializes it and starts crawling. Prints the usage text and
     * exits with status -1 when {@code crawlerClass} is missing or the
     * crawler cannot be initialized.
     *
     * @param args the command line arguments
     * @throws ParseException if the command line cannot be parsed
     * @throws InstantiationException if the crawler class cannot be instantiated
     * @throws IllegalAccessException if the crawler class or its no-argument
     *         constructor is not accessible
     * @throws ClassNotFoundException if the crawler class cannot be found
     */
    public static void main(String[] args)
            throws ParseException, InstantiationException, IllegalAccessException, ClassNotFoundException {
        // extract command line arguments
        CommandLineParser parser = new PosixParser();
        CommandLine cmd = parser.parse(CLI_OPTIONS, args);

        // get crawler class
        String crawlerClass = cmd.getOptionValue("crawlerClass");
        if (crawlerClass == null) {
            printUsage(CLI_OPTIONS);
            return; // not reached: printUsage exits the JVM
        }

        // instantiate and initialize the crawler
        StreamCrawler crawler = (StreamCrawler) Class.forName(crawlerClass).newInstance();

        try {
            crawler.initialize(cmd);
        } catch (Exception e) {
            // tell the user why initialization failed before showing the usage text
            LOGGER.error("Unable to initialize " + crawlerClass + ": " + e);
            printUsage(CLI_OPTIONS);
            return; // not reached: printUsage exits the JVM
        }

        // start crawling
        crawler.crawl();
    }

    // prints the command line usage text and terminates the JVM with exit status -1
    private static void printUsage(Options options) {
        HelpFormatter formatter = new HelpFormatter();
        formatter.printHelp("StreamCrawler", options);
        System.exit(-1);
    }

}