com.github.sakserv.storm.KafkaHdfsTopologyTest.java Source code

Introduction

Here is the source code for com.github.sakserv.storm.KafkaHdfsTopologyTest.java. The test starts local Zookeeper, Storm, Kafka, and HDFS mini clusters, produces messages to a Kafka topic, runs a Storm topology that moves those messages from a Kafka spout to an HDFS bolt, and then asserts that the number of lines written to HDFS matches the number of messages produced.

Source

/*
 *  Licensed under the Apache License, Version 2.0 (the "License");
 *  you may not use this file except in compliance with the License.
 *  You may obtain a copy of the License at
 *
 *  http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing, software
 *  distributed under the License is distributed on an "AS IS" BASIS,
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  See the License for the specific language governing permissions and
 *  limitations under the License.
 */
package com.github.sakserv.storm;

import backtype.storm.Config;
import backtype.storm.topology.TopologyBuilder;
import com.github.sakserv.config.ConfigVars;
import com.github.sakserv.kafka.producer.KafkaReadfileProducer;
import com.github.sakserv.minicluster.impl.*;
import com.github.sakserv.propertyparser.PropertyParser;
import com.github.sakserv.storm.configs.HdfsBoltConfigBuilder;
import com.github.sakserv.storm.configs.KafkaSpoutConfigBuilder;
import com.github.sakserv.storm.configs.StormConfig;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;
import org.apache.storm.hdfs.bolt.rotation.FileRotationPolicy;
import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.junit.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.Properties;

import static org.junit.Assert.assertEquals;

public class KafkaHdfsTopologyTest {

    // Logger
    private static final Logger LOG = LoggerFactory.getLogger(KafkaHdfsTopologyTest.class);

    // Kafka input file
    private static final String kafkaProducerInputFile = "test/main/resources/test_input.txt";

    // Setup the property parser
    private static final String testPropsFile = "test.properties";
    private static PropertyParser propertyParser;
    static {
        try {
            propertyParser = new PropertyParser(testPropsFile);
            propertyParser.parsePropsFile();
        } catch (IOException e) {
            LOG.error("Unable to load property file: " + testPropsFile);
        }
    }

    private static ZookeeperLocalCluster zookeeperLocalCluster;
    private static StormLocalCluster stormLocalCluster;
    private static KafkaLocalBroker kafkaLocalBroker;
    private static HdfsLocalCluster hdfsLocalCluster;

    @BeforeClass
    public static void setUp() throws Exception {

        // Start the zookeeper local cluster
        zookeeperLocalCluster = new ZookeeperLocalCluster.Builder()
                .setPort(Integer.parseInt(propertyParser.getProperty(ConfigVars.ZOOKEEPER_PORT_KEY)))
                .setTempDir(propertyParser.getProperty(ConfigVars.ZOOKEEPER_TEMP_DIR_KEY))
                .setZookeeperConnectionString(
                        propertyParser.getProperty(ConfigVars.ZOOKEEPER_CONNECTION_STRING_KEY))
                .build();
        zookeeperLocalCluster.start();

        // Start the Storm local cluster
        stormLocalCluster = new StormLocalCluster.Builder()
                .setZookeeperHost(propertyParser.getProperty(ConfigVars.ZOOKEEPER_HOST_KEY))
                .setZookeeperPort(Long.parseLong(propertyParser.getProperty(ConfigVars.ZOOKEEPER_PORT_KEY)))
                .setEnableDebug(Boolean.parseBoolean(propertyParser.getProperty(ConfigVars.STORM_ENABLE_DEBUG_KEY)))
                .setNumWorkers(Integer.parseInt(propertyParser.getProperty(ConfigVars.STORM_NUM_WORKERS_KEY)))
                .setStormConfig(new Config()).build();
        stormLocalCluster.start();

        // Start the Kafka local broker
        kafkaLocalBroker = new KafkaLocalBroker.Builder()
                .setKafkaHostname(propertyParser.getProperty(ConfigVars.KAFKA_HOSTNAME_KEY))
                .setKafkaPort(Integer.parseInt(propertyParser.getProperty(ConfigVars.KAFKA_PORT_KEY)))
                .setKafkaBrokerId(Integer.parseInt(propertyParser.getProperty(ConfigVars.KAFKA_TEST_BROKER_ID_KEY)))
                .setKafkaProperties(new Properties())
                .setKafkaTempDir(propertyParser.getProperty(ConfigVars.KAFKA_TEST_TEMP_DIR_KEY))
                .setZookeeperConnectionString(
                        propertyParser.getProperty(ConfigVars.ZOOKEEPER_CONNECTION_STRING_KEY))
                .build();
        kafkaLocalBroker.start();

        // Start the HDFS local cluster
        hdfsLocalCluster = new HdfsLocalCluster.Builder()
                .setHdfsNamenodePort(
                        Integer.parseInt(propertyParser.getProperty(ConfigVars.HDFS_NAMENODE_PORT_KEY)))
                .setHdfsTempDir(propertyParser.getProperty(ConfigVars.HDFS_TEMP_DIR_KEY))
                .setHdfsNumDatanodes(
                        Integer.parseInt(propertyParser.getProperty(ConfigVars.HDFS_NUM_DATANODES_KEY)))
                .setHdfsEnablePermissions(
                        Boolean.parseBoolean(propertyParser.getProperty(ConfigVars.HDFS_ENABLE_PERMISSIONS_KEY)))
                .setHdfsFormat(Boolean.parseBoolean(propertyParser.getProperty(ConfigVars.HDFS_FORMAT_KEY)))
                .setHdfsEnableRunningUserAsProxyUser(Boolean.parseBoolean(
                        propertyParser.getProperty(ConfigVars.HDFS_ENABLE_RUNNING_USER_AS_PROXY_USER)))
                .setHdfsConfig(new Configuration()).build();
        hdfsLocalCluster.start();

    }

    @AfterClass
    public static void tearDown() throws Exception {

        // Stop Storm, then wait 5 seconds to let the final flush to HDFS finish
        stormLocalCluster.stop(propertyParser.getProperty(ConfigVars.STORM_TOPOLOGY_NAME_KEY));
        Thread.sleep(5000);

        // Stop kafka, hdfs, and zookeeper
        kafkaLocalBroker.stop();
        hdfsLocalCluster.stop();
        zookeeperLocalCluster.stop();
    }

    @Test
    public void testKafkaHdfsTopology() throws Exception {

        // Put messages in Kafka
        produceMessageToKafka();

        // Create the topology builder object
        TopologyBuilder topologyBuilder = new TopologyBuilder();

        // Setup the Kafka Spout and add it to the topology
        KafkaSpoutConfigBuilder kafkaSpoutConfigBuilder = getKafkaSpoutConfigBuilder();
        topologyBuilder.setSpout(kafkaSpoutConfigBuilder.getSpoutName(), kafkaSpoutConfigBuilder.getKafkaSpout(),
                kafkaSpoutConfigBuilder.getSpoutParallelismHint());

        // Setup the HDFS Bolt and add it to the topology
        HdfsBoltConfigBuilder hdfsBoltConfigBuilder = getHdfsBoltConfigBuilder();
        topologyBuilder
                .setBolt(hdfsBoltConfigBuilder.getBoltName(), hdfsBoltConfigBuilder.getHdfsBolt(),
                        hdfsBoltConfigBuilder.getBoltParallelism())
                .shuffleGrouping(kafkaSpoutConfigBuilder.getSpoutName());

        // Setup the Storm configuration
        Config stormConfig = StormConfig.createStormConfig(
                Boolean.parseBoolean(propertyParser.getProperty(ConfigVars.STORM_ENABLE_DEBUG_KEY)),
                Integer.parseInt(propertyParser.getProperty(ConfigVars.STORM_NUM_WORKERS_KEY)));

        // Submit the topology
        stormLocalCluster.submitTopology(propertyParser.getProperty(ConfigVars.STORM_TOPOLOGY_NAME_KEY),
                stormConfig, topologyBuilder.createTopology());

        // Let the topology run so the spout can consume from Kafka and the bolt can write to HDFS
        Thread.sleep(10000);

        // Validate the results match the input
        validateHdfsResults();

    }

    /**
     * Validates that the files in HDFS contain the expected data from Kafka
     * @throws Exception if the HDFS files cannot be listed or read
     */
    private void validateHdfsResults() throws Exception {
        LOG.info("HDFS: VALIDATING");

        // Get the filesystem handle and a list of files written by the test
        FileSystem hdfsFsHandle = hdfsLocalCluster.getHdfsFileSystemHandle();
        RemoteIterator<LocatedFileStatus> listFiles = hdfsFsHandle.listFiles(
                new Path(propertyParser.getProperty(ConfigVars.STORM_HDFS_BOLT_OUTPUT_LOCATION_KEY)), true);

        // Loop through the files and count the lines they contain
        int count = 0;
        while (listFiles.hasNext()) {
            LocatedFileStatus file = listFiles.next();

            LOG.info("HDFS READ: Found File: " + file);

            // Use try-with-resources so each reader is closed once its file is consumed
            try (BufferedReader br = new BufferedReader(
                    new InputStreamReader(hdfsFsHandle.open(file.getPath())))) {
                String line;
                while ((line = br.readLine()) != null) {
                    LOG.info("HDFS READ: Found Line: " + line);
                    count++;
                }
            }
        }
        hdfsFsHandle.close();

        // Validate the number of lines matches the number of kafka messages
        assertEquals(Integer.parseInt(propertyParser.getProperty(ConfigVars.KAFKA_TEST_MESSAGE_COUNT_KEY)), count);
    }

    /**
     * Creates a KafkaReadfileProducer and sends the input file's contents to Kafka
     */
    private void produceMessageToKafka() {
        KafkaReadfileProducer kafkaReadfileProducer = new KafkaReadfileProducer.Builder()
                .setKafkaHostname(propertyParser.getProperty(ConfigVars.KAFKA_HOSTNAME_KEY))
                .setKafkaPort(Integer.parseInt(propertyParser.getProperty(ConfigVars.KAFKA_PORT_KEY)))
                .setTopic(propertyParser.getProperty(ConfigVars.KAFKA_TOPIC_KEY))
                .setInputFileName(kafkaProducerInputFile).build();

        kafkaReadfileProducer.produceMessages();

    }

    /**
     * Returns the KafkaSpoutConfigBuilder ready for use
     * @return KafkaSpoutConfigBuilder used to construct the Kafka spout configuration
     */
    private KafkaSpoutConfigBuilder getKafkaSpoutConfigBuilder() {
        return new KafkaSpoutConfigBuilder.Builder()
                .setZookeeperConnectionString(
                        propertyParser.getProperty(ConfigVars.ZOOKEEPER_CONNECTION_STRING_KEY))
                .setKafkaTopic(propertyParser.getProperty(ConfigVars.KAFKA_TOPIC_KEY))
                .setKafkaStartOffset(propertyParser.getProperty(ConfigVars.STORM_KAFKA_SPOUT_START_OFFSET_KEY))
                .setSpoutName(propertyParser.getProperty(ConfigVars.STORM_KAFKA_SPOUT_NAME_KEY))
                .setSpoutParallelismHint(
                        Integer.parseInt(propertyParser.getProperty(ConfigVars.STORM_KAFKA_SPOUT_PARALLELISM_KEY)))
                .setSpoutSchemeClass(propertyParser.getProperty(ConfigVars.STORM_KAFKA_SPOUT_SCHEME_CLASS_KEY))
                .build();
    }

    /**
     * Returns the HdfsBoltConfigBuilder ready for use
     * @return HdfsBoltConfigBuilder used to construct the HDFS bolt configuration
     */
    private HdfsBoltConfigBuilder getHdfsBoltConfigBuilder() {
        FileRotationPolicy fileRotationPolicy = HdfsBoltConfigBuilder.getTimeBasedFileRotationPolicy(
                propertyParser.getProperty(ConfigVars.STORM_HDFS_BOLT_ROTATION_POLICY_UNITS_KEY),
                Integer.parseInt(propertyParser.getProperty(ConfigVars.STORM_HDFS_BOLT_ROTATION_POLICY_COUNT_KEY)));

        return new HdfsBoltConfigBuilder.Builder()
                .setFieldDelimiter(propertyParser.getProperty(ConfigVars.STORM_HDFS_BOLT_FIELD_DELIMITER_KEY))
                .setOutputLocation(propertyParser.getProperty(ConfigVars.STORM_HDFS_BOLT_OUTPUT_LOCATION_KEY))
                .setHdfsDefaultFs(propertyParser.getProperty(ConfigVars.HDFS_DEFAULT_FS_KEY))
                .setSyncCount(
                        Integer.parseInt(propertyParser.getProperty(ConfigVars.STORM_HDFS_BOLT_SYNC_COUNT_KEY)))
                .setBoltParallelism(
                        Integer.parseInt(propertyParser.getProperty(ConfigVars.STORM_HDFS_BOLT_PARALLELISM_KEY)))
                .setBoltName(propertyParser.getProperty(ConfigVars.STORM_HDFS_BOLT_NAME_KEY))
                .setFileRotationPolicy(fileRotationPolicy).build();
    }

}
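
As a usage example, the test can also be launched programmatically with JUnit's JUnitCore runner. The following is a minimal sketch, assuming JUnit 4 on the classpath (matching the org.junit imports above); the launcher class name is chosen here for illustration only.

import org.junit.runner.JUnitCore;
import org.junit.runner.Result;
import org.junit.runner.notification.Failure;

// Hypothetical launcher class; not part of the project shown above.
public class KafkaHdfsTopologyTestRunner {
    public static void main(String[] args) {
        // Runs the test class; @BeforeClass and @AfterClass manage the mini clusters.
        Result result = JUnitCore.runClasses(
                com.github.sakserv.storm.KafkaHdfsTopologyTest.class);
        for (Failure failure : result.getFailures()) {
            System.out.println(failure.toString());
        }
        System.out.println("Tests run: " + result.getRunCount()
                + ", failures: " + result.getFailureCount());
    }
}

In a Maven project the same test would normally run as part of mvn test; either way, test.properties and the Kafka input file must be available at the paths referenced in the code.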