com.streamsets.datacollector.multiple.TestMultiplePipelinesComplex.java Source code

Here is the source code for com.streamsets.datacollector.multiple.TestMultiplePipelinesComplex.java
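
The class below is an integration test from the StreamSets Data Collector project. It extends TestMultiplePipelinesBase and runs three pipelines side by side: a random-data-to-Kafka pipeline, a Kafka-to-Flume pipeline and a Kafka-to-HDFS pipeline. Before the pipelines start, it brings up an embedded Kafka broker with ZooKeeper, two background producer threads, a Flume Avro source backed by a memory channel, and a MiniDFSCluster; all of these are torn down again after the test.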

Source

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.streamsets.datacollector.multiple;

import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import com.google.common.io.Resources;
import com.streamsets.datacollector.base.TestMultiplePipelinesBase;
import com.streamsets.datacollector.util.TestUtil;
import com.streamsets.pipeline.lib.KafkaTestUtil;
import com.streamsets.pipeline.lib.util.ThreadUtil;
import kafka.javaapi.producer.Producer;
import kafka.producer.KeyedMessage;
import org.apache.flume.Channel;
import org.apache.flume.ChannelSelector;
import org.apache.flume.Context;
import org.apache.flume.channel.ChannelProcessor;
import org.apache.flume.channel.MemoryChannel;
import org.apache.flume.channel.ReplicatingChannelSelector;
import org.apache.flume.conf.Configurables;
import org.apache.flume.source.AvroSource;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hdfs.HdfsConfiguration;
import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.hadoop.hdfs.server.namenode.EditLogFileOutputStream;
import org.apache.hadoop.security.UserGroupInformation;
import org.junit.AfterClass;
import org.junit.Assert;
import org.junit.BeforeClass;
import org.junit.Ignore;

import java.io.File;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;

@Ignore
public class TestMultiplePipelinesComplex extends TestMultiplePipelinesBase {

    private static final String TOPIC1 = "KafkaToFlume";
    private static final String TOPIC2 = "KafkaToHDFS";
    private static final String TOPIC3 = "randomToKafka";

    //Flume destination related
    private static AvroSource source;
    private static Channel ch;
    private static Producer<String, String> producer1;
    private static Producer<String, String> producer2;
    private static int flumePort;
    private static ExecutorService e;

    //HDFS
    private static MiniDFSCluster miniDFS;

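    /**
     * Loads the three pipeline definitions from the test resources and substitutes the
     * placeholder topic names, the Kafka broker and ZooKeeper addresses, the Flume port
     * and the HDFS URI with the values of the embedded services started in beforeClass(),
     * switching the Kafka origin pipelines from CLUSTER to STANDALONE execution mode.
     */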
    private static List<String> getPipelineJson() throws URISyntaxException, IOException {
        //random to kafka
        URI uri = Resources.getResource("kafka_destination_pipeline_operations.json").toURI();
        String randomToKafka = new String(Files.readAllBytes(Paths.get(uri)), StandardCharsets.UTF_8);
        randomToKafka = randomToKafka.replace("topicName", TOPIC3);
        randomToKafka = randomToKafka.replaceAll("localhost:9092", KafkaTestUtil.getMetadataBrokerURI());
        randomToKafka = randomToKafka.replaceAll("localhost:2181", KafkaTestUtil.getZkServer().connectString());

        //kafka to flume pipeline
        uri = Resources.getResource("cluster_kafka_flume.json").toURI();
        String kafkaToFlume = new String(Files.readAllBytes(Paths.get(uri)), StandardCharsets.UTF_8);
        kafkaToFlume = kafkaToFlume.replace("topicName", TOPIC1);
        kafkaToFlume = kafkaToFlume.replaceAll("localhost:9092", KafkaTestUtil.getMetadataBrokerURI());
        kafkaToFlume = kafkaToFlume.replaceAll("localhost:2181", KafkaTestUtil.getZkConnect());
        kafkaToFlume = kafkaToFlume.replaceAll("localhost:9050", "localhost:" + flumePort);
        kafkaToFlume = kafkaToFlume.replaceAll("CLUSTER", "STANDALONE");

        //kafka to hdfs pipeline
        uri = Resources.getResource("cluster_kafka_hdfs.json").toURI();
        String kafkaToHDFS = new String(Files.readAllBytes(Paths.get(uri)), StandardCharsets.UTF_8);
        kafkaToHDFS = kafkaToHDFS.replace("topicName", TOPIC2);
        kafkaToHDFS = kafkaToHDFS.replaceAll("localhost:9092", KafkaTestUtil.getMetadataBrokerURI());
        kafkaToHDFS = kafkaToHDFS.replaceAll("localhost:2181", KafkaTestUtil.getZkConnect());
        kafkaToHDFS = kafkaToHDFS.replaceAll("CLUSTER", "STANDALONE");
        kafkaToHDFS = kafkaToHDFS.replaceAll("/uri", miniDFS.getURI().toString());

        return ImmutableList.of(randomToKafka, kafkaToFlume, kafkaToHDFS);
    }

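    /**
     * Pipeline name and revision ("0") for each of the three pipelines defined by the
     * JSON returned from getPipelineJson(), as used by the base class.
     */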
    @Override
    protected Map<String, String> getPipelineNameAndRev() {
        return ImmutableMap.of("kafka_destination_pipeline", "0", "kafka_origin_pipeline_cluster", "0",
                "cluster_kafka_hdfs", "0");
    }

    /**
     * Sets up the embedded Kafka broker and topics, the background producers, the Flume
     * Avro source and the MiniDFSCluster, then hands the pipeline definitions to the base class.
     * @throws Exception
     */
    @BeforeClass
    public static void beforeClass() throws Exception {

        //setup kafka to read from
        KafkaTestUtil.startZookeeper();
        KafkaTestUtil.startKafkaBrokers(1);

        KafkaTestUtil.createTopic(TOPIC1, 1, 1);
        KafkaTestUtil.createTopic(TOPIC2, 1, 1);
        KafkaTestUtil.createTopic(TOPIC3, 1, 1);

        producer1 = KafkaTestUtil.createProducer(KafkaTestUtil.getMetadataBrokerURI(), true);
        producer2 = KafkaTestUtil.createProducer(KafkaTestUtil.getMetadataBrokerURI(), true);

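        // Two background tasks publish a message to TOPIC1 and TOPIC2 every 200 ms so the
        // Kafka origin pipelines have a continuous stream of data while the test runs.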
        e = Executors.newFixedThreadPool(2);
        e.submit(new Runnable() {
            @Override
            public void run() {
                int index = 0;
                while (true) {
                    producer1.send(new KeyedMessage<>(TOPIC1, "0", "Hello Kafka" + index));
                    ThreadUtil.sleep(200);
                    index = (index + 1) % 10;
                }
            }
        });

        e.submit(new Runnable() {
            @Override
            public void run() {
                int index = 0;
                while (true) {
                    producer2.send(new KeyedMessage<>(TOPIC2, "0", "Hello Kafka" + index));
                    ThreadUtil.sleep(200);
                    index = (index + 1) % 10;
                }
            }
        });

        //setup flume to write to
        source = new AvroSource();
        ch = new MemoryChannel();
        Configurables.configure(ch, new Context());

        Context context = new Context();
        // The free port chosen here is substituted for localhost:9050 in the pipeline JSON (see getPipelineJson())
        flumePort = TestUtil.getFreePort();
        context.put("port", String.valueOf(flumePort));
        context.put("bind", "localhost");
        Configurables.configure(source, context);

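        // Wire the source to an in-memory channel through a replicating selector and start it;
        // this Avro source is the receiving end of the Flume destination in the Kafka-to-Flume pipeline.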
        List<Channel> channels = new ArrayList<>();
        channels.add(ch);
        ChannelSelector rcs = new ReplicatingChannelSelector();
        rcs.setChannels(channels);
        source.setChannelProcessor(new ChannelProcessor(rcs));
        source.start();

        //HDFS settings
        // set dummy Kerberos properties so that a misconfigured Kerberos setup can be exercised
        System.setProperty("java.security.krb5.realm", "foo");
        System.setProperty("java.security.krb5.kdc", "localhost:0");

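        // Bring up a MiniDFSCluster under target/minidfs; the proxy-user settings let the
        // current user impersonate others, and edit-log fsync is skipped to speed up the test.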
        File minidfsDir = new File("target/minidfs").getAbsoluteFile();
        if (!minidfsDir.exists()) {
            Assert.assertTrue(minidfsDir.mkdirs());
        }
        System.setProperty(MiniDFSCluster.PROP_TEST_BUILD_DATA, minidfsDir.getPath());
        Configuration conf = new HdfsConfiguration();
        conf.set("hadoop.proxyuser." + System.getProperty("user.name") + ".hosts", "*");
        conf.set("hadoop.proxyuser." + System.getProperty("user.name") + ".groups", "*");
        UserGroupInformation.createUserForTesting("foo", new String[] { "all", "supergroup" });
        EditLogFileOutputStream.setShouldSkipFsyncForTesting(true);
        miniDFS = new MiniDFSCluster.Builder(conf).build();

        TestMultiplePipelinesBase.beforeClass(getPipelineJson());
    }

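    /**
     * Stops the producer threads, the MiniDFSCluster, the Flume source and channel,
     * and the embedded Kafka broker and ZooKeeper, then lets the base class clean up.
     */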
    @AfterClass
    public static void afterClass() throws Exception {
        e.shutdownNow();
        if (miniDFS != null) {
            miniDFS.shutdown();
            miniDFS = null;
        }
        source.stop();
        ch.stop();
        KafkaTestUtil.shutdown();
        TestMultiplePipelinesBase.afterClass();
    }
}
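
Note that the class is annotated with @Ignore, so it is skipped during a normal build. To run it you would remove the annotation and, assuming a standard Maven Surefire setup, select the test class explicitly, for example:

mvn test -Dtest=TestMultiplePipelinesComplex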