org.apache.samza.sql.bench.utils.DataVerifier.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.samza.sql.bench.utils.DataVerifier.java

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.samza.sql.bench.utils;

import com.google.common.io.Resources;
import kafka.consumer.ConsumerConfig;
import kafka.consumer.ConsumerIterator;
import kafka.consumer.KafkaStream;
import kafka.javaapi.consumer.ConsumerConnector;
import org.apache.avro.Schema;
import org.apache.avro.generic.GenericDatumReader;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.io.DecoderFactory;
import org.apache.commons.cli.*;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.nio.charset.Charset;
import java.util.*;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;

/**
 * Command line tool that consumes Avro-encoded records from a Kafka topic and prints selected
 * fields to standard output so the output of a benchmark job can be inspected.
 *
 * <p>Supported options:
 * <ul>
 *   <li>{@code -v} schema/verifier type, one of the {@link SchemaType} names (case-insensitive)</li>
 *   <li>{@code -t} topic to consume (required)</li>
 *   <li>{@code -g} consumer group id (required)</li>
 *   <li>{@code -z} ZooKeeper connection string (optional)</li>
 * </ul>
 */
public class DataVerifier {
    private static final Logger log = LoggerFactory.getLogger(DataVerifier.class);

    /** How long {@link #execute()} lets the consumer threads run before shutting them down. */
    private static final long RUN_DURATION_MS = 600000L;

    /** Number of consumer threads (and Kafka streams) requested by {@link #execute()}. */
    private static final int CONSUMER_THREADS = 2;

    /** Avro schema files are JSON, so decode them as UTF-8 instead of the platform charset. */
    private static final Charset SCHEMA_CHARSET = Charset.forName("UTF-8");

    /** Record layouts this tool knows how to load a schema for. */
    public enum SchemaType {
        ORDERS, PROJECT, FILTER, SLIDINGWINDOW, PRODUCT, JOIN;
    }

    private final String[] args;
    private final Options options = new Options();
    // NOTE(review): the default points at a specific EC2 host rather than localhost:2181;
    // pass -z explicitly unless that host is really the intended target.
    private String zkConnectionString = "ec2-52-32-116-79.us-west-2.compute.amazonaws.com:2181";
    private String consumerGroupId;
    private String topic;

    /**
     * Creates a verifier for the given raw command line arguments. Arguments are only parsed
     * when {@link #execute()} is called.
     *
     * @param args command line arguments as passed to {@code main}
     */
    public DataVerifier(String[] args) {
        this.args = args;
        options.addOption("v", true, "Verify (ORDERS | PROJECT | FILTER | SLIDINGWINDOW | PRODUCT | JOIN)");
        options.addOption("t", true, "Topic");
        options.addOption("z", true, "Zookeeper Connection String");
        options.addOption("g", true, "Consumer Group ID");
    }

    /**
     * Parses the command line, starts the consumer threads, lets them run for
     * {@link #RUN_DURATION_MS} milliseconds and then shuts them down.
     *
     * <p>If consuming fails the error is logged and the JVM exits with status {@code -1}.
     *
     * @throws RuntimeException if the arguments cannot be parsed or a required option is missing
     * @throws IllegalArgumentException if the {@code -v} value is not a known {@link SchemaType}
     */
    public void execute() {
        CommandLineParser cliParser = new DefaultParser();
        CommandLine cmd;
        try {
            cmd = cliParser.parse(options, args);
        } catch (ParseException e) {
            // Keep the parser's exception as the cause so the actual problem is visible.
            throw new RuntimeException("Cannot parse command line arguments: " + Arrays.toString(args), e);
        }

        if (cmd.hasOption('z')) {
            zkConnectionString = cmd.getOptionValue('z');
        }

        if (cmd.hasOption('g')) {
            consumerGroupId = cmd.getOptionValue('g');
        } else {
            throw new RuntimeException("Missing required parameter [-g] group id.");
        }

        if (cmd.hasOption('t')) {
            topic = cmd.getOptionValue('t').trim();
        } else {
            throw new RuntimeException("Missing required parameter [-t] topic.");
        }

        if (!cmd.hasOption('v')) {
            throw new RuntimeException(
                    "Specify the verifier type [-v] (ORDERS | PROJECT | FILTER | SLIDINGWINDOW | PRODUCT | JOIN).");
        }

        SchemaType type = parseSchemaType(cmd.getOptionValue('v'));

        try {
            // println, not printf: topic and group id are user input and must not be
            // interpreted as a format string.
            System.out.println("Start consuming: " + topic + " consumer group: " + consumerGroupId);
            VerifierConsumer verifierConsumer = new VerifierConsumer(zkConnectionString, consumerGroupId, topic,
                    type);
            try {
                System.out.println("Running orders consumer.");
                verifierConsumer.run(CONSUMER_THREADS);
                Thread.sleep(RUN_DURATION_MS);
            } finally {
                // Release the connector and worker threads even when run/sleep fails.
                verifierConsumer.shutdown();
            }
        } catch (Exception e) {
            log.error("Cannot consume orders.", e);
            System.exit(-1);
        }
    }

    /**
     * Converts the {@code -v} option value into a {@link SchemaType}, ignoring case and
     * surrounding whitespace.
     *
     * @param raw option value as typed by the user
     * @return the matching schema type
     * @throws IllegalArgumentException if {@code raw} does not name a schema type
     */
    private static SchemaType parseSchemaType(String raw) {
        try {
            return SchemaType.valueOf(raw.trim().toUpperCase(Locale.ROOT));
        } catch (IllegalArgumentException e) {
            throw new IllegalArgumentException("Unknown verifier type '" + raw + "'. Expected one of: "
                    + Arrays.toString(SchemaType.values()), e);
        }
    }

    /**
     * Builds the consumer configuration from the ZooKeeper address and group id plus fixed
     * session timeout, sync time and auto commit interval settings.
     */
    private static ConsumerConfig createConsumerConfig(String zkConnect, String groupId) {
        Properties props = new Properties();
        props.put("zookeeper.connect", zkConnect);
        props.put("group.id", groupId);
        props.put("zookeeper.session.timeout.ms", "400");
        props.put("zookeeper.sync.time.ms", "200");
        props.put("auto.commit.interval.ms", "1000");

        return new ConsumerConfig(props);
    }

    /**
     * Loads the Avro schema text for the given type from the classpath.
     *
     * <p>{@code ORDERS} and {@code FILTER} share the same schema file.
     *
     * @param type schema to load
     * @return the schema file content
     * @throws IOException if the schema resource is missing or cannot be read
     * @throws RuntimeException if {@code type} has no schema mapping
     */
    public static String loadSchema(SchemaType type) throws IOException {
        final String resource;
        switch (type) {
        case ORDERS:
        case FILTER:
            resource = "/benchorders.avsc";
            break;
        case PROJECT:
            resource = "/projectout.avsc";
            break;
        case SLIDINGWINDOW:
            resource = "/slidingwindowout.avsc";
            break;
        case JOIN:
            resource = "/joinout.avsc";
            break;
        case PRODUCT:
            resource = "/product.avsc";
            break;
        default:
            throw new RuntimeException("Unknown verifier type: " + type);
        }

        java.net.URL url = TestDataGenerator.class.getResource(resource);
        if (url == null) {
            // Fail with a descriptive error instead of a NullPointerException further down.
            throw new IOException("Schema resource not found on classpath: " + resource);
        }
        return Resources.toString(url, SCHEMA_CHARSET);
    }

    public static void main(String[] args) {
        new DataVerifier(args).execute();
    }

    /**
     * Owns the Kafka consumer connector and the thread pool that runs one
     * {@link PrintVerifier} per stream.
     */
    public static class VerifierConsumer {
        private final ConsumerConnector consumer;
        private final String topic;
        private ExecutorService executor;
        private final SchemaType type;

        /**
         * Creates the consumer connector for the given ZooKeeper address and group.
         *
         * @param zkConnect ZooKeeper connection string
         * @param groupId consumer group id
         * @param topic topic to consume
         * @param type schema used to decode and print the records
         */
        public VerifierConsumer(String zkConnect, String groupId, String topic, SchemaType type) {
            this.consumer = kafka.consumer.Consumer
                    .createJavaConsumerConnector(createConsumerConfig(zkConnect, groupId));
            this.topic = topic;
            this.type = type;
        }

        /**
         * Shuts down the connector and, if {@link #run(int)} started one, the thread pool,
         * waiting up to five seconds for the worker threads to finish.
         */
        public void shutdown() {
            if (consumer != null) {
                consumer.shutdown();
            }
            if (executor == null) {
                // run() was never called or failed before creating the pool; nothing to await.
                return;
            }
            executor.shutdown();
            try {
                if (!executor.awaitTermination(5000, TimeUnit.MILLISECONDS)) {
                    System.out.println("Timed out waiting for consumer threads to shut down, exiting uncleanly");
                }
            } catch (InterruptedException e) {
                System.out.println("Interrupted during shutdown, exiting uncleanly");
                // Preserve the interrupt for whoever is running this thread.
                Thread.currentThread().interrupt();
            }
        }

        /**
         * Requests {@code numThreads} streams for the topic and submits one
         * {@link PrintVerifier} per returned stream to a fixed pool of {@code numThreads} threads.
         *
         * @param numThreads number of streams and worker threads to create
         * @throws IOException if the schema for a verifier cannot be loaded
         */
        public void run(int numThreads) throws IOException {
            Map<String, Integer> topicCountMap = new HashMap<String, Integer>();
            topicCountMap.put(topic, Integer.valueOf(numThreads));
            Map<String, List<KafkaStream<byte[], byte[]>>> consumerMap = consumer
                    .createMessageStreams(topicCountMap);
            List<KafkaStream<byte[], byte[]>> streams = consumerMap.get(topic);

            System.out.println("Streams size: " + streams.size());

            // Pool sized to the number of requested streams.
            executor = Executors.newFixedThreadPool(numThreads);

            // One printing task per stream, numbered from zero.
            int threadNumber = 0;
            for (final KafkaStream<byte[], byte[]> stream : streams) {
                executor.submit(new PrintVerifier(stream, threadNumber, type));
                threadNumber++;
            }
        }
    }

    /**
     * Task that decodes every message of one Kafka stream as an Avro record and prints the
     * fields relevant for the configured {@link SchemaType}.
     */
    public static class PrintVerifier implements Runnable {
        private final KafkaStream<byte[], byte[]> stream;
        private final int threadNumber;
        private final GenericDatumReader<GenericRecord> reader;
        private final SchemaType type;

        /**
         * @param stream stream to read messages from
         * @param threadNumber number used to label this task's output
         * @param type schema used to decode the messages and to choose the printed fields
         * @throws IOException if the schema cannot be loaded
         */
        public PrintVerifier(KafkaStream<byte[], byte[]> stream, int threadNumber, SchemaType type)
                throws IOException {
            this.threadNumber = threadNumber;
            this.stream = stream;
            this.type = type;
            reader = new GenericDatumReader<GenericRecord>(
                    new Schema.Parser().parse(DataVerifier.loadSchema(type)));
        }

        /**
         * Reads messages until the stream iterator reports no more, printing one line per
         * successfully decoded record. Messages that fail to decode are logged and skipped.
         */
        @Override
        public void run() {
            System.out.println("Start consuming: " + threadNumber);
            ConsumerIterator<byte[], byte[]> it = stream.iterator();
            while (it.hasNext()) {
                GenericRecord record = null;
                try {
                    record = reader.read(null, DecoderFactory.get().binaryDecoder(it.next().message(), null));
                } catch (IOException e) {
                    log.error("Cannot read avro message.", e);
                }
                if (record == null) {
                    continue;
                }
                switch (type) {
                case ORDERS:
                case FILTER:
                    // FILTER output uses the orders schema, so both print the same fields.
                    System.out.println("Thread " + threadNumber + ": " + record.get("orderId") + ":"
                            + record.get("units"));
                    break;
                case PROJECT:
                    System.out.println("Thread " + threadNumber + ": " + record.get("productId") + ":"
                            + record.get("units"));
                    break;
                case SLIDINGWINDOW:
                    System.out.println("Thread " + threadNumber + " productId: " + record.get("productId")
                            + " unitsLastHour:" + record.get("unitsLastHour"));
                    break;
                case JOIN:
                default:
                    // JOIN (and PRODUCT) records are decoded but nothing is printed for them.
                    break;
                }
            }
            System.out.println("Shutting down Thread: " + threadNumber);
        }
    }
}