Example usage for org.apache.spark.sql.streaming Trigger ProcessingTime

List of usage examples for org.apache.spark.sql.streaming Trigger ProcessingTime

Introduction

On this page you can find example usage of org.apache.spark.sql.streaming.Trigger.ProcessingTime.

Prototype

public static Trigger ProcessingTime(String interval) 

Source Link

Document

A trigger policy that runs a query periodically based on an interval in processing time.

Usage

From source file: io.hops.examples.spark.kafka.StructuredStreamingKafka.java

License:Apache License

/**
 * Runs either a Kafka producer of dummy log records or a structured-streaming consumer,
 * depending on the first command-line argument.
 *
 * <p>Producer mode ({@code args[0]} equals {@code "producer"}, ignoring case): one thread is
 * started per topic returned by {@code Hops.getTopics()}. Each thread obtains its own producer
 * and publishes records built from the dummy messages, priorities and loggers below, sleeping
 * 100 ms between records. After {@code Hops.shutdownGracefully(jsc)} returns, the threads are
 * interrupted and every producer that was created is closed.
 *
 * <p>Consumer mode (any other value, or no argument): the Kafka stream is loaded, each record's
 * {@code value} column is decoded into a {@code GenericRecord}, and two streaming queries are
 * started with a 10 000 ms processing-time trigger. One writes {@code LogEntry} beans as parquet,
 * the other writes the record's string form as text.
 *
 * @param args command-line arguments; {@code args[0]} selects the mode
 * @throws StreamingQueryException declared for the streaming calls made in consumer mode
 * @throws InterruptedException if the calling thread is interrupted while waiting
 */
public static void main(String[] args) throws StreamingQueryException, InterruptedException {
    // A missing argument is treated like an empty one, which selects the consumer branch
    // instead of failing with ArrayIndexOutOfBoundsException.
    final String type = args.length > 0 ? args[0] : null;
    //Producer
    if (!Strings.isNullOrEmpty(type) && type.equalsIgnoreCase("producer")) {
        Set<String> topicsSet = new HashSet<>(Hops.getTopics());
        SparkConf sparkConf = new SparkConf().setAppName(Hops.getJobName());
        JavaSparkContext jsc = new JavaSparkContext(sparkConf);
        // Appended to by every producer thread and read by this thread at shutdown, so the list
        // must be thread-safe.
        final List<HopsProducer> sparkProducers = java.util.Collections
                .synchronizedList(new ArrayList<HopsProducer>());
        final List<Thread> producerThreads = new ArrayList<>();
        final List<String> messages = new ArrayList<>();
        final List<String> priorities = new ArrayList<>();
        final List<String> loggers = new ArrayList<>();

        /**
         * ********************************* Setup dummy test data ***********************************
         */
        messages.add("Container container_e01_1494850115055_0016_01_000002 succeeded");
        messages.add("Container container_e01_1494850115251_0015_01_000002 succeeded");
        messages.add("rollingMonitorInterval is set as -1. The log rolling mornitoring interval is disabled. "
                + "The logs will be aggregated after this application is finished.");
        messages.add("rollingMonitorInterval is set as -1. The log rolling mornitoring interval is disabled. "
                + "The logs will be aggregated after this application is finished.");
        messages.add("Sending out 2 container statuses: "
                + "[ContainerStatus: [ContainerId: container_e01_1494850115055_0016_01_000001, State: RUNNING, "
                + "Diagnostics: , ExitStatus: -1000, ], "
                + "ContainerStatus: [ContainerId: container_e01_1494850115055_0016_01_000002, "
                + "State: RUNNING, Diagnostics: , ExitStatus: -1000, ]]");
        messages.add("Node's health-status : true");
        messages.add("Cannot create writer for app application_1494433225517_0008. Skip log upload this time.");
        priorities.add("INFO");
        priorities.add("INFO");
        priorities.add("WARN");
        priorities.add("DEBUG");
        priorities.add("DEBUG");
        priorities.add("DEBUG");
        priorities.add("ERROR");
        loggers.add("org.apache.hadoop.yarn.server.nodemanager.containermanager.monitor.ContainersMonitorImpl");
        loggers.add("org.apache.hadoop.yarn.server.nodemanager.containermanager.monitor.ContainersMonitorImpl");
        loggers.add(
                "org.apache.hadoop.yarn.server.nodemanager.containermanager.logaggregation.AppLogAggregatorImpl");
        loggers.add("org.apache.hadoop.yarn.server.nodemanager.NodeStatusUpdaterImpl");
        loggers.add("org.apache.hadoop.yarn.server.nodemanager.NodeStatusUpdaterImpl");
        loggers.add("org.apache.hadoop.yarn.server.nodemanager.NodeStatusUpdaterImpl");
        loggers.add(
                "org.apache.hadoop.yarn.server.nodemanager.containermanager.logaggregation.AppLogAggregatorImpl");
        //End setup dummy data

        //Get a broker for the producer
        LOG.log(Level.INFO, "Producing to:{0}", Hops.getBrokerEndpointsList().get(0));
        final Properties props = new Properties();
        props.put("bootstrap.servers", Hops.getBrokerEndpointsList().get(0));
        for (final String topic : topicsSet) {
            Thread producerThread = new Thread() {
                @Override
                public void run() {
                    // SimpleDateFormat is not thread-safe, so each producer thread gets its own.
                    final DateFormat sdf = new SimpleDateFormat("yyyy/MM/dd HH:mm:ss:SSS");
                    try {
                        SparkProducer sparkProducer = Hops.getSparkProducer(topic, props);
                        sparkProducers.add(sparkProducer);
                        Map<String, String> message = new HashMap<>();
                        int i = 0;
                        //Produce Kafka messages to topic until this thread is interrupted
                        while (!isInterrupted()) {
                            message.put("message", messages.get(i % messages.size()));
                            message.put("priority", priorities.get(i % priorities.size()));
                            message.put("logger", loggers.get(i % loggers.size()));
                            Date date = new Date();
                            message.put("timestamp", sdf.format(date));
                            sparkProducer.produce(message);
                            Thread.sleep(100);
                            i++;
                        }
                    } catch (SchemaNotFoundException | CredentialsNotFoundException ex) {
                        LOG.log(Level.SEVERE, ex.getMessage(), ex);
                    } catch (InterruptedException ex) {
                        // Interruption is the shutdown signal; restore the flag for any caller
                        // that inspects it and let the thread end.
                        Thread.currentThread().interrupt();
                        LOG.log(Level.INFO, "Producer thread interrupted, stopping topic:{0}", topic);
                    }
                }
            };
            producerThreads.add(producerThread);
            producerThread.start();
        }
        //Keep application running
        // NOTE(review): presumably blocks until the job is asked to stop - confirm in Hops.
        Hops.shutdownGracefully(jsc);
        // Stop the producer threads before closing their producers so that no thread keeps
        // calling produce() on a closed producer.
        for (Thread producerThread : producerThreads) {
            producerThread.interrupt();
        }
        for (Thread producerThread : producerThreads) {
            // Bounded wait so that a stuck producer cannot hang shutdown.
            producerThread.join(1000);
        }
        // Iterating a synchronized list requires holding its monitor.
        synchronized (sparkProducers) {
            for (HopsProducer hopsProducer : sparkProducers) {
                hopsProducer.close();
            }
        }
        //Consumer
    } else {
        // Create DataSet representing the stream of input lines from kafka
        DataStreamReader dsr = Hops.getSparkConsumer().getKafkaDataStreamReader();
        Dataset<Row> lines = dsr.load();

        // Decode each Kafka record's "value" column and map its fields onto a LogEntry bean
        Dataset<LogEntry> logEntries = lines.map(new MapFunction<Row, LogEntry>() {
            @Override
            public LogEntry call(Row record) throws Exception {
                // Uses the first entry of RECORD_INJECTIONS to turn the payload back into a record
                GenericRecord genericRecord = RECORD_INJECTIONS.entrySet().iterator().next().getValue()
                        .invert(record.getAs("value")).get();
                return new LogEntry(genericRecord.get("timestamp").toString(),
                        genericRecord.get("priority").toString(), genericRecord.get("logger").toString(),
                        genericRecord.get("message").toString());
            }
        }, Encoders.bean(LogEntry.class));

        // Same decoding, but keep the record's string representation
        Dataset<String> logEntriesRaw = lines.map(new MapFunction<Row, String>() {
            @Override
            public String call(Row record) throws Exception {
                GenericRecord genericRecord = RECORD_INJECTIONS.entrySet().iterator().next().getValue()
                        .invert(record.getAs("value")).get();

                return genericRecord.toString();
            }
        }, Encoders.STRING());

        // Start the query that writes the LogEntry beans as parquet, triggered every 10000 ms
        StreamingQuery queryFile = logEntries.writeStream().format("parquet")
                .option("path",
                        "/Projects/" + Hops.getProjectName() + "/Resources/data-parquet-" + Hops.getAppId())
                .option("checkpointLocation", "/Projects/" + Hops.getProjectName()
                        + "/Resources/checkpoint-parquet-" + Hops.getAppId())
                .trigger(Trigger.ProcessingTime(10000)).start();

        // Start the query that writes the raw record strings as text, triggered every 10000 ms
        // NOTE(review): only queryFile is handed to Hops.shutdownGracefully below; confirm
        // whether this second query also needs to be awaited or stopped explicitly.
        StreamingQuery queryFile2 = logEntriesRaw.writeStream().format("text")
                .option("path",
                        "/Projects/" + Hops.getProjectName() + "/Resources/data-text-" + Hops.getAppId())
                .option("checkpointLocation",
                        "/Projects/" + Hops.getProjectName() + "/Resources/checkpoint-text-" + Hops.getAppId())
                .trigger(Trigger.ProcessingTime(10000)).start();

        Hops.shutdownGracefully(queryFile);
    }
}