Example usage for org.apache.spark.examples.streaming JavaRecord setWord

List of usage examples for org.apache.spark.examples.streaming JavaRecord setWord

Introduction

On this page you can find example usage of the setWord method of org.apache.spark.examples.streaming.JavaRecord.

Prototype

public void setWord(String word) 

Source Link

Usage

From source file:com.naltel.spark.JavaSqlNetworkWordCount.java

License:Apache License

/**
 * Counts words arriving on a TCP text socket, one streaming batch at a time, using Spark SQL.
 *
 * <p>Each batch of words is wrapped in {@code JavaRecord} beans, turned into a DataFrame,
 * registered as the temporary table {@code words}, and aggregated with a
 * {@code group by} query whose result is printed to standard output.
 *
 * <p>If fewer than two arguments are supplied, a usage message is written to standard error
 * and the JVM exits with status 1.
 *
 * @param args {@code args[0]} is the hostname to connect to; {@code args[1]} is the port,
 *             which must be parseable by {@link Integer#parseInt(String)}
 */
public static void main(String[] args) {
    if (args.length < 2) {
        // Name the program the user is actually running, not the non-SQL sibling example.
        System.err.println("Usage: JavaSqlNetworkWordCount <hostname> <port>");
        System.exit(1);
    }

    // NOTE(review): helper is declared outside this file; presumably it tunes log verbosity.
    StreamingExamples.setStreamingLogLevels();

    // Create the context with a 1 second batch size
    SparkConf sparkConf = new SparkConf().setAppName("JavaSqlNetworkWordCount");
    JavaStreamingContext ssc = new JavaStreamingContext(sparkConf, Durations.seconds(1));

    // Create a JavaReceiverInputDStream on the target host:port and read the
    // \n-delimited text it produces (e.g. generated by 'nc').
    // The storage level used here has no replication, which is only appropriate
    // when running locally; a distributed deployment needs replication for
    // fault tolerance.
    JavaReceiverInputDStream<String> lines = ssc.socketTextStream(args[0], Integer.parseInt(args[1]),
            StorageLevels.MEMORY_AND_DISK_SER);

    // Break every incoming line into words using the SPACE splitter declared elsewhere.
    JavaDStream<String> words = lines.flatMap(new FlatMapFunction<String, String>() {
        @Override
        public Iterable<String> call(String x) {
            return Lists.newArrayList(SPACE.split(x));
        }
    });

    // Convert RDDs of the words DStream to DataFrame and run SQL query
    words.foreachRDD(new Function2<JavaRDD<String>, Time, Void>() {
        @Override
        public Void call(JavaRDD<String> rdd, Time time) {
            SQLContext sqlContext = JavaSQLContextSingleton.getInstance(rdd.context());

            // Convert JavaRDD[String] to JavaRDD[bean class] to DataFrame
            JavaRDD<JavaRecord> rowRDD = rdd.map(new Function<String, JavaRecord>() {
                @Override
                public JavaRecord call(String word) {
                    JavaRecord record = new JavaRecord();
                    record.setWord(word);
                    return record;
                }
            });
            DataFrame wordsDataFrame = sqlContext.createDataFrame(rowRDD, JavaRecord.class);

            // Register as table so the SQL below can refer to it by name
            wordsDataFrame.registerTempTable("words");

            // Do word count on table using SQL and print it
            DataFrame wordCountsDataFrame = sqlContext
                    .sql("select word, count(*) as total from words group by word");
            System.out.println("========= " + time + "=========");
            wordCountsDataFrame.show();

            // Function2<..., Void> must return a value; null is the only option for Void.
            return null;
        }
    });

    // Start receiving data and block until the streaming computation terminates.
    ssc.start();
    ssc.awaitTermination();
}