Example usage for org.apache.spark.api.java.function Function3 Function3

List of usage examples for org.apache.spark.api.java.function Function3 Function3

Introduction

On this page you can find example usage for org.apache.spark.api.java.function Function3 Function3.

Prototype

Function3

Source Link

Usage

From source file:com.andado.spark.examples.streaming.JavaStatefulNetworkWordCount.java

License:Apache License

public static void main(String[] args) throws Exception {
    // The socket text source needs a hostname and a port.
    if (args.length < 2) {
        System.err.println("Usage: JavaStatefulNetworkWordCount <hostname> <port>");
        System.exit(1);
    }

    // One-second micro-batches; a checkpoint directory is mandatory for
    // stateful operations such as mapWithState.
    SparkConf conf = new SparkConf().setAppName("JavaStatefulNetworkWordCount");
    JavaStreamingContext streamingContext = new JavaStreamingContext(conf, Durations.seconds(1));
    streamingContext.checkpoint(".");

    // Seed counts handed to mapWithState as its initial state.
    @SuppressWarnings("unchecked")
    List<Tuple2<String, Integer>> seedCounts = Arrays.asList(new Tuple2<>("hello", 1), new Tuple2<>("world", 1));
    JavaPairRDD<String, Integer> seedRDD = streamingContext.sparkContext().parallelizePairs(seedCounts);

    JavaReceiverInputDStream<String> lineStream = streamingContext.socketTextStream(args[0],
            Integer.parseInt(args[1]), StorageLevels.MEMORY_AND_DISK_SER_2);

    // Break every line into whitespace-separated tokens.
    JavaDStream<String> wordStream = lineStream.flatMap(new FlatMapFunction<String, String>() {
        @Override
        public Iterator<String> call(String line) {
            return Arrays.asList(SPACE.split(line)).iterator();
        }
    });

    // Emit (word, 1) for each token.
    JavaPairDStream<String, Integer> pairedWords = wordStream
            .mapToPair(new PairFunction<String, String, Integer>() {
                @Override
                public Tuple2<String, Integer> call(String word) {
                    return new Tuple2<>(word, 1);
                }
            });

    // Fold each batch's count into the running total kept in State.
    Function3<String, Optional<Integer>, State<Integer>, Tuple2<String, Integer>> updateTotals = new Function3<String, Optional<Integer>, State<Integer>, Tuple2<String, Integer>>() {
        @Override
        public Tuple2<String, Integer> call(String word, Optional<Integer> batchCount, State<Integer> state) {
            int runningTotal = batchCount.orElse(0) + (state.exists() ? state.get() : 0);
            state.update(runningTotal);
            return new Tuple2<>(word, runningTotal);
        }
    };

    // Cumulative per-word counts, refreshed on every batch.
    JavaMapWithStateDStream<String, Integer, Integer, Tuple2<String, Integer>> cumulativeCounts = pairedWords
            .mapWithState(StateSpec.function(updateTotals).initialState(seedRDD));

    cumulativeCounts.print();
    streamingContext.start();
    streamingContext.awaitTermination();
}

From source file:com.sdw.dream.spark.examples.streaming.JavaStatefulNetworkWordCount.java

License:Apache License

public static void main(String[] args) {
    // A hostname and a port are required for the socket source.
    if (args.length < 2) {
        System.err.println("Usage: JavaStatefulNetworkWordCount <hostname> <port>");
        System.exit(1);
    }

    StreamingExamples.setStreamingLogLevels();

    // One-second batches; mapWithState needs a checkpoint directory.
    SparkConf conf = new SparkConf().setAppName("JavaStatefulNetworkWordCount");
    JavaStreamingContext streamingContext = new JavaStreamingContext(conf, Durations.seconds(1));
    streamingContext.checkpoint(".");

    // Seed counts handed to mapWithState as its initial state.
    @SuppressWarnings("unchecked")
    List<Tuple2<String, Integer>> seedCounts = Arrays.asList(new Tuple2<String, Integer>("hello", 1),
            new Tuple2<String, Integer>("world", 1));
    JavaPairRDD<String, Integer> seedRDD = streamingContext.sparkContext().parallelizePairs(seedCounts);

    JavaReceiverInputDStream<String> lineStream = streamingContext.socketTextStream(args[0],
            Integer.parseInt(args[1]), StorageLevels.MEMORY_AND_DISK_SER_2);

    // Break every line into whitespace-separated tokens.
    JavaDStream<String> wordStream = lineStream.flatMap(new FlatMapFunction<String, String>() {
        @Override
        public Iterable<String> call(String line) {
            return Lists.newArrayList(SPACE.split(line));
        }
    });

    // Emit (word, 1) for each token.
    JavaPairDStream<String, Integer> pairedWords = wordStream
            .mapToPair(new PairFunction<String, String, Integer>() {
                @Override
                public Tuple2<String, Integer> call(String word) {
                    return new Tuple2<String, Integer>(word, 1);
                }
            });

    // Add the batch count to the running total held in State.
    final Function3<String, Optional<Integer>, State<Integer>, Tuple2<String, Integer>> updateTotals = new Function3<String, Optional<Integer>, State<Integer>, Tuple2<String, Integer>>() {
        @Override
        public Tuple2<String, Integer> call(String word, Optional<Integer> batchCount, State<Integer> state) {
            int runningTotal = batchCount.or(0) + (state.exists() ? state.get() : 0);
            state.update(runningTotal);
            return new Tuple2<String, Integer>(word, runningTotal);
        }
    };

    // Cumulative per-word counts, refreshed on every batch.
    JavaMapWithStateDStream<String, Integer, Integer, Tuple2<String, Integer>> cumulativeCounts = pairedWords
            .mapWithState(StateSpec.function(updateTotals).initialState(seedRDD));

    cumulativeCounts.print();
    streamingContext.start();
    streamingContext.awaitTermination();
}

From source file:com.sparkz.streamcount.WordCount.java

License:Apache License

public static void main(String[] args) {

    // Fail fast with a usage message rather than an
    // ArrayIndexOutOfBoundsException when no threshold argument is given.
    if (args.length < 1) {
        System.err.println("Usage: WordCount <threshold>");
        System.exit(1);
    }
    // Words whose cumulative count exceeds this value are printed separately.
    final int threshold = Integer.parseInt(args[0]);

    SparkConf config = new SparkConf();
    config.setAppName("Word Count");
    Duration batchDuration = new Duration(1000); // 1-second micro-batches
    JavaSparkContext ctx = new JavaSparkContext(config);
    // NOTE(review): hard-coded absolute jar path ties this job to one
    // machine; prefer shipping the streaming jar via spark-submit --jars.
    ctx.addFile("/home/cloudera/Downloads/spark-streaming_2.10-1.6.0.jar");
    JavaStreamingContext jssc = new JavaStreamingContext(ctx, batchDuration);
    // mapWithState requires a checkpoint directory.
    jssc.checkpoint(".");

    // Initial state RDD input to mapWithState.
    @SuppressWarnings("unchecked")
    List<Tuple2<String, Integer>> tuples = Arrays.asList(new Tuple2<String, Integer>("hello", 1),
            new Tuple2<String, Integer>("world", 1));
    JavaPairRDD<String, Integer> initialRDD = jssc.sparkContext().parallelizePairs(tuples);

    // NOTE(review): listener address and port are hard-coded; consider
    // making them arguments like the threshold.
    JavaReceiverInputDStream<String> lines = jssc.socketTextStream("127.0.0.1", 37337,
            StorageLevels.MEMORY_AND_DISK_SER_2);

    // Split each line into whitespace-separated words.
    JavaDStream<String> tokenized = lines.flatMap(new FlatMapFunction<String, String>() {
        private static final long serialVersionUID = 1L;

        @Override
        public Iterable<String> call(String s) {
            return Arrays.asList(SPACE.split(s));
        }
    });

    // Pair each word with an occurrence count of one.
    JavaPairDStream<String, Integer> wordsDstream = tokenized
            .mapToPair(new PairFunction<String, String, Integer>() {
                private static final long serialVersionUID = 1L;

                @Override
                public Tuple2<String, Integer> call(String s) {
                    return new Tuple2<String, Integer>(s, 1);
                }
            });

    // Update function: add this batch's count to the running total in State.
    final Function3<String, Optional<Integer>, State<Integer>, Tuple2<String, Integer>> mappingFunc = new Function3<String, Optional<Integer>, State<Integer>, Tuple2<String, Integer>>() {

        private static final long serialVersionUID = 1L;

        @Override
        public Tuple2<String, Integer> call(String word, Optional<Integer> one, State<Integer> state) {
            int sum = one.or(0) + (state.exists() ? state.get() : 0);
            Tuple2<String, Integer> output = new Tuple2<String, Integer>(word, sum);
            state.update(sum);
            return output;
        }
    };

    // DStream of cumulative counts, updated every batch.
    JavaMapWithStateDStream<String, Integer, Integer, Tuple2<String, Integer>> stateDstream = wordsDstream
            .mapWithState(StateSpec.function(mappingFunc).initialState(initialRDD));

    stateDstream.print();

    // Keep only the words whose running total exceeds the threshold.
    JavaDStream<Tuple2<String, Integer>> filteredStream = stateDstream
            .filter(new Function<Tuple2<String, Integer>, Boolean>() {

                private static final long serialVersionUID = 1L;

                @Override
                public Boolean call(Tuple2<String, Integer> state) throws Exception {
                    return state._2 > threshold;
                }
            });

    filteredStream.print();

    jssc.start();
    jssc.awaitTermination();

    // Release resources once the streaming context has terminated.
    jssc.close();

}

From source file:com.weibangong.spark.streaming.JavaStatefulNetworkWordCount.java

License:Apache License

public static void main(String[] args) {
    // The socket source needs a hostname and a port.
    if (args.length < 2) {
        System.err.println("Usage: JavaStatefulNetworkWordCount <hostname> <port>");
        System.exit(1);
    }

    // One-second batches; mapWithState needs a checkpoint directory.
    SparkConf conf = new SparkConf().setAppName("JavaStatefulNetworkWordCount");
    JavaStreamingContext streamingContext = new JavaStreamingContext(conf, Durations.seconds(1));
    streamingContext.checkpoint(".");

    // Seed counts handed to mapWithState as its initial state.
    @SuppressWarnings("unchecked")
    List<Tuple2<String, Integer>> seedCounts = Arrays.asList(new Tuple2<String, Integer>("hello", 1),
            new Tuple2<String, Integer>("world", 1));
    JavaPairRDD<String, Integer> seedRDD = streamingContext.sparkContext().parallelizePairs(seedCounts);

    JavaReceiverInputDStream<String> lineStream = streamingContext.socketTextStream(args[0],
            Integer.parseInt(args[1]), StorageLevels.MEMORY_AND_DISK_SER_2);

    // Break every line into whitespace-separated tokens.
    JavaDStream<String> wordStream = lineStream.flatMap(new FlatMapFunction<String, String>() {
        @Override
        public Iterable<String> call(String line) {
            return Lists.newArrayList(SPACE.split(line));
        }
    });

    // Emit (word, 1) for each token.
    JavaPairDStream<String, Integer> pairedWords = wordStream
            .mapToPair(new PairFunction<String, String, Integer>() {
                @Override
                public Tuple2<String, Integer> call(String word) {
                    return new Tuple2<String, Integer>(word, 1);
                }
            });

    // Add the batch count to the running total held in State.
    final Function3<String, Optional<Integer>, State<Integer>, Tuple2<String, Integer>> updateTotals = new Function3<String, Optional<Integer>, State<Integer>, Tuple2<String, Integer>>() {
        @Override
        public Tuple2<String, Integer> call(String word, Optional<Integer> batchCount, State<Integer> state) {
            int runningTotal = batchCount.or(0) + (state.exists() ? state.get() : 0);
            state.update(runningTotal);
            return new Tuple2<String, Integer>(word, runningTotal);
        }
    };

    // Cumulative per-word counts, refreshed on every batch.
    JavaMapWithStateDStream<String, Integer, Integer, Tuple2<String, Integer>> cumulativeCounts = pairedWords
            .mapWithState(StateSpec.function(updateTotals).initialState(seedRDD));

    cumulativeCounts.print();
    streamingContext.start();
    streamingContext.awaitTermination();
}