List of usage examples for org.apache.spark.api.java.function Function3 Function3
Function3
From source file:com.andado.spark.examples.streaming.JavaStatefulNetworkWordCount.java
License:Apache License
public static void main(String[] args) throws Exception { if (args.length < 2) { System.err.println("Usage: JavaStatefulNetworkWordCount <hostname> <port>"); System.exit(1);/*from w w w . j a va2 s. c o m*/ } //StreamingExamples.setStreamingLogLevels(); // Create the context with a 1 second batch size SparkConf sparkConf = new SparkConf().setAppName("JavaStatefulNetworkWordCount"); JavaStreamingContext ssc = new JavaStreamingContext(sparkConf, Durations.seconds(1)); ssc.checkpoint("."); // Initial state RDD input to mapWithState @SuppressWarnings("unchecked") List<Tuple2<String, Integer>> tuples = Arrays.asList(new Tuple2<>("hello", 1), new Tuple2<>("world", 1)); JavaPairRDD<String, Integer> initialRDD = ssc.sparkContext().parallelizePairs(tuples); JavaReceiverInputDStream<String> lines = ssc.socketTextStream(args[0], Integer.parseInt(args[1]), StorageLevels.MEMORY_AND_DISK_SER_2); JavaDStream<String> words = lines.flatMap(new FlatMapFunction<String, String>() { @Override public Iterator<String> call(String x) { return Arrays.asList(SPACE.split(x)).iterator(); } }); JavaPairDStream<String, Integer> wordsDstream = words .mapToPair(new PairFunction<String, String, Integer>() { @Override public Tuple2<String, Integer> call(String s) { return new Tuple2<>(s, 1); } }); // Update the cumulative count function Function3<String, Optional<Integer>, State<Integer>, Tuple2<String, Integer>> mappingFunc = new Function3<String, Optional<Integer>, State<Integer>, Tuple2<String, Integer>>() { @Override public Tuple2<String, Integer> call(String word, Optional<Integer> one, State<Integer> state) { int sum = one.orElse(0) + (state.exists() ? state.get() : 0); Tuple2<String, Integer> output = new Tuple2<>(word, sum); state.update(sum); return output; } }; // DStream made of get cumulative counts that get updated in every batch JavaMapWithStateDStream<String, Integer, Integer, Tuple2<String, Integer>> stateDstream = wordsDstream .mapWithState(StateSpec.function(mappingFunc).initialState(initialRDD)); stateDstream.print(); ssc.start(); ssc.awaitTermination(); }
From source file:com.sdw.dream.spark.examples.streaming.JavaStatefulNetworkWordCount.java
License:Apache License
public static void main(String[] args) { if (args.length < 2) { System.err.println("Usage: JavaStatefulNetworkWordCount <hostname> <port>"); System.exit(1);/*from w w w .j a va 2 s . c o m*/ } StreamingExamples.setStreamingLogLevels(); // Create the context with a 1 second batch size SparkConf sparkConf = new SparkConf().setAppName("JavaStatefulNetworkWordCount"); JavaStreamingContext ssc = new JavaStreamingContext(sparkConf, Durations.seconds(1)); ssc.checkpoint("."); // Initial state RDD input to mapWithState @SuppressWarnings("unchecked") List<Tuple2<String, Integer>> tuples = Arrays.asList(new Tuple2<String, Integer>("hello", 1), new Tuple2<String, Integer>("world", 1)); JavaPairRDD<String, Integer> initialRDD = ssc.sparkContext().parallelizePairs(tuples); JavaReceiverInputDStream<String> lines = ssc.socketTextStream(args[0], Integer.parseInt(args[1]), StorageLevels.MEMORY_AND_DISK_SER_2); JavaDStream<String> words = lines.flatMap(new FlatMapFunction<String, String>() { @Override public Iterable<String> call(String x) { return Lists.newArrayList(SPACE.split(x)); } }); JavaPairDStream<String, Integer> wordsDstream = words .mapToPair(new PairFunction<String, String, Integer>() { @Override public Tuple2<String, Integer> call(String s) { return new Tuple2<String, Integer>(s, 1); } }); // Update the cumulative count function final Function3<String, Optional<Integer>, State<Integer>, Tuple2<String, Integer>> mappingFunc = new Function3<String, Optional<Integer>, State<Integer>, Tuple2<String, Integer>>() { @Override public Tuple2<String, Integer> call(String word, Optional<Integer> one, State<Integer> state) { int sum = one.or(0) + (state.exists() ? state.get() : 0); Tuple2<String, Integer> output = new Tuple2<String, Integer>(word, sum); state.update(sum); return output; } }; // DStream made of get cumulative counts that get updated in every batch JavaMapWithStateDStream<String, Integer, Integer, Tuple2<String, Integer>> stateDstream = wordsDstream .mapWithState(StateSpec.function(mappingFunc).initialState(initialRDD)); stateDstream.print(); ssc.start(); ssc.awaitTermination(); }
From source file:com.sparkz.streamcount.WordCount.java
License:Apache License
public static void main(String[] args) { SparkConf config = new SparkConf(); config.setAppName("Word Count"); Duration batchDuration = new Duration(1000); JavaSparkContext ctx = new JavaSparkContext(config); JavaSparkContext.jarOfClass(org.apache.spark.streaming.State.class); JavaSparkContext.jarOfClass(org.apache.spark.streaming.StateSpec.class); ctx.addFile("/home/cloudera/Downloads/spark-streaming_2.10-1.6.0.jar"); JavaStreamingContext jssc = new JavaStreamingContext(ctx, batchDuration); jssc.checkpoint("."); final int threshold = Integer.parseInt(args[0]); // Initial state RDD input to mapWithState @SuppressWarnings("unchecked") List<Tuple2<String, Integer>> tuples = Arrays.asList(new Tuple2<String, Integer>("hello", 1), new Tuple2<String, Integer>("world", 1)); JavaPairRDD<String, Integer> initialRDD = jssc.sparkContext().parallelizePairs(tuples); JavaReceiverInputDStream<String> lines = jssc.socketTextStream("127.0.0.1", 37337, StorageLevels.MEMORY_AND_DISK_SER_2); // split each document into words JavaDStream<String> tokenized = lines.flatMap(new FlatMapFunction<String, String>() { private static final long serialVersionUID = 1L; @Override/*from w ww . j a va 2 s . c o m*/ public Iterable<String> call(String s) { return Arrays.asList(SPACE.split(s)); } }); // count the occurrence of each word JavaPairDStream<String, Integer> wordsDstream = tokenized .mapToPair(new PairFunction<String, String, Integer>() { private static final long serialVersionUID = 1L; @Override public Tuple2<String, Integer> call(String s) { return new Tuple2<String, Integer>(s, 1); } }); // Update the cumulative count function final Function3<String, Optional<Integer>, State<Integer>, Tuple2<String, Integer>> mappingFunc = new Function3<String, Optional<Integer>, State<Integer>, Tuple2<String, Integer>>() { private static final long serialVersionUID = 1L; @Override public Tuple2<String, Integer> call(String word, Optional<Integer> one, State<Integer> state) { int sum = one.or(0) + (state.exists() ? state.get() : 0); Tuple2<String, Integer> output = new Tuple2<String, Integer>(word, sum); state.update(sum); return output; } }; // DStream made of get cumulative counts that get updated in every batch JavaMapWithStateDStream<String, Integer, Integer, Tuple2<String, Integer>> stateDstream = wordsDstream .mapWithState(StateSpec.function(mappingFunc).initialState(initialRDD)); stateDstream.print(); JavaDStream<Tuple2<String, Integer>> filteredStream = stateDstream .filter(new Function<Tuple2<String, Integer>, Boolean>() { private static final long serialVersionUID = 1L; @Override public Boolean call(Tuple2<String, Integer> state) throws Exception { return state._2 > threshold; } }); filteredStream.print(); jssc.start(); jssc.awaitTermination(); jssc.close(); }
From source file:com.weibangong.spark.streaming.JavaStatefulNetworkWordCount.java
License:Apache License
public static void main(String[] args) { if (args.length < 2) { System.err.println("Usage: JavaStatefulNetworkWordCount <hostname> <port>"); System.exit(1);//from w ww. j a v a2 s. c o m } // Create the context with a 1 second batch size SparkConf sparkConf = new SparkConf().setAppName("JavaStatefulNetworkWordCount"); JavaStreamingContext ssc = new JavaStreamingContext(sparkConf, Durations.seconds(1)); ssc.checkpoint("."); // Initial state RDD input to mapWithState @SuppressWarnings("unchecked") List<Tuple2<String, Integer>> tuples = Arrays.asList(new Tuple2<String, Integer>("hello", 1), new Tuple2<String, Integer>("world", 1)); JavaPairRDD<String, Integer> initialRDD = ssc.sparkContext().parallelizePairs(tuples); JavaReceiverInputDStream<String> lines = ssc.socketTextStream(args[0], Integer.parseInt(args[1]), StorageLevels.MEMORY_AND_DISK_SER_2); JavaDStream<String> words = lines.flatMap(new FlatMapFunction<String, String>() { @Override public Iterable<String> call(String x) { return Lists.newArrayList(SPACE.split(x)); } }); JavaPairDStream<String, Integer> wordsDstream = words .mapToPair(new PairFunction<String, String, Integer>() { @Override public Tuple2<String, Integer> call(String s) { return new Tuple2<String, Integer>(s, 1); } }); // Update the cumulative count function final Function3<String, Optional<Integer>, State<Integer>, Tuple2<String, Integer>> mappingFunc = new Function3<String, Optional<Integer>, State<Integer>, Tuple2<String, Integer>>() { @Override public Tuple2<String, Integer> call(String word, Optional<Integer> one, State<Integer> state) { int sum = one.or(0) + (state.exists() ? state.get() : 0); Tuple2<String, Integer> output = new Tuple2<String, Integer>(word, sum); state.update(sum); return output; } }; // DStream made of get cumulative counts that get updated in every batch JavaMapWithStateDStream<String, Integer, Integer, Tuple2<String, Integer>> stateDstream = wordsDstream .mapWithState(StateSpec.function(mappingFunc).initialState(initialRDD)); stateDstream.print(); ssc.start(); ssc.awaitTermination(); }