Usage examples for org.apache.spark.api.java.StorageLevels.MEMORY_AND_DISK_SER_2
The StorageLevel constant MEMORY_AND_DISK_SER_2 stores partitions as serialized Java objects, keeps them in memory, spills them to disk when memory is insufficient, and replicates each partition on two executors.
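All of the examples on this page pass the constant to JavaStreamingContext.socketTextStream so that received blocks are stored in this serialized, replicated fashion, but the same constant can be used anywhere a StorageLevel is accepted. Below is a minimal, self-contained sketch of both uses; the class name StorageLevelSketch, the local[2] master, the host, port, and sample data are illustrative placeholders, not taken from the examples that follow.

import java.util.Arrays;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.StorageLevels;
import org.apache.spark.streaming.Durations;
import org.apache.spark.streaming.api.java.JavaReceiverInputDStream;
import org.apache.spark.streaming.api.java.JavaStreamingContext;

public class StorageLevelSketch {
    public static void main(String[] args) throws Exception {
        // local[2] (placeholder) so a receiver and a processing task can run side by side
        SparkConf conf = new SparkConf().setMaster("local[2]").setAppName("StorageLevelSketch");
        JavaSparkContext sc = new JavaSparkContext(conf);

        // Cache a batch RDD as serialized bytes, spilling to disk and keeping two replicas.
        JavaRDD<String> cached = sc.parallelize(Arrays.asList("hello", "world"))
                .persist(StorageLevels.MEMORY_AND_DISK_SER_2);
        System.out.println(cached.count());

        // The streaming examples below pass the same constant to socketTextStream so that
        // received blocks survive the loss of a single executor.
        JavaStreamingContext ssc = new JavaStreamingContext(sc, Durations.seconds(1));
        JavaReceiverInputDStream<String> lines = ssc.socketTextStream("localhost", 9999,
                StorageLevels.MEMORY_AND_DISK_SER_2);
        lines.print();

        ssc.start();
        ssc.awaitTermination();
    }
}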
From source file: com.andado.spark.examples.streaming.JavaStatefulNetworkWordCount.java
License: Apache License
public static void main(String[] args) throws Exception {
    if (args.length < 2) {
        System.err.println("Usage: JavaStatefulNetworkWordCount <hostname> <port>");
        System.exit(1);
    }

    //StreamingExamples.setStreamingLogLevels();

    // Create the context with a 1 second batch size
    SparkConf sparkConf = new SparkConf().setAppName("JavaStatefulNetworkWordCount");
    JavaStreamingContext ssc = new JavaStreamingContext(sparkConf, Durations.seconds(1));
    ssc.checkpoint(".");

    // Initial state RDD input to mapWithState
    @SuppressWarnings("unchecked")
    List<Tuple2<String, Integer>> tuples = Arrays.asList(new Tuple2<>("hello", 1), new Tuple2<>("world", 1));
    JavaPairRDD<String, Integer> initialRDD = ssc.sparkContext().parallelizePairs(tuples);

    JavaReceiverInputDStream<String> lines = ssc.socketTextStream(args[0], Integer.parseInt(args[1]),
            StorageLevels.MEMORY_AND_DISK_SER_2);

    JavaDStream<String> words = lines.flatMap(new FlatMapFunction<String, String>() {
        @Override
        public Iterator<String> call(String x) {
            return Arrays.asList(SPACE.split(x)).iterator();
        }
    });

    JavaPairDStream<String, Integer> wordsDstream = words.mapToPair(new PairFunction<String, String, Integer>() {
        @Override
        public Tuple2<String, Integer> call(String s) {
            return new Tuple2<>(s, 1);
        }
    });

    // Update the cumulative count function
    Function3<String, Optional<Integer>, State<Integer>, Tuple2<String, Integer>> mappingFunc =
            new Function3<String, Optional<Integer>, State<Integer>, Tuple2<String, Integer>>() {
                @Override
                public Tuple2<String, Integer> call(String word, Optional<Integer> one, State<Integer> state) {
                    int sum = one.orElse(0) + (state.exists() ? state.get() : 0);
                    Tuple2<String, Integer> output = new Tuple2<>(word, sum);
                    state.update(sum);
                    return output;
                }
            };

    // DStream of cumulative counts that get updated in every batch
    JavaMapWithStateDStream<String, Integer, Integer, Tuple2<String, Integer>> stateDstream = wordsDstream
            .mapWithState(StateSpec.function(mappingFunc).initialState(initialRDD));

    stateDstream.print();
    ssc.start();
    ssc.awaitTermination();
}
From source file: com.naltel.spark.JavaStatefulNetworkWordCount.java
License: Apache License
public static void main(String[] args) {
    if (args.length < 2) {
        System.err.println("Usage: JavaStatefulNetworkWordCount <hostname> <port>");
        System.exit(1);
    }

    StreamingExamples.setStreamingLogLevels();

    // Update the cumulative count function
    final Function2<List<Integer>, Optional<Integer>, Optional<Integer>> updateFunction =
            new Function2<List<Integer>, Optional<Integer>, Optional<Integer>>() {
                @Override
                public Optional<Integer> call(List<Integer> values, Optional<Integer> state) {
                    Integer newSum = state.or(0);
                    for (Integer value : values) {
                        newSum += value;
                    }
                    return Optional.of(newSum);
                }
            };

    // Create the context with a 1 second batch size
    SparkConf sparkConf = new SparkConf().setAppName("JavaStatefulNetworkWordCount");
    JavaStreamingContext ssc = new JavaStreamingContext(sparkConf, Durations.seconds(1));
    ssc.checkpoint(".");

    // Initial RDD input to updateStateByKey
    @SuppressWarnings("unchecked")
    List<Tuple2<String, Integer>> tuples = Arrays.asList(new Tuple2<String, Integer>("hello", 1),
            new Tuple2<String, Integer>("world", 1));
    JavaPairRDD<String, Integer> initialRDD = ssc.sc().parallelizePairs(tuples);

    JavaReceiverInputDStream<String> lines = ssc.socketTextStream(args[0], Integer.parseInt(args[1]),
            StorageLevels.MEMORY_AND_DISK_SER_2);

    JavaDStream<String> words = lines.flatMap(new FlatMapFunction<String, String>() {
        @Override
        public Iterable<String> call(String x) {
            return Lists.newArrayList(SPACE.split(x));
        }
    });

    @SuppressWarnings("serial")
    JavaPairDStream<String, Integer> wordsDstream = words.mapToPair(new PairFunction<String, String, Integer>() {
        @Override
        public Tuple2<String, Integer> call(String s) {
            return new Tuple2<String, Integer>(s, 1);
        }
    });

    // DStream of state (the cumulative count of the words); an output operation
    // such as print() is required before the streaming context can be started.
    JavaPairDStream<String, Integer> stateDstream = wordsDstream.updateStateByKey(updateFunction,
            new HashPartitioner(ssc.sc().defaultParallelism()), initialRDD);
    stateDstream.print();

    ssc.start();
    ssc.awaitTermination();
}
From source file: com.sdw.dream.spark.examples.streaming.JavaStatefulNetworkWordCount.java
License: Apache License
public static void main(String[] args) {
    if (args.length < 2) {
        System.err.println("Usage: JavaStatefulNetworkWordCount <hostname> <port>");
        System.exit(1);
    }

    StreamingExamples.setStreamingLogLevels();

    // Create the context with a 1 second batch size
    SparkConf sparkConf = new SparkConf().setAppName("JavaStatefulNetworkWordCount");
    JavaStreamingContext ssc = new JavaStreamingContext(sparkConf, Durations.seconds(1));
    ssc.checkpoint(".");

    // Initial state RDD input to mapWithState
    @SuppressWarnings("unchecked")
    List<Tuple2<String, Integer>> tuples = Arrays.asList(new Tuple2<String, Integer>("hello", 1),
            new Tuple2<String, Integer>("world", 1));
    JavaPairRDD<String, Integer> initialRDD = ssc.sparkContext().parallelizePairs(tuples);

    JavaReceiverInputDStream<String> lines = ssc.socketTextStream(args[0], Integer.parseInt(args[1]),
            StorageLevels.MEMORY_AND_DISK_SER_2);

    JavaDStream<String> words = lines.flatMap(new FlatMapFunction<String, String>() {
        @Override
        public Iterable<String> call(String x) {
            return Lists.newArrayList(SPACE.split(x));
        }
    });

    JavaPairDStream<String, Integer> wordsDstream = words.mapToPair(new PairFunction<String, String, Integer>() {
        @Override
        public Tuple2<String, Integer> call(String s) {
            return new Tuple2<String, Integer>(s, 1);
        }
    });

    // Update the cumulative count function
    final Function3<String, Optional<Integer>, State<Integer>, Tuple2<String, Integer>> mappingFunc =
            new Function3<String, Optional<Integer>, State<Integer>, Tuple2<String, Integer>>() {
                @Override
                public Tuple2<String, Integer> call(String word, Optional<Integer> one, State<Integer> state) {
                    int sum = one.or(0) + (state.exists() ? state.get() : 0);
                    Tuple2<String, Integer> output = new Tuple2<String, Integer>(word, sum);
                    state.update(sum);
                    return output;
                }
            };

    // DStream of cumulative counts that get updated in every batch
    JavaMapWithStateDStream<String, Integer, Integer, Tuple2<String, Integer>> stateDstream = wordsDstream
            .mapWithState(StateSpec.function(mappingFunc).initialState(initialRDD));

    stateDstream.print();
    ssc.start();
    ssc.awaitTermination();
}
From source file: com.sparkz.streamcount.WordCount.java
License: Apache License
public static void main(String[] args) {
    SparkConf config = new SparkConf();
    config.setAppName("Word Count");
    Duration batchDuration = new Duration(1000);

    JavaSparkContext ctx = new JavaSparkContext(config);
    JavaSparkContext.jarOfClass(org.apache.spark.streaming.State.class);
    JavaSparkContext.jarOfClass(org.apache.spark.streaming.StateSpec.class);
    ctx.addFile("/home/cloudera/Downloads/spark-streaming_2.10-1.6.0.jar");

    JavaStreamingContext jssc = new JavaStreamingContext(ctx, batchDuration);
    jssc.checkpoint(".");

    final int threshold = Integer.parseInt(args[0]);

    // Initial state RDD input to mapWithState
    @SuppressWarnings("unchecked")
    List<Tuple2<String, Integer>> tuples = Arrays.asList(new Tuple2<String, Integer>("hello", 1),
            new Tuple2<String, Integer>("world", 1));
    JavaPairRDD<String, Integer> initialRDD = jssc.sparkContext().parallelizePairs(tuples);

    JavaReceiverInputDStream<String> lines = jssc.socketTextStream("127.0.0.1", 37337,
            StorageLevels.MEMORY_AND_DISK_SER_2);

    // split each document into words
    JavaDStream<String> tokenized = lines.flatMap(new FlatMapFunction<String, String>() {
        private static final long serialVersionUID = 1L;

        @Override
        public Iterable<String> call(String s) {
            return Arrays.asList(SPACE.split(s));
        }
    });

    // count the occurrence of each word
    JavaPairDStream<String, Integer> wordsDstream = tokenized.mapToPair(new PairFunction<String, String, Integer>() {
        private static final long serialVersionUID = 1L;

        @Override
        public Tuple2<String, Integer> call(String s) {
            return new Tuple2<String, Integer>(s, 1);
        }
    });

    // Update the cumulative count function
    final Function3<String, Optional<Integer>, State<Integer>, Tuple2<String, Integer>> mappingFunc =
            new Function3<String, Optional<Integer>, State<Integer>, Tuple2<String, Integer>>() {
                private static final long serialVersionUID = 1L;

                @Override
                public Tuple2<String, Integer> call(String word, Optional<Integer> one, State<Integer> state) {
                    int sum = one.or(0) + (state.exists() ? state.get() : 0);
                    Tuple2<String, Integer> output = new Tuple2<String, Integer>(word, sum);
                    state.update(sum);
                    return output;
                }
            };

    // DStream of cumulative counts that get updated in every batch
    JavaMapWithStateDStream<String, Integer, Integer, Tuple2<String, Integer>> stateDstream = wordsDstream
            .mapWithState(StateSpec.function(mappingFunc).initialState(initialRDD));
    stateDstream.print();

    // keep only words whose cumulative count exceeds the threshold
    JavaDStream<Tuple2<String, Integer>> filteredStream = stateDstream
            .filter(new Function<Tuple2<String, Integer>, Boolean>() {
                private static final long serialVersionUID = 1L;

                @Override
                public Boolean call(Tuple2<String, Integer> state) throws Exception {
                    return state._2 > threshold;
                }
            });
    filteredStream.print();

    jssc.start();
    jssc.awaitTermination();
    jssc.close();
}
From source file: com.weibangong.spark.streaming.JavaStatefulNetworkWordCount.java
License: Apache License
public static void main(String[] args) {
    if (args.length < 2) {
        System.err.println("Usage: JavaStatefulNetworkWordCount <hostname> <port>");
        System.exit(1);
    }

    // Create the context with a 1 second batch size
    SparkConf sparkConf = new SparkConf().setAppName("JavaStatefulNetworkWordCount");
    JavaStreamingContext ssc = new JavaStreamingContext(sparkConf, Durations.seconds(1));
    ssc.checkpoint(".");

    // Initial state RDD input to mapWithState
    @SuppressWarnings("unchecked")
    List<Tuple2<String, Integer>> tuples = Arrays.asList(new Tuple2<String, Integer>("hello", 1),
            new Tuple2<String, Integer>("world", 1));
    JavaPairRDD<String, Integer> initialRDD = ssc.sparkContext().parallelizePairs(tuples);

    JavaReceiverInputDStream<String> lines = ssc.socketTextStream(args[0], Integer.parseInt(args[1]),
            StorageLevels.MEMORY_AND_DISK_SER_2);

    JavaDStream<String> words = lines.flatMap(new FlatMapFunction<String, String>() {
        @Override
        public Iterable<String> call(String x) {
            return Lists.newArrayList(SPACE.split(x));
        }
    });

    JavaPairDStream<String, Integer> wordsDstream = words.mapToPair(new PairFunction<String, String, Integer>() {
        @Override
        public Tuple2<String, Integer> call(String s) {
            return new Tuple2<String, Integer>(s, 1);
        }
    });

    // Update the cumulative count function
    final Function3<String, Optional<Integer>, State<Integer>, Tuple2<String, Integer>> mappingFunc =
            new Function3<String, Optional<Integer>, State<Integer>, Tuple2<String, Integer>>() {
                @Override
                public Tuple2<String, Integer> call(String word, Optional<Integer> one, State<Integer> state) {
                    int sum = one.or(0) + (state.exists() ? state.get() : 0);
                    Tuple2<String, Integer> output = new Tuple2<String, Integer>(word, sum);
                    state.update(sum);
                    return output;
                }
            };

    // DStream of cumulative counts that get updated in every batch
    JavaMapWithStateDStream<String, Integer, Integer, Tuple2<String, Integer>> stateDstream = wordsDstream
            .mapWithState(StateSpec.function(mappingFunc).initialState(initialRDD));

    stateDstream.print();
    ssc.start();
    ssc.awaitTermination();
}
From source file: gtl.spark.java.example.apache.streaming.JavaStatefulNetworkWordCount.java
License: Apache License
public static void main(String[] args) throws Exception {
    if (args.length < 2) {
        System.err.println("Usage: JavaStatefulNetworkWordCount <hostname> <port>");
        System.exit(1);
    }

    StreamingExamples.setStreamingLogLevels();

    // Create the context with a 1 second batch size
    SparkConf sparkConf = new SparkConf().setAppName("JavaStatefulNetworkWordCount");
    JavaStreamingContext ssc = new JavaStreamingContext(sparkConf, Durations.seconds(1));
    ssc.checkpoint(".");

    // Initial state RDD input to mapWithState
    @SuppressWarnings("unchecked")
    List<Tuple2<String, Integer>> tuples = Arrays.asList(new Tuple2<>("hello", 1), new Tuple2<>("world", 1));
    JavaPairRDD<String, Integer> initialRDD = ssc.sparkContext().parallelizePairs(tuples);

    JavaReceiverInputDStream<String> lines = ssc.socketTextStream(args[0], Integer.parseInt(args[1]),
            StorageLevels.MEMORY_AND_DISK_SER_2);

    JavaDStream<String> words = lines.flatMap(x -> Arrays.asList(SPACE.split(x)).iterator());

    JavaPairDStream<String, Integer> wordsDstream = words.mapToPair(s -> new Tuple2<>(s, 1));

    // Update the cumulative count function
    Function3<String, Optional<Integer>, State<Integer>, Tuple2<String, Integer>> mappingFunc = (word, one, state) -> {
        int sum = one.orElse(0) + (state.exists() ? state.get() : 0);
        Tuple2<String, Integer> output = new Tuple2<>(word, sum);
        state.update(sum);
        return output;
    };

    // DStream of cumulative counts that get updated in every batch
    JavaMapWithStateDStream<String, Integer, Integer, Tuple2<String, Integer>> stateDstream = wordsDstream
            .mapWithState(StateSpec.function(mappingFunc).initialState(initialRDD));

    stateDstream.print();
    ssc.start();
    ssc.awaitTermination();
}
From source file: main.src.examples.JavaStatefulNetworkWordCount.java
License: Apache License
public static void main(String[] args) throws Exception {
    // Create the context with a 1 second batch size
    SparkConf sparkConf = new SparkConf().setMaster("local[2]").setAppName("JavaStatefulNetworkWordCount");
    JavaStreamingContext ssc = new JavaStreamingContext(sparkConf, Durations.seconds(1));
    ssc.checkpoint(".");

    // Initial state RDD input to mapWithState (empty in this example)
    @SuppressWarnings("unchecked")
    List<Tuple2<String, Integer>> tuples = Arrays.asList();
    JavaPairRDD<String, Integer> initialRDD = ssc.sparkContext().parallelizePairs(tuples);

    JavaReceiverInputDStream<String> lines = ssc.socketTextStream("localhost", 9999,
            StorageLevels.MEMORY_AND_DISK_SER_2);

    JavaDStream<String> words = lines.flatMap(x -> Arrays.asList(SPACE.split(x)).iterator());

    JavaPairDStream<String, Integer> wordsDstream = words.mapToPair(s -> new Tuple2<>(s, 1));

    // Update the cumulative count function
    Function3<String, Optional<Integer>, State<Integer>, Tuple2<String, Integer>> mappingFunc = (word, one, state) -> {
        int sum = one.orElse(0) + (state.exists() ? state.get() : 0);
        Tuple2<String, Integer> output = new Tuple2<>(word, sum);
        state.update(sum);
        return output;
    };

    // DStream of cumulative counts that get updated in every batch
    JavaMapWithStateDStream<String, Integer, Integer, Tuple2<String, Integer>> stateDstream = wordsDstream
            .mapWithState(StateSpec.function(mappingFunc).initialState(initialRDD));

    stateDstream.print();
    ssc.start();
    ssc.awaitTermination();
}