Example usage for org.apache.spark.api.java StorageLevels MEMORY_AND_DISK_SER_2

List of usage examples for org.apache.spark.api.java StorageLevels MEMORY_AND_DISK_SER_2

Introduction

In this page you can find the example usage for org.apache.spark.api.java StorageLevels MEMORY_AND_DISK_SER_2.

Prototype

StorageLevel MEMORY_AND_DISK_SER_2

To view the source code for org.apache.spark.api.java StorageLevels MEMORY_AND_DISK_SER_2.

Click Source Link

Usage

From source file:com.andado.spark.examples.streaming.JavaStatefulNetworkWordCount.java

License:Apache License

public static void main(String[] args) throws Exception {
    if (args.length < 2) {
        System.err.println("Usage: JavaStatefulNetworkWordCount <hostname> <port>");
        System.exit(1);/*from   w w  w .  j ava2  s . c o m*/
    }

    //StreamingExamples.setStreamingLogLevels();

    // Create the context with a 1 second batch size
    SparkConf sparkConf = new SparkConf().setAppName("JavaStatefulNetworkWordCount");
    JavaStreamingContext ssc = new JavaStreamingContext(sparkConf, Durations.seconds(1));
    ssc.checkpoint(".");

    // Initial state RDD input to mapWithState
    @SuppressWarnings("unchecked")
    List<Tuple2<String, Integer>> tuples = Arrays.asList(new Tuple2<>("hello", 1), new Tuple2<>("world", 1));
    JavaPairRDD<String, Integer> initialRDD = ssc.sparkContext().parallelizePairs(tuples);

    JavaReceiverInputDStream<String> lines = ssc.socketTextStream(args[0], Integer.parseInt(args[1]),
            StorageLevels.MEMORY_AND_DISK_SER_2);

    JavaDStream<String> words = lines.flatMap(new FlatMapFunction<String, String>() {
        @Override
        public Iterator<String> call(String x) {
            return Arrays.asList(SPACE.split(x)).iterator();
        }
    });

    JavaPairDStream<String, Integer> wordsDstream = words
            .mapToPair(new PairFunction<String, String, Integer>() {
                @Override
                public Tuple2<String, Integer> call(String s) {
                    return new Tuple2<>(s, 1);
                }
            });

    // Update the cumulative count function
    Function3<String, Optional<Integer>, State<Integer>, Tuple2<String, Integer>> mappingFunc = new Function3<String, Optional<Integer>, State<Integer>, Tuple2<String, Integer>>() {
        @Override
        public Tuple2<String, Integer> call(String word, Optional<Integer> one, State<Integer> state) {
            int sum = one.orElse(0) + (state.exists() ? state.get() : 0);
            Tuple2<String, Integer> output = new Tuple2<>(word, sum);
            state.update(sum);
            return output;
        }
    };

    // DStream made of get cumulative counts that get updated in every batch
    JavaMapWithStateDStream<String, Integer, Integer, Tuple2<String, Integer>> stateDstream = wordsDstream
            .mapWithState(StateSpec.function(mappingFunc).initialState(initialRDD));

    stateDstream.print();
    ssc.start();
    ssc.awaitTermination();
}

From source file:com.naltel.spark.JavaStatefulNetworkWordCount.java

License:Apache License

public static void main(String[] args) {
    if (args.length < 2) {
        System.err.println("Usage: JavaStatefulNetworkWordCount <hostname> <port>");
        System.exit(1);/* w w  w  . j a v a 2 s . c o  m*/
    }

    StreamingExamples.setStreamingLogLevels();

    // Update the cumulative count function
    final Function2<List<Integer>, Optional<Integer>, Optional<Integer>> updateFunction = new Function2<List<Integer>, Optional<Integer>, Optional<Integer>>() {
        @Override
        public Optional<Integer> call(List<Integer> values, Optional<Integer> state) {
            Integer newSum = state.or(0);
            for (Integer value : values) {
                newSum += value;
            }
            return Optional.of(newSum);
        }
    };

    // Create the context with a 1 second batch size
    SparkConf sparkConf = new SparkConf().setAppName("JavaStatefulNetworkWordCount");
    JavaStreamingContext ssc = new JavaStreamingContext(sparkConf, Durations.seconds(1));
    ssc.checkpoint(".");

    // Initial RDD input to updateStateByKey
    @SuppressWarnings("unchecked")
    List<Tuple2<String, Integer>> tuples = Arrays.asList(new Tuple2<String, Integer>("hello", 1),
            new Tuple2<String, Integer>("world", 1));
    JavaPairRDD<String, Integer> initialRDD = ssc.sc().parallelizePairs(tuples);

    JavaReceiverInputDStream<String> lines = ssc.socketTextStream(args[0], Integer.parseInt(args[1]),
            StorageLevels.MEMORY_AND_DISK_SER_2);

    JavaDStream<String> words = lines.flatMap(new FlatMapFunction<String, String>() {
        @Override
        public Iterable<String> call(String x) {
            return Lists.newArrayList(SPACE.split(x));
        }
    });

    @SuppressWarnings("serial")
    JavaPairDStream<String, Integer> wordsDstream = words
            .mapToPair(new PairFunction<String, String, Integer>() {
                @Override
                public Tuple2<String, Integer> call(String s) {
                    return new Tuple2<String, Integer>(s, 1);
                }
            });

    // This will give a Dstream made of state (which is the cumulative count of the words)
    // JavaPairDStream<String, Integer> stateDstream = wordsDstream.updateStateByKey(updateFunction,
    //         new HashPartitioner(ssc.sc().defaultParallelism()), initialRDD);

    //stateDstream.print();
    ssc.start();
    ssc.awaitTermination();
}

From source file:com.sdw.dream.spark.examples.streaming.JavaStatefulNetworkWordCount.java

License:Apache License

public static void main(String[] args) {
    if (args.length < 2) {
        System.err.println("Usage: JavaStatefulNetworkWordCount <hostname> <port>");
        System.exit(1);//from  ww w  .jav  a 2s . c  o  m
    }

    StreamingExamples.setStreamingLogLevels();

    // Create the context with a 1 second batch size
    SparkConf sparkConf = new SparkConf().setAppName("JavaStatefulNetworkWordCount");
    JavaStreamingContext ssc = new JavaStreamingContext(sparkConf, Durations.seconds(1));
    ssc.checkpoint(".");

    // Initial state RDD input to mapWithState
    @SuppressWarnings("unchecked")
    List<Tuple2<String, Integer>> tuples = Arrays.asList(new Tuple2<String, Integer>("hello", 1),
            new Tuple2<String, Integer>("world", 1));
    JavaPairRDD<String, Integer> initialRDD = ssc.sparkContext().parallelizePairs(tuples);

    JavaReceiverInputDStream<String> lines = ssc.socketTextStream(args[0], Integer.parseInt(args[1]),
            StorageLevels.MEMORY_AND_DISK_SER_2);

    JavaDStream<String> words = lines.flatMap(new FlatMapFunction<String, String>() {
        @Override
        public Iterable<String> call(String x) {
            return Lists.newArrayList(SPACE.split(x));
        }
    });

    JavaPairDStream<String, Integer> wordsDstream = words
            .mapToPair(new PairFunction<String, String, Integer>() {
                @Override
                public Tuple2<String, Integer> call(String s) {
                    return new Tuple2<String, Integer>(s, 1);
                }
            });

    // Update the cumulative count function
    final Function3<String, Optional<Integer>, State<Integer>, Tuple2<String, Integer>> mappingFunc = new Function3<String, Optional<Integer>, State<Integer>, Tuple2<String, Integer>>() {

        @Override
        public Tuple2<String, Integer> call(String word, Optional<Integer> one, State<Integer> state) {
            int sum = one.or(0) + (state.exists() ? state.get() : 0);
            Tuple2<String, Integer> output = new Tuple2<String, Integer>(word, sum);
            state.update(sum);
            return output;
        }
    };

    // DStream made of get cumulative counts that get updated in every batch
    JavaMapWithStateDStream<String, Integer, Integer, Tuple2<String, Integer>> stateDstream = wordsDstream
            .mapWithState(StateSpec.function(mappingFunc).initialState(initialRDD));

    stateDstream.print();
    ssc.start();
    ssc.awaitTermination();
}

From source file:com.sparkz.streamcount.WordCount.java

License:Apache License

public static void main(String[] args) {

    SparkConf config = new SparkConf();
    config.setAppName("Word Count");
    Duration batchDuration = new Duration(1000);
    JavaSparkContext ctx = new JavaSparkContext(config);
    JavaSparkContext.jarOfClass(org.apache.spark.streaming.State.class);
    JavaSparkContext.jarOfClass(org.apache.spark.streaming.StateSpec.class);
    ctx.addFile("/home/cloudera/Downloads/spark-streaming_2.10-1.6.0.jar");
    JavaStreamingContext jssc = new JavaStreamingContext(ctx, batchDuration);
    jssc.checkpoint(".");
    final int threshold = Integer.parseInt(args[0]);

    // Initial state RDD input to mapWithState
    @SuppressWarnings("unchecked")
    List<Tuple2<String, Integer>> tuples = Arrays.asList(new Tuple2<String, Integer>("hello", 1),
            new Tuple2<String, Integer>("world", 1));
    JavaPairRDD<String, Integer> initialRDD = jssc.sparkContext().parallelizePairs(tuples);

    JavaReceiverInputDStream<String> lines = jssc.socketTextStream("127.0.0.1", 37337,
            StorageLevels.MEMORY_AND_DISK_SER_2);

    // split each document into words
    JavaDStream<String> tokenized = lines.flatMap(new FlatMapFunction<String, String>() {
        private static final long serialVersionUID = 1L;

        @Override//ww  w.  j av  a2  s.c  o  m
        public Iterable<String> call(String s) {
            return Arrays.asList(SPACE.split(s));
        }
    });

    // count the occurrence of each word
    JavaPairDStream<String, Integer> wordsDstream = tokenized
            .mapToPair(new PairFunction<String, String, Integer>() {
                private static final long serialVersionUID = 1L;

                @Override
                public Tuple2<String, Integer> call(String s) {
                    return new Tuple2<String, Integer>(s, 1);
                }
            });

    // Update the cumulative count function
    final Function3<String, Optional<Integer>, State<Integer>, Tuple2<String, Integer>> mappingFunc = new Function3<String, Optional<Integer>, State<Integer>, Tuple2<String, Integer>>() {

        private static final long serialVersionUID = 1L;

        @Override
        public Tuple2<String, Integer> call(String word, Optional<Integer> one, State<Integer> state) {
            int sum = one.or(0) + (state.exists() ? state.get() : 0);
            Tuple2<String, Integer> output = new Tuple2<String, Integer>(word, sum);
            state.update(sum);
            return output;
        }
    };

    // DStream made of get cumulative counts that get updated in every batch
    JavaMapWithStateDStream<String, Integer, Integer, Tuple2<String, Integer>> stateDstream = wordsDstream
            .mapWithState(StateSpec.function(mappingFunc).initialState(initialRDD));

    stateDstream.print();

    JavaDStream<Tuple2<String, Integer>> filteredStream = stateDstream
            .filter(new Function<Tuple2<String, Integer>, Boolean>() {

                private static final long serialVersionUID = 1L;

                @Override
                public Boolean call(Tuple2<String, Integer> state) throws Exception {
                    return state._2 > threshold;
                }
            });

    filteredStream.print();

    jssc.start();
    jssc.awaitTermination();

    jssc.close();

}

From source file:com.weibangong.spark.streaming.JavaStatefulNetworkWordCount.java

License:Apache License

public static void main(String[] args) {
    if (args.length < 2) {
        System.err.println("Usage: JavaStatefulNetworkWordCount <hostname> <port>");
        System.exit(1);/*from   w  w w. ja  v  a 2 s.c o  m*/
    }

    // Create the context with a 1 second batch size
    SparkConf sparkConf = new SparkConf().setAppName("JavaStatefulNetworkWordCount");
    JavaStreamingContext ssc = new JavaStreamingContext(sparkConf, Durations.seconds(1));
    ssc.checkpoint(".");

    // Initial state RDD input to mapWithState
    @SuppressWarnings("unchecked")
    List<Tuple2<String, Integer>> tuples = Arrays.asList(new Tuple2<String, Integer>("hello", 1),
            new Tuple2<String, Integer>("world", 1));
    JavaPairRDD<String, Integer> initialRDD = ssc.sparkContext().parallelizePairs(tuples);

    JavaReceiverInputDStream<String> lines = ssc.socketTextStream(args[0], Integer.parseInt(args[1]),
            StorageLevels.MEMORY_AND_DISK_SER_2);

    JavaDStream<String> words = lines.flatMap(new FlatMapFunction<String, String>() {
        @Override
        public Iterable<String> call(String x) {
            return Lists.newArrayList(SPACE.split(x));
        }
    });

    JavaPairDStream<String, Integer> wordsDstream = words
            .mapToPair(new PairFunction<String, String, Integer>() {
                @Override
                public Tuple2<String, Integer> call(String s) {
                    return new Tuple2<String, Integer>(s, 1);
                }
            });

    // Update the cumulative count function
    final Function3<String, Optional<Integer>, State<Integer>, Tuple2<String, Integer>> mappingFunc = new Function3<String, Optional<Integer>, State<Integer>, Tuple2<String, Integer>>() {

        @Override
        public Tuple2<String, Integer> call(String word, Optional<Integer> one, State<Integer> state) {
            int sum = one.or(0) + (state.exists() ? state.get() : 0);
            Tuple2<String, Integer> output = new Tuple2<String, Integer>(word, sum);
            state.update(sum);
            return output;
        }
    };

    // DStream made of get cumulative counts that get updated in every batch
    JavaMapWithStateDStream<String, Integer, Integer, Tuple2<String, Integer>> stateDstream = wordsDstream
            .mapWithState(StateSpec.function(mappingFunc).initialState(initialRDD));

    stateDstream.print();
    ssc.start();
    ssc.awaitTermination();
}

From source file:gtl.spark.java.example.apache.streaming.JavaStatefulNetworkWordCount.java

License:Apache License

public static void main(String[] args) throws Exception {
    if (args.length < 2) {
        System.err.println("Usage: JavaStatefulNetworkWordCount <hostname> <port>");
        System.exit(1);//from  www.  j ava 2  s. co  m
    }

    StreamingExamples.setStreamingLogLevels();

    // Create the context with a 1 second batch size
    SparkConf sparkConf = new SparkConf().setAppName("JavaStatefulNetworkWordCount");
    JavaStreamingContext ssc = new JavaStreamingContext(sparkConf, Durations.seconds(1));
    ssc.checkpoint(".");

    // Initial state RDD input to mapWithState
    @SuppressWarnings("unchecked")
    List<Tuple2<String, Integer>> tuples = Arrays.asList(new Tuple2<>("hello", 1), new Tuple2<>("world", 1));
    JavaPairRDD<String, Integer> initialRDD = ssc.sparkContext().parallelizePairs(tuples);

    JavaReceiverInputDStream<String> lines = ssc.socketTextStream(args[0], Integer.parseInt(args[1]),
            StorageLevels.MEMORY_AND_DISK_SER_2);

    JavaDStream<String> words = lines.flatMap(x -> Arrays.asList(SPACE.split(x)).iterator());

    JavaPairDStream<String, Integer> wordsDstream = words.mapToPair(s -> new Tuple2<>(s, 1));

    // Update the cumulative count function
    Function3<String, Optional<Integer>, State<Integer>, Tuple2<String, Integer>> mappingFunc = (word, one,
            state) -> {
        int sum = one.orElse(0) + (state.exists() ? state.get() : 0);
        Tuple2<String, Integer> output = new Tuple2<>(word, sum);
        state.update(sum);
        return output;
    };

    // DStream made of get cumulative counts that get updated in every batch
    JavaMapWithStateDStream<String, Integer, Integer, Tuple2<String, Integer>> stateDstream = wordsDstream
            .mapWithState(StateSpec.function(mappingFunc).initialState(initialRDD));

    stateDstream.print();
    ssc.start();
    ssc.awaitTermination();
}

From source file:main.src.examples.JavaStatefulNetworkWordCount.java

License:Apache License

public static void main(String[] args) throws Exception {

    // Create the context with a 1 second batch size
    SparkConf sparkConf = new SparkConf().setMaster("local[2]").setAppName("JavaStatefulNetworkWordCount");
    JavaStreamingContext ssc = new JavaStreamingContext(sparkConf, Durations.seconds(1));
    ssc.checkpoint(".");

    // Initial state RDD input to mapWithState
    @SuppressWarnings("unchecked")
    List<Tuple2<String, Integer>> tuples = Arrays.asList();
    JavaPairRDD<String, Integer> initialRDD = ssc.sparkContext().parallelizePairs(tuples);

    JavaReceiverInputDStream<String> lines = ssc.socketTextStream("localhost", 9999,
            StorageLevels.MEMORY_AND_DISK_SER_2);

    JavaDStream<String> words = lines.flatMap(x -> Arrays.asList(SPACE.split(x)).iterator());

    JavaPairDStream<String, Integer> wordsDstream = words.mapToPair(s -> new Tuple2<>(s, 1));

    // Update the cumulative count function
    Function3<String, Optional<Integer>, State<Integer>, Tuple2<String, Integer>> mappingFunc = (word, one,
            state) -> {//from www  .j  av  a  2  s. c o m
        int sum = one.orElse(0) + (state.exists() ? state.get() : 0);
        Tuple2<String, Integer> output = new Tuple2<>(word, sum);
        state.update(sum);
        return output;
    };

    // DStream made of get cumulative counts that get updated in every batch
    JavaMapWithStateDStream<String, Integer, Integer, Tuple2<String, Integer>> stateDstream = wordsDstream
            .mapWithState(StateSpec.function(mappingFunc).initialState(initialRDD));

    stateDstream.print();
    ssc.start();
    ssc.awaitTermination();
}