Example usage for org.apache.spark.api.java StorageLevels MEMORY_AND_DISK_SER

List of usage examples for org.apache.spark.api.java StorageLevels MEMORY_AND_DISK_SER

Introduction

In this page you can find the example usage for org.apache.spark.api.java StorageLevels MEMORY_AND_DISK_SER.

Prototype

StorageLevel MEMORY_AND_DISK_SER

To view the source code for org.apache.spark.api.java StorageLevels MEMORY_AND_DISK_SER, click the Source Link.

Usage

From source file:SparkToMongo.java

License:Apache License

/**
 * Streams text from a socket, counts words per batch, appends each batch's
 * counts to MongoDB and shows them on the console.
 *
 * @param args {@code args[0]} is the host to read from, {@code args[1]} its port
 */
public static void main(String[] args) throws Exception {
    // Both the socket host and the port must be supplied.
    if (args.length < 2) {
        System.err.println("Usage: JavaNetworkWordCount <hostname> <port>");
        System.exit(1);
    }

    // dropDatabase (defined elsewhere) is handed the target URI before streaming starts.
    final String mongoUri = "mongodb://localhost/streamSpark.coll";
    dropDatabase(mongoUri);

    // The same URI is configured as the connector's input and output location.
    SparkConf sparkConf = new SparkConf();
    sparkConf.setAppName("JavaNetworkWordCount");
    sparkConf.set("spark.app.id", "MongoSparkConnectorTour");
    sparkConf.set("spark.mongodb.input.uri", mongoUri);
    sparkConf.set("spark.mongodb.output.uri", mongoUri);

    // Streaming context with a 5 second batch interval.
    JavaStreamingContext ssc = new JavaStreamingContext(sparkConf, Durations.seconds(5));

    // Read newline-delimited text (e.g. produced by 'nc') from host:port.
    // The storage level used here is not replicated, which is acceptable for a
    // local run; a distributed deployment needs replication for fault tolerance.
    final String host = args[0];
    final int port = Integer.parseInt(args[1]);
    JavaReceiverInputDStream<String> lines =
            ssc.socketTextStream(host, port, StorageLevels.MEMORY_AND_DISK_SER);

    // line -> words -> (word, 1) -> (word, count)
    JavaDStream<String> words = lines.flatMap(line -> Arrays.asList(SPACE.split(line)).iterator());
    JavaPairDStream<String, Integer> ones = words.mapToPair(word -> new Tuple2<>(word, 1));
    JavaPairDStream<String, Integer> wordCounts = ones.reduceByKey((left, right) -> left + right);

    wordCounts.foreachRDD(rdd -> {
        SparkSession spark = JavaSparkSessionSingleton.getInstance(rdd.context().getConf());

        // Turn every (word, count) pair into a bean so a DataFrame can be built from it.
        JavaRDD<JavaRecord> rowRDD = rdd.map(pair -> {
            JavaRecord bean = new JavaRecord();
            bean.setWord(pair._1());
            bean.setWord1(pair._2().toString());
            return bean;
        });
        Dataset<Row> wordsDataFrame = spark.createDataFrame(rowRDD, JavaRecord.class);

        // Append this batch to the "prueba" collection, then print it.
        MongoSpark.write(wordsDataFrame).option("collection", "prueba").mode("append").save();
        wordsDataFrame.show();
    });

    ssc.start();
    ssc.awaitTermination();
}

From source file:JavaNetworkWordCount.java

License:Apache License

/**
 * Counts the words received on a socket and prints the counts of every batch.
 *
 * @param args {@code args[0]} is used as the application name and as the socket
 *             host, {@code args[1]} is parsed as the socket port, and
 *             {@code args[2]} is the batch interval in seconds
 */
public static void main(String[] args) {
    if (args.length < 3) {
        System.err.println("Usage: JavaNetworkWordCount <app name> <code> <refresh rate in seconds>");
        System.exit(1);
    }

    String name = args[0];
    // Integer.parseInt replaces the deprecated Integer(String) constructor; both
    // throw NumberFormatException on malformed input.
    int refreshRateSeconds = Integer.parseInt(args[2]);

    StreamingExamples.setStreamingLogLevels();

    // Create the context with a <refreshRateSeconds> second batch size
    SparkConf sparkConf = new SparkConf().setAppName(name);
    JavaStreamingContext ssc = new JavaStreamingContext(sparkConf, Durations.seconds(refreshRateSeconds));

    // Create a JavaReceiverInputDStream on target ip:port and count the
    // words in input stream of \n delimited text (eg. generated by 'nc').
    // The storage level is not replicated, which is only appropriate for a
    // local run; a distributed deployment needs replication for fault tolerance.
    // NOTE(review): the usage text calls args[0] the app name and args[1] a
    // "code", yet they are passed here as host and port - confirm the intended CLI.
    JavaReceiverInputDStream<String> lines = ssc.socketTextStream(args[0], Integer.parseInt(args[1]),
            StorageLevels.MEMORY_AND_DISK_SER);

    // Split every line into words using DELIMITER.
    JavaDStream<String> words = lines.flatMap(new FlatMapFunction<String, String>() {
        @Override
        public Iterable<String> call(String x) {
            return Lists.newArrayList(DELIMITER.split(x));
        }
    });

    // Pair each word with 1, then add the ones up per word.
    JavaPairDStream<String, Integer> wordCounts = words.mapToPair(new PairFunction<String, String, Integer>() {
        @Override
        public Tuple2<String, Integer> call(String s) {
            return new Tuple2<String, Integer>(s, 1);
        }
    }).reduceByKey(new Function2<Integer, Integer, Integer>() {
        @Override
        public Integer call(Integer i1, Integer i2) {
            return i1 + i2;
        }
    });

    wordCounts.print();
    ssc.start();
    ssc.awaitTermination();
}

From source file:JavaNetworkWordCount.java

License:Apache License

/**
 * Counts the words received on a socket and prints the counts of every batch.
 *
 * @param args {@code args[0]} is the host to read from, {@code args[1]} its port
 */
public static void main(String[] args) throws Exception {
    // Host and port of the text source are mandatory.
    if (args.length < 2) {
        System.err.println("Usage: JavaNetworkWordCount <hostname> <port>");
        System.exit(1);
    }

    // Streaming context that cuts the input into 1 second batches.
    SparkConf sparkConf = new SparkConf();
    sparkConf.setAppName("JavaNetworkWordCount");
    JavaStreamingContext ssc = new JavaStreamingContext(sparkConf, Durations.seconds(1));

    // Read newline-delimited text (e.g. produced by 'nc') from host:port.
    // The storage level is not replicated, which is acceptable for a local run;
    // a distributed deployment needs replication for fault tolerance.
    final String host = args[0];
    final int port = Integer.parseInt(args[1]);
    JavaReceiverInputDStream<String> lines =
            ssc.socketTextStream(host, port, StorageLevels.MEMORY_AND_DISK_SER);

    // line -> words -> (word, 1) -> (word, count)
    JavaDStream<String> words = lines.flatMap(line -> Arrays.asList(SPACE.split(line)).iterator());
    JavaPairDStream<String, Integer> ones = words.mapToPair(word -> new Tuple2<String, Integer>(word, 1));
    JavaPairDStream<String, Integer> wordCounts = ones.reduceByKey((left, right) -> left + right);

    wordCounts.print();
    ssc.start();
    ssc.awaitTermination();
}

From source file:NetworkWordCount.java

License:Apache License

/**
 * Counts the words received on a socket and prints the counts of every batch.
 *
 * @param args {@code args[0]} is the host to read from, {@code args[1]} its port
 */
public static void main(String[] args) throws Exception {
    if (args.length < 2) {
        System.err.println("Usage: JavaNetworkWordCount <hostname> <port>");
        System.exit(1);
    }

    // Streaming context with a 3 second batch interval.
    SparkConf sparkConf = new SparkConf().setAppName("JavaNetworkWordCount");
    JavaStreamingContext ssc = new JavaStreamingContext(sparkConf, Durations.seconds(3));

    // Splits one input line into its words.
    FlatMapFunction<String, String> splitLine = new FlatMapFunction<String, String>() {
        @Override
        public Iterator<String> call(String line) {
            return Arrays.asList(SPACE.split(line)).iterator();
        }
    };

    // Pairs every word with an initial count of 1.
    PairFunction<String, String, Integer> pairWithOne = new PairFunction<String, String, Integer>() {
        @Override
        public Tuple2<String, Integer> call(String word) {
            return new Tuple2<>(word, 1);
        }
    };

    // Adds two partial counts of the same word.
    Function2<Integer, Integer, Integer> addCounts = new Function2<Integer, Integer, Integer>() {
        @Override
        public Integer call(Integer left, Integer right) {
            return left + right;
        }
    };

    final String host = args[0];
    final int port = Integer.parseInt(args[1]);
    JavaReceiverInputDStream<String> lines =
            ssc.socketTextStream(host, port, StorageLevels.MEMORY_AND_DISK_SER);

    JavaDStream<String> words = lines.flatMap(splitLine);
    JavaPairDStream<String, Integer> wordCounts = words.mapToPair(pairWithOne).reduceByKey(addCounts);

    wordCounts.print();
    ssc.start();
    ssc.awaitTermination();
}

From source file:Assignment.java

License:Apache License

/**
 * Reads text from localhost:9999, keeps the words containing "#obama"
 * (case-insensitively) and prints, every 10 seconds, how often each such
 * word occurred during the last 30 seconds.
 *
 * @param args ignored
 */
public static void main(String[] args) {

    // Create the context with a 10 second batch size
    SparkConf sparkConf = new SparkConf().setAppName("Assignment");
    JavaStreamingContext ssc = new JavaStreamingContext(sparkConf, new Duration(10000));

    // Create a JavaReceiverInputDStream on localhost:9999 reading \n delimited
    // text (eg. generated by 'nc'). The port is a plain int literal; there is
    // no need to parse a constant string at runtime.
    // The storage level is not replicated, which is only appropriate for a
    // local run; a distributed deployment needs replication for fault tolerance.
    JavaReceiverInputDStream<String> lines = ssc.socketTextStream("localhost", 9999,
            StorageLevels.MEMORY_AND_DISK_SER);

    // Split every line into words.
    JavaDStream<String> words = lines.flatMap(new FlatMapFunction<String, String>() {
        @Override
        public Iterable<String> call(String x) {
            return Lists.newArrayList(SPACE.split(x));
        }
    });

    // Keep only the words mentioning the hashtag and pair each with a count of 1.
    JavaPairDStream<String, Integer> wordCounts = words.filter(new Function<String, Boolean>() {
        @Override
        public Boolean call(String s) {
            return s.toLowerCase().contains("#obama");
        }
    }).mapToPair(new PairFunction<String, String, Integer>() {
        @Override
        public Tuple2<String, Integer> call(String s) {
            return new Tuple2<String, Integer>(s, 1);
        }
    });

    // Reduce function adding two integers, defined separately for clarity
    Function2<Integer, Integer, Integer> reduceFunc = new Function2<Integer, Integer, Integer>() {
        @Override
        public Integer call(Integer i1, Integer i2) throws Exception {
            return i1 + i2;
        }
    };

    // Reduce last 30 seconds of data, every 10 seconds
    JavaPairDStream<String, Integer> windowedWordCounts = wordCounts.reduceByKeyAndWindow(reduceFunc,
            new Duration(30000), new Duration(10000));

    windowedWordCounts.print();

    ssc.start();

    ssc.awaitTermination();
}

From source file:cn.com.bsfit.frms.spark.streaming.NetworkWordCount.java

License:Apache License

/**
 * Counts the words received on a socket and prints the counts of every batch.
 *
 * <p>The streaming context is managed by try-with-resources so that it is
 * closed even when setup or {@code awaitTermination()} throws; previously
 * {@code close()} was only reached on a normal return.
 *
 * @param args {@code args[0]} is the host to read from, {@code args[1]} its port
 * @throws Exception if the streaming computation fails or the wait is interrupted
 */
public static void main(String[] args) throws Exception {
    if (args.length < 2) {
        System.err.println("Usage: JavaNetworkWordCount <hostname> <port>");
        System.exit(1);
    }

    // Create the context with a 1 second batch size
    SparkConf sparkConf = new SparkConf().setAppName("JavaNetworkWordCount");
    try (JavaStreamingContext ssc = new JavaStreamingContext(sparkConf, Durations.seconds(1))) {

        // Create a JavaReceiverInputDStream on target ip:port and count the
        // words in input stream of \n delimited text (eg. generated by 'nc').
        // The storage level is not replicated, which is only appropriate for a
        // local run; a distributed deployment needs replication for fault tolerance.
        JavaReceiverInputDStream<String> lines = ssc.socketTextStream(args[0], Integer.parseInt(args[1]),
                StorageLevels.MEMORY_AND_DISK_SER);

        // Split every line into words.
        JavaDStream<String> words = lines.flatMap(new FlatMapFunction<String, String>() {
            private static final long serialVersionUID = 1L;

            @Override
            public Iterator<String> call(String x) {
                return Arrays.asList(SPACE.split(x)).iterator();
            }
        });

        // Pair each word with 1, then add the ones up per word.
        JavaPairDStream<String, Integer> wordCounts = words
                .mapToPair(new PairFunction<String, String, Integer>() {
                    private static final long serialVersionUID = 1L;

                    @Override
                    public Tuple2<String, Integer> call(String s) {
                        return new Tuple2<>(s, 1);
                    }
                }).reduceByKey(new Function2<Integer, Integer, Integer>() {
                    private static final long serialVersionUID = 1L;

                    @Override
                    public Integer call(Integer i1, Integer i2) {
                        return i1 + i2;
                    }
                });

        wordCounts.print();
        ssc.start();
        ssc.awaitTermination();
    }
}

From source file:cn.com.warlock.streaming.JavaNetworkWordCount.java

License:Apache License

/**
 * Counts the words received on a socket and prints the counts of every batch.
 *
 * @param args {@code args[0]} is the host to read from, {@code args[1]} its port
 */
public static void main(String[] args) {
    if (args.length < 2) {
        System.err.println("Usage: JavaNetworkWordCount <hostname> <port>");
        System.exit(1);
    }

    // Streaming context with a 1 second batch interval.
    SparkConf sparkConf = new SparkConf().setAppName("JavaNetworkWordCount");
    JavaStreamingContext ssc = new JavaStreamingContext(sparkConf, Durations.seconds(1));

    // Splits one input line into its words.
    FlatMapFunction<String, String> splitLine = new FlatMapFunction<String, String>() {
        @Override
        public Iterable<String> call(String line) {
            return Lists.newArrayList(SPACE.split(line));
        }
    };

    // Pairs every word with an initial count of 1.
    PairFunction<String, String, Integer> pairWithOne = new PairFunction<String, String, Integer>() {
        @Override
        public Tuple2<String, Integer> call(String word) {
            return new Tuple2<String, Integer>(word, 1);
        }
    };

    // Adds two partial counts of the same word.
    Function2<Integer, Integer, Integer> addCounts = new Function2<Integer, Integer, Integer>() {
        @Override
        public Integer call(Integer left, Integer right) {
            return left + right;
        }
    };

    // Text stream read from the socket at host:port.
    final String host = args[0];
    final int port = Integer.parseInt(args[1]);
    JavaReceiverInputDStream<String> lines =
            ssc.socketTextStream(host, port, StorageLevels.MEMORY_AND_DISK_SER);

    JavaDStream<String> words = lines.flatMap(splitLine);
    JavaPairDStream<String, Integer> wordCounts = words.mapToPair(pairWithOne).reduceByKey(addCounts);

    wordCounts.print();
    ssc.start();
    ssc.awaitTermination();
}

From source file:cn.com.warlock.streaming.JavaSqlNetworkWordCount.java

License:Apache License

/**
 * Reads words from a socket and, for every batch, prints a per-word count
 * computed with a SQL query over the batch's words.
 *
 * @param args {@code args[0]} is the host to read from, {@code args[1]} its port
 */
public static void main(String[] args) {
    if (args.length < 2) {
        System.err.println("Usage: JavaNetworkWordCount <hostname> <port>");
        System.exit(1);
    }

    final String hostname = args[0];
    final int port = Integer.parseInt(args[1]);

    // Streaming context with a 1 second batch interval.
    SparkConf sparkConf = new SparkConf().setAppName("JavaSqlNetworkWordCount");
    JavaStreamingContext ssc = new JavaStreamingContext(sparkConf, Durations.seconds(1));

    // Text stream read from the socket at hostname:port.
    JavaReceiverInputDStream<String> lines =
            ssc.socketTextStream(hostname, port, StorageLevels.MEMORY_AND_DISK_SER);

    // Splits one input line into its words.
    FlatMapFunction<String, String> splitLine = new FlatMapFunction<String, String>() {
        @Override
        public Iterable<String> call(String line) {
            return Lists.newArrayList(SPACE.split(line));
        }
    };
    JavaDStream<String> words = lines.flatMap(splitLine);

    // Per-batch action: words -> beans -> DataFrame -> SQL word count -> console.
    VoidFunction2<JavaRDD<String>, Time> countWithSql = new VoidFunction2<JavaRDD<String>, Time>() {
        @Override
        public void call(JavaRDD<String> rdd, Time time) throws Exception {
            // Obtain the SQLContext from the singleton holder.
            SQLContext sqlContext = JavaSQLContextSingleton.getInstance(rdd.context());

            // Wrap every word in a bean so a DataFrame can be created from the RDD.
            Function<String, JavaRecord> toRecord = new Function<String, JavaRecord>() {
                @Override
                public JavaRecord call(String word) {
                    JavaRecord bean = new JavaRecord();
                    bean.setWord(word);
                    return bean;
                }
            };
            JavaRDD<JavaRecord> rowRDD = rdd.map(toRecord);
            DataFrame wordsDataFrame = sqlContext.createDataFrame(rowRDD, JavaRecord.class);

            // Register the batch as a temporary table named "words".
            wordsDataFrame.registerTempTable("words");

            // Count the occurrences of each word with SQL and print the result.
            DataFrame wordCountsDataFrame = sqlContext
                    .sql("select word, count(*) as total from words group by word");
            System.out.println("========= " + time + "=========");
            wordCountsDataFrame.show();
        }
    };
    words.foreachRDD(countWithSql);

    ssc.start();
    ssc.awaitTermination();
}

From source file:com.andado.spark.examples.streaming.JavaNetworkWordCount.java

License:Apache License

/**
 * Counts the words received on a socket and prints the counts of every batch.
 *
 * @param args {@code args[0]} is the host to read from, {@code args[1]} its port
 */
public static void main(String[] args) throws Exception {
    // Host and port of the text source are mandatory.
    if (args.length < 2) {
        System.err.println("Usage: JavaNetworkWordCount <hostname> <port>");
        System.exit(1);
    }

    // Streaming context that cuts the input into 1 second batches.
    SparkConf sparkConf = new SparkConf();
    sparkConf.setAppName("JavaNetworkWordCount");
    JavaStreamingContext ssc = new JavaStreamingContext(sparkConf, Durations.seconds(1));

    // Read newline-delimited text (e.g. produced by 'nc') from host:port.
    // The storage level is not replicated, which is acceptable for a local run;
    // a distributed deployment needs replication for fault tolerance.
    JavaReceiverInputDStream<String> lines = ssc.socketTextStream(
            args[0],
            Integer.parseInt(args[1]),
            StorageLevels.MEMORY_AND_DISK_SER);

    // Stage 1: break each line into words.
    FlatMapFunction<String, String> tokenizer = new FlatMapFunction<String, String>() {
        @Override
        public Iterator<String> call(String text) {
            return Arrays.asList(SPACE.split(text)).iterator();
        }
    };
    JavaDStream<String> words = lines.flatMap(tokenizer);

    // Stage 2: emit (word, 1) for every word.
    PairFunction<String, String, Integer> toPair = new PairFunction<String, String, Integer>() {
        @Override
        public Tuple2<String, Integer> call(String token) {
            return new Tuple2<>(token, 1);
        }
    };
    JavaPairDStream<String, Integer> ones = words.mapToPair(toPair);

    // Stage 3: sum the ones per word.
    Function2<Integer, Integer, Integer> sum = new Function2<Integer, Integer, Integer>() {
        @Override
        public Integer call(Integer a, Integer b) {
            return a + b;
        }
    };
    JavaPairDStream<String, Integer> wordCounts = ones.reduceByKey(sum);

    wordCounts.print();
    ssc.start();
    ssc.awaitTermination();
}

From source file:com.andado.spark.examples.streaming.JavaSqlNetworkWordCount.java

License:Apache License

/**
 * Reads words from a socket and, for every batch, prints a per-word count
 * computed with a SQL query over the batch's words.
 *
 * @param args {@code args[0]} is the host to read from, {@code args[1]} its port
 */
public static void main(String[] args) throws Exception {
    if (args.length < 2) {
        System.err.println("Usage: JavaNetworkWordCount <hostname> <port>");
        System.exit(1);
    }

    // Streaming context with a 1 second batch interval.
    SparkConf sparkConf = new SparkConf();
    sparkConf.setAppName("JavaSqlNetworkWordCount");
    JavaStreamingContext ssc = new JavaStreamingContext(sparkConf, Durations.seconds(1));

    // Read newline-delimited text (e.g. produced by 'nc') from host:port.
    // The storage level is not replicated, which is acceptable for a local run;
    // a distributed deployment needs replication for fault tolerance.
    final String host = args[0];
    final int port = Integer.parseInt(args[1]);
    JavaReceiverInputDStream<String> lines =
            ssc.socketTextStream(host, port, StorageLevels.MEMORY_AND_DISK_SER);

    // Splits one input line into its words.
    FlatMapFunction<String, String> splitLine = new FlatMapFunction<String, String>() {
        @Override
        public Iterator<String> call(String line) {
            return Arrays.asList(SPACE.split(line)).iterator();
        }
    };
    JavaDStream<String> words = lines.flatMap(splitLine);

    // Per-batch action: words -> beans -> DataFrame -> SQL word count -> console.
    VoidFunction2<JavaRDD<String>, Time> countWithSql = new VoidFunction2<JavaRDD<String>, Time>() {
        @Override
        public void call(JavaRDD<String> rdd, Time time) {
            SparkSession spark = JavaSparkSessionSingleton.getInstance(rdd.context().getConf());

            // Wrap every word in a bean so a DataFrame can be created from the RDD.
            Function<String, JavaRecord> toRecord = new Function<String, JavaRecord>() {
                @Override
                public JavaRecord call(String word) {
                    JavaRecord bean = new JavaRecord();
                    bean.setWord(word);
                    return bean;
                }
            };
            JavaRDD<JavaRecord> rowRDD = rdd.map(toRecord);
            Dataset<Row> wordsDataFrame = spark.createDataFrame(rowRDD, JavaRecord.class);

            // Expose the batch to SQL as the temporary view "words".
            wordsDataFrame.createOrReplaceTempView("words");

            // Count the occurrences of each word with SQL and print the result.
            Dataset<Row> wordCountsDataFrame = spark
                    .sql("select word, count(*) as total from words group by word");
            System.out.println("========= " + time + "=========");
            wordCountsDataFrame.show();
        }
    };
    words.foreachRDD(countWithSql);

    ssc.start();
    ssc.awaitTermination();
}