List of usage examples for org.apache.spark.api.java.function.Function2
Function2
From source file:CacheWordCount.java
License:Apache License
public static void main(String[] args) throws Exception { if (args.length < 1) { System.err.println("Usage: CacheWordCount <file>"); System.exit(1);/*from ww w . jav a2 s . c o m*/ } SparkConf sparkConf = new SparkConf().setAppName("CacheWordCount"); JavaSparkContext ctx = new JavaSparkContext(sparkConf); JavaRDD<String> lines = ctx.textFile(args[0], 4); JavaRDD<String> words = lines.flatMap(new FlatMapFunction<String, String>() { @Override public Iterable<String> call(String s) { return Arrays.asList(SPACE.split(s)); } }); //words.cache(); JavaPairRDD<String, Integer> ones = words.mapToPair(new PairFunction<String, String, Integer>() { @Override public Tuple2<String, Integer> call(String s) { return new Tuple2<String, Integer>(s, 1); } }); JavaPairRDD<String, Integer> counts = ones.reduceByKey(new Function2<Integer, Integer, Integer>() { @Override public Integer call(Integer i1, Integer i2) { return i1 + i2; } }); counts.cache(); long num = counts.count(); num = counts.count(); //JavaPairRDD<String, Integer> sorted = counts.sortByKey(); /* List<Tuple2<String, Integer>> output = sorted.collect(); for (Tuple2<?,?> tuple : output) { System.out.println(tuple._1() + ": " + tuple._2()); } */ ctx.stop(); }
From source file:JavaKafkaWordCount_old.java
License:Apache License
public static void main(String[] args) { SparkConf sparkConf = new SparkConf().setAppName("JavaKafkaWordCount"); sparkConf.setMaster("local[2]"); // Create the context with a 1 second batch size JavaStreamingContext jssc = new JavaStreamingContext(sparkConf, new Duration(2000)); int numThreads = 1; String zkQuorum = "localhost:5181"; String group = "test-consumer-group"; Map<String, Integer> topicMap = new HashMap<String, Integer>(); topicMap.put("test", numThreads); JavaPairReceiverInputDStream<String, String> messages = KafkaUtils.createStream(jssc, zkQuorum, group, topicMap);/* w w w . java 2s . c o m*/ JavaDStream<String> lines = messages.map(new Function<Tuple2<String, String>, String>() { @Override public String call(Tuple2<String, String> tuple2) { return tuple2._2(); } }); JavaDStream<String> words = lines.flatMap(new FlatMapFunction<String, String>() { @Override public Iterable<String> call(String x) { return Lists.newArrayList(SPACE.split(x)); } }); JavaPairDStream<String, Integer> wordCounts = words.mapToPair(new PairFunction<String, String, Integer>() { @Override public Tuple2<String, Integer> call(String s) { return new Tuple2<String, Integer>(s, 1); } }).reduceByKey(new Function2<Integer, Integer, Integer>() { @Override public Integer call(Integer i1, Integer i2) { return i1 + i2; } }); wordCounts.print(); jssc.start(); jssc.awaitTermination(); }
From source file:SparkJavaWordCount.java
License:Apache License
/**
 * Counts the words of the text file named by {@code args[0]} and prints each
 * {@code word: count} pair to standard output.
 *
 * <p>Prints a usage message and exits with status 1 when no file is given.
 * {@code SPACE} is defined outside this method and is used only through
 * {@code SPACE.split(String)}.
 *
 * @param args {@code args[0]} is the path of the text file to read
 * @throws Exception declared by the original entry point
 */
public static void main(String[] args) throws Exception {
    if (args.length < 1) {
        System.err.println("Usage: JavaWordCount <file>");
        System.exit(1);
    }

    JavaSparkContext ctx = new JavaSparkContext(new SparkConf().setAppName("JavaWordCount"));

    FlatMapFunction<String, String> splitLine = new FlatMapFunction<String, String>() {
        @Override
        public Iterable<String> call(String line) {
            return Arrays.asList(SPACE.split(line));
        }
    };
    PairFunction<String, String, Integer> toUnitPair = new PairFunction<String, String, Integer>() {
        @Override
        public Tuple2<String, Integer> call(String word) {
            return new Tuple2<String, Integer>(word, 1);
        }
    };
    Function2<Integer, Integer, Integer> add = new Function2<Integer, Integer, Integer>() {
        @Override
        public Integer call(Integer a, Integer b) {
            return a + b;
        }
    };

    // textFile is called with 1 as its second argument, as in the original.
    List<Tuple2<String, Integer>> output = ctx.textFile(args[0], 1)
            .flatMap(splitLine)
            .mapToPair(toUnitPair)
            .reduceByKey(add)
            .collect();

    for (int idx = 0; idx < output.size(); idx++) {
        Tuple2<String, Integer> entry = output.get(idx);
        System.out.println(entry._1() + ": " + entry._2());
    }

    ctx.stop();
}
From source file:JavaNetworkWordCount.java
License:Apache License
public static void main(String[] args) { if (args.length < 3) { System.err.println("Usage: JavaNetworkWordCount <app name> <code> <refresh rate in seconds>"); System.exit(1);/*from ww w .j a v a 2s .c o m*/ } String name = args[0]; String kernel = args[1]; int refreshRateSeconds = (new Integer(args[2])).intValue(); StreamingExamples.setStreamingLogLevels(); // Create the context with a <refreshRateSeconds> second batch size SparkConf sparkConf = new SparkConf().setAppName(name); JavaStreamingContext ssc = new JavaStreamingContext(sparkConf, Durations.seconds(refreshRateSeconds)); // Create a JavaReceiverInputDStream on target ip:port and count the // words in input stream of \n delimited text (eg. generated by 'nc') // Note that no duplication in storage level only for running locally. // Replication necessary in distributed scenario for fault tolerance. JavaReceiverInputDStream<String> lines = ssc.socketTextStream(args[0], Integer.parseInt(args[1]), StorageLevels.MEMORY_AND_DISK_SER); JavaDStream<String> words = lines.flatMap(new FlatMapFunction<String, String>() { @Override public Iterable<String> call(String x) { return Lists.newArrayList(DELIMITER.split(x)); } }); JavaPairDStream<String, Integer> wordCounts = words.mapToPair(new PairFunction<String, String, Integer>() { @Override public Tuple2<String, Integer> call(String s) { return new Tuple2<String, Integer>(s, 1); } }).reduceByKey(new Function2<Integer, Integer, Integer>() { @Override public Integer call(Integer i1, Integer i2) { return i1 + i2; } }); wordCounts.print(); ssc.start(); ssc.awaitTermination(); }
From source file:JavaCustomReceiver.java
License:Apache License
public static void main(String[] args) throws Exception { args = new String[2]; args[0] = "localhost"; args[1] = "12344"; // StreamingExamples.setStreamingLogLevels(); logger.error("hi error"); System.out.println("logging started atleast"); // Create the context with a 1 second batch size SparkConf sparkConf = new SparkConf().setAppName("JavaCustomReceiver").setMaster(SPARK_MASTER) .setSparkHome("local").setJars(new String[] { "target/SparkProject.0.0.1-SNAPSHOT.jar" }); JavaStreamingContext ssc = new JavaStreamingContext(sparkConf, new Duration(1000)); JavaReceiverInputDStream<String> lines = ssc.receiverStream(new JavaCustomReceiver("localhost", 12344)); System.out.println("Received Lines: " + lines.toString()); JavaDStream<String> words = lines.flatMap(new FlatMapFunction<String, String>() { public Iterable<String> call(String x) { System.out.println("Flat map processsing " + x); return Arrays.asList(x.split(" ")); }// www.j ava 2s .c o m }); JavaPairDStream<String, Integer> pairs = words.mapToPair(new PairFunction<String, String, Integer>() { public Tuple2<String, Integer> call(String s) { return new Tuple2<String, Integer>(s, 1); } }); JavaPairDStream<String, Integer> wordCounts = pairs.reduceByKey(new Function2<Integer, Integer, Integer>() { public Integer call(Integer i1, Integer i2) { return i1 + i2; } }); wordCounts.print(1000); ssc.start(); ssc.awaitTermination(1000 * 10); }
From source file:SparkHome.java
License:Apache License
public static void main(String[] args) throws Exception { if (args.length < 1) { System.err.println("Usage: JavaWordCount <file>"); System.exit(1);//from w w w .j a v a2 s . c o m } SparkConf sparkConf = new SparkConf().setAppName("JavaWordCount"); JavaSparkContext ctx = new JavaSparkContext(sparkConf); JavaRDD<String> lines = ctx.textFile(args[0], 2); // JavaRDD<String> counts=lines.flatMap(new FlatMapFunction<String,String>() { // /** // * // */ // private static final long serialVersionUID = 1L; // // public Iterable<String> call(String s){ // return Arrays.asList(s.split("\\s*,\\s*")); // } // // }); JavaRDD<String> words = lines.flatMap(new FlatMapFunction<String, String>() { @Override public Iterable<String> call(String s) { return Arrays.asList(SPACE.split(s)); } }); JavaPairRDD<String, Integer> ones = words.mapToPair(new PairFunction<String, String, Integer>() { @Override public Tuple2<String, Integer> call(String s) { return new Tuple2<String, Integer>(s, 1); } }); JavaPairRDD<String, Integer> counts = ones.reduceByKey(new Function2<Integer, Integer, Integer>() { @Override public Integer call(Integer i1, Integer i2) { return i1 + i2; } }); words.saveAsTextFile("C:\\Users\\i308124\\Desktop\\DB_Comparision_Output\\output_new1.txt"); // List<Tuple2<String, Integer>> output = counts.collect(); // for (Tuple2<?,?> tuple : output) { // System.out.println(tuple._1() + ": " + tuple._2()); // } // // ctx.stop(); }
From source file:OurPi.java
License:Apache License
public static void main(String[] args) throws Exception { if (args.length != 1) { System.out.println("n"); return;// w ww. j ava 2 s. co m } SparkConf sparkConf = new SparkConf().setAppName("JavaSparkPi"); JavaSparkContext jsc = new JavaSparkContext(sparkConf); int n = Integer.parseInt(args[0]); System.out.println("n = " + n); List<Integer> l = new ArrayList<Integer>(n); for (int i = 0; i < n; i++) { l.add(i); } JavaRDD<Integer> dataSet = jsc.parallelize(l); int count = dataSet.map(new Function<Integer, Integer>() { @Override public Integer call(Integer integer) { double x = Math.random() * 2 - 1; double y = Math.random() * 2 - 1; return (x * x + y * y < 1) ? 1 : 0; } }).reduce(new Function2<Integer, Integer, Integer>() { @Override public Integer call(Integer integer, Integer integer2) { return integer + integer2; } }); System.out.println("Our Java Pi is roughly " + 4.0 * count / n); }
From source file:NetworkWordCount.java
License:Apache License
/**
 * Reads text from the socket at {@code args[0]}:{@code args[1]} and prints
 * per-batch word counts.
 *
 * <p>Prints a usage message and exits with status 1 when fewer than two
 * arguments are given. {@code SPACE} is defined outside this method and is
 * used only through {@code SPACE.split(String)}.
 *
 * @param args {@code args[0]} hostname, {@code args[1]} port
 * @throws Exception declared by the original entry point
 */
public static void main(String[] args) throws Exception {
    if (args.length < 2) {
        System.err.println("Usage: JavaNetworkWordCount <hostname> <port>");
        System.exit(1);
    }

    // Batch interval: Durations.seconds(3).
    JavaStreamingContext ssc = new JavaStreamingContext(
            new SparkConf().setAppName("JavaNetworkWordCount"),
            Durations.seconds(3));

    FlatMapFunction<String, String> tokenize = new FlatMapFunction<String, String>() {
        @Override
        public Iterator<String> call(String line) {
            return Arrays.asList(SPACE.split(line)).iterator();
        }
    };
    PairFunction<String, String, Integer> pairWithOne = new PairFunction<String, String, Integer>() {
        @Override
        public Tuple2<String, Integer> call(String token) {
            return new Tuple2<>(token, 1);
        }
    };
    Function2<Integer, Integer, Integer> sum = new Function2<Integer, Integer, Integer>() {
        @Override
        public Integer call(Integer left, Integer right) {
            return left + right;
        }
    };

    String host = args[0];
    int port = Integer.parseInt(args[1]);
    JavaReceiverInputDStream<String> lines =
            ssc.socketTextStream(host, port, StorageLevels.MEMORY_AND_DISK_SER);

    JavaPairDStream<String, Integer> wordCounts = lines.flatMap(tokenize)
            .mapToPair(pairWithOne)
            .reduceByKey(sum);

    wordCounts.print();
    ssc.start();
    ssc.awaitTermination();
}
From source file:SparkKMer.java
License:Apache License
public static void main(String[] args) throws Exception { //Setup//w w w . j a va 2 s.c o m SparkConf sparkConf = new SparkConf().setAppName("SparkKMer"); JavaSparkContext jsc = new JavaSparkContext(sparkConf); //Agrument parsing if (args.length < 2) { System.err.println("Usage: SparkKMer <accession> <kmer-length>"); System.exit(1); } final String acc = args[0]; final int KMER_LENGTH = Integer.parseInt(args[1]); //Check accession and split ReadCollection run = gov.nih.nlm.ncbi.ngs.NGS.openReadCollection(acc); long numreads = run.getReadCount(); //Slice the job int chunk = 20000; /** amount of reads per 1 map operation **/ int slices = (int) (numreads / chunk / 1); if (slices == 0) slices = 1; List<LongRange> sub = new ArrayList<LongRange>(); for (long first = 1; first <= numreads;) { long last = first + chunk - 1; if (last > numreads) last = numreads; sub.add(new LongRange(first, last)); first = last + 1; } System.err.println("Prepared ranges: \n" + sub); JavaRDD<LongRange> jobs = jsc.parallelize(sub, slices); //Map // JavaRDD<String> kmers = jobs.flatMap(new FlatMapFunction<LongRange, String>() { ReadCollection run = null; @Override public Iterable<String> call(LongRange s) { //Executes on task nodes List<String> ret = new ArrayList<String>(); try { long first = s.getMinimumLong(); long last = s.getMaximumLong(); if (run == null) { run = gov.nih.nlm.ncbi.ngs.NGS.openReadCollection(acc); } ReadIterator it = run.getReadRange(first, last - first + 1, Read.all); while (it.nextRead()) { //iterate through fragments while (it.nextFragment()) { String bases = it.getFragmentBases(); //iterate through kmers for (int i = 0; i < bases.length() - KMER_LENGTH; i++) { ret.add(bases.substring(i, i + KMER_LENGTH)); } } } } catch (ErrorMsg x) { System.err.println(x.toString()); x.printStackTrace(); } return ret; } }); //Initiate kmer counting; JavaPairRDD<String, Integer> kmer_ones = kmers.mapToPair(new PairFunction<String, String, Integer>() { @Override public Tuple2<String, 
Integer> call(String s) { return new Tuple2<String, Integer>(s, 1); } }); //Reduce counts JavaPairRDD<String, Integer> counts = kmer_ones.reduceByKey(new Function2<Integer, Integer, Integer>() { @Override public Integer call(Integer i1, Integer i2) { return i1 + i2; } }); //Collect the output List<Tuple2<String, Integer>> output = counts.collect(); for (Tuple2<String, Integer> tuple : output) { System.out.println(tuple._1() + ": " + tuple._2()); } jsc.stop(); }
From source file:SortedWordCount.java
License:Apache License
/**
 * Counts the words of the text file named by {@code args[0]}, sorts the
 * result by key, and prints each {@code word: count} pair to standard output.
 *
 * <p>Prints a usage message and exits with status 1 when no file is given.
 * {@code SPACE} is defined outside this method and is used only through
 * {@code SPACE.split(String)}.
 *
 * @param args {@code args[0]} is the path of the text file to read
 * @throws Exception declared by the original entry point
 */
public static void main(String[] args) throws Exception {
    if (args.length < 1) {
        System.err.println("Usage: SortedWordCount <file>");
        System.exit(1);
    }

    JavaSparkContext ctx = new JavaSparkContext(new SparkConf().setAppName("SortedWordCount"));

    FlatMapFunction<String, String> tokenize = new FlatMapFunction<String, String>() {
        @Override
        public Iterable<String> call(String line) {
            return Arrays.asList(SPACE.split(line));
        }
    };
    PairFunction<String, String, Integer> pairWithOne = new PairFunction<String, String, Integer>() {
        @Override
        public Tuple2<String, Integer> call(String token) {
            return new Tuple2<String, Integer>(token, 1);
        }
    };
    Function2<Integer, Integer, Integer> sum = new Function2<Integer, Integer, Integer>() {
        @Override
        public Integer call(Integer left, Integer right) {
            return left + right;
        }
    };

    // textFile is called with 4 as its second argument, as in the original.
    JavaPairRDD<String, Integer> counts = ctx.textFile(args[0], 4)
            .flatMap(tokenize)
            .mapToPair(pairWithOne)
            .reduceByKey(sum);

    List<Tuple2<String, Integer>> ordered = counts.sortByKey().collect();
    for (Tuple2<String, Integer> entry : ordered) {
        System.out.println(entry._1() + ": " + entry._2());
    }

    ctx.stop();
}