Example usage for org.apache.spark.api.java.function Function2 Function2

List of usage examples for org.apache.spark.api.java.function Function2 Function2

Introduction

On this page you can find example usage of org.apache.spark.api.java.function.Function2.

Prototype

Function2

Source Link

Usage

From source file:CacheWordCount.java

License:Apache License

/**
 * Word count that caches the reduced RDD and runs the count action twice:
 * the first count materializes and caches the data, the second is served
 * from the cache.
 */
public static void main(String[] args) throws Exception {
    // An input file path is mandatory.
    if (args.length < 1) {
        System.err.println("Usage: CacheWordCount <file>");
        System.exit(1);
    }

    SparkConf conf = new SparkConf().setAppName("CacheWordCount");
    JavaSparkContext context = new JavaSparkContext(conf);

    // Read the input as 4 partitions.
    JavaRDD<String> textLines = context.textFile(args[0], 4);

    // Split each line into words on the SPACE pattern.
    JavaRDD<String> tokens = textLines.flatMap(new FlatMapFunction<String, String>() {
        @Override
        public Iterable<String> call(String line) {
            return Arrays.asList(SPACE.split(line));
        }
    });

    // Pair every word with an initial count of one.
    JavaPairRDD<String, Integer> pairs = tokens.mapToPair(new PairFunction<String, String, Integer>() {
        @Override
        public Tuple2<String, Integer> call(String word) {
            return new Tuple2<String, Integer>(word, 1);
        }
    });

    // Sum the counts per word.
    JavaPairRDD<String, Integer> totals = pairs.reduceByKey(new Function2<Integer, Integer, Integer>() {
        @Override
        public Integer call(Integer left, Integer right) {
            return left + right;
        }
    });

    // Cache so the second action below reuses the materialized result.
    totals.cache();

    long num = totals.count(); // first action: computes and populates the cache
    num = totals.count();      // second action: answered from the cache

    context.stop();
}

From source file:JavaKafkaWordCount_old.java

License:Apache License

/**
 * Streaming word count over a Kafka topic: consumes the "test" topic via
 * ZooKeeper at localhost:5181 and prints per-batch word counts.
 */
public static void main(String[] args) {
    SparkConf conf = new SparkConf().setAppName("JavaKafkaWordCount");
    conf.setMaster("local[2]");

    // Streaming context with a 2-second batch interval.
    JavaStreamingContext streamingContext = new JavaStreamingContext(conf, new Duration(2000));

    // Subscribe to the "test" topic with a single receiver thread.
    Map<String, Integer> topics = new HashMap<String, Integer>();
    topics.put("test", 1);

    JavaPairReceiverInputDStream<String, String> messages = KafkaUtils.createStream(streamingContext,
            "localhost:5181", "test-consumer-group", topics);

    // Keep only the message payload (the tuple's second element).
    JavaDStream<String> payloads = messages.map(new Function<Tuple2<String, String>, String>() {
        @Override
        public String call(Tuple2<String, String> record) {
            return record._2();
        }
    });

    // Tokenize each payload on the SPACE pattern.
    JavaDStream<String> tokens = payloads.flatMap(new FlatMapFunction<String, String>() {
        @Override
        public Iterable<String> call(String payload) {
            return Lists.newArrayList(SPACE.split(payload));
        }
    });

    // Count the words within each batch.
    JavaPairDStream<String, Integer> totals = tokens.mapToPair(new PairFunction<String, String, Integer>() {
        @Override
        public Tuple2<String, Integer> call(String word) {
            return new Tuple2<String, Integer>(word, 1);
        }
    }).reduceByKey(new Function2<Integer, Integer, Integer>() {
        @Override
        public Integer call(Integer a, Integer b) {
            return a + b;
        }
    });

    totals.print();
    streamingContext.start();
    streamingContext.awaitTermination();
}

From source file:SparkJavaWordCount.java

License:Apache License

/**
 * Basic batch word count: reads a text file, counts words, and prints
 * every (word, count) pair on the driver.
 */
public static void main(String[] args) throws Exception {
    // A single input file path is required.
    if (args.length < 1) {
        System.err.println("Usage: JavaWordCount <file>");
        System.exit(1);
    }

    SparkConf conf = new SparkConf().setAppName("JavaWordCount");
    JavaSparkContext context = new JavaSparkContext(conf);

    // Load the input file as a single partition.
    JavaRDD<String> inputLines = context.textFile(args[0], 1);

    // Tokenize each line on the SPACE pattern.
    JavaRDD<String> tokens = inputLines.flatMap(new FlatMapFunction<String, String>() {
        @Override
        public Iterable<String> call(String line) {
            return Arrays.asList(SPACE.split(line));
        }
    });

    // Emit (word, 1) for every token.
    JavaPairRDD<String, Integer> pairs = tokens.mapToPair(new PairFunction<String, String, Integer>() {
        @Override
        public Tuple2<String, Integer> call(String word) {
            return new Tuple2<String, Integer>(word, 1);
        }
    });

    // Aggregate per-word counts.
    JavaPairRDD<String, Integer> totals = pairs.reduceByKey(new Function2<Integer, Integer, Integer>() {
        @Override
        public Integer call(Integer a, Integer b) {
            return a + b;
        }
    });

    // Bring the results back to the driver and print them.
    List<Tuple2<String, Integer>> collected = totals.collect();
    for (Tuple2<?, ?> entry : collected) {
        System.out.println(entry._1() + ": " + entry._2());
    }
    context.stop();
}

From source file:JavaNetworkWordCount.java

License:Apache License

/**
 * Streaming word count over a socket text stream, with the micro-batch
 * duration taken from the command line.
 *
 * Expected arguments per the usage banner: app name, code, refresh rate
 * (seconds). See the NOTE below about how args are actually consumed.
 */
public static void main(String[] args) {
    if (args.length < 3) {
        System.err.println("Usage: JavaNetworkWordCount <app name> <code> <refresh rate in seconds>");
        System.exit(1);
    }

    String name = args[0];
    // Integer.parseInt replaces the deprecated new Integer(String).intValue().
    int refreshRateSeconds = Integer.parseInt(args[2]);

    StreamingExamples.setStreamingLogLevels();

    // Create the context with a <refreshRateSeconds>-second batch size.
    SparkConf sparkConf = new SparkConf().setAppName(name);
    JavaStreamingContext ssc = new JavaStreamingContext(sparkConf, Durations.seconds(refreshRateSeconds));

    // NOTE(review): the usage banner names args[1] "<code>", yet below args[0]
    // is used as the socket host and args[1] as the port. Confirm the intended
    // CLI contract; runtime behavior is left unchanged here.
    // No replication in the storage level is acceptable only when running
    // locally; distributed runs need replication for fault tolerance.
    JavaReceiverInputDStream<String> lines = ssc.socketTextStream(args[0], Integer.parseInt(args[1]),
            StorageLevels.MEMORY_AND_DISK_SER);

    // Split each incoming line on the DELIMITER pattern.
    JavaDStream<String> words = lines.flatMap(new FlatMapFunction<String, String>() {
        @Override
        public Iterable<String> call(String x) {
            return Lists.newArrayList(DELIMITER.split(x));
        }
    });

    // Count the words within each batch.
    JavaPairDStream<String, Integer> wordCounts = words.mapToPair(new PairFunction<String, String, Integer>() {
        @Override
        public Tuple2<String, Integer> call(String s) {
            return new Tuple2<String, Integer>(s, 1);
        }
    }).reduceByKey(new Function2<Integer, Integer, Integer>() {
        @Override
        public Integer call(Integer i1, Integer i2) {
            return i1 + i2;
        }
    });

    wordCounts.print();
    ssc.start();
    ssc.awaitTermination();
}

From source file:JavaCustomReceiver.java

License:Apache License

/**
 * Demo driver for the custom receiver: streams text from localhost:12344,
 * splits each line into words, and prints per-batch word counts for up to
 * ten seconds.
 *
 * NOTE(review): the incoming command-line arguments are unconditionally
 * overwritten below, so caller-supplied values are ignored — confirm intent.
 */
public static void main(String[] args) throws Exception {
    // Hard-coded endpoint; discards whatever the caller passed in.
    args = new String[2];
    args[0] = "localhost";
    args[1] = "12344";
    // StreamingExamples.setStreamingLogLevels();
    // NOTE(review): debug probe logged at ERROR level — confirm before shipping.
    logger.error("hi error");
    System.out.println("logging started atleast");
    // Streaming context with a 1-second batch size; jars shipped from the local build.
    SparkConf sparkConf = new SparkConf().setAppName("JavaCustomReceiver").setMaster(SPARK_MASTER)
            .setSparkHome("local").setJars(new String[] { "target/SparkProject.0.0.1-SNAPSHOT.jar" });
    JavaStreamingContext ssc = new JavaStreamingContext(sparkConf, new Duration(1000));

    // Ingest lines through the custom receiver implementation.
    JavaReceiverInputDStream<String> lines = ssc.receiverStream(new JavaCustomReceiver("localhost", 12344));
    System.out.println("Received Lines: " + lines.toString());

    // Split each line on single spaces.
    JavaDStream<String> words = lines.flatMap(new FlatMapFunction<String, String>() {
        public Iterable<String> call(String x) {
            System.out.println("Flat map processsing " + x);
            return Arrays.asList(x.split(" "));
        }
    });
    // Pair each word with an initial count of one.
    JavaPairDStream<String, Integer> pairs = words.mapToPair(new PairFunction<String, String, Integer>() {
        public Tuple2<String, Integer> call(String s) {
            return new Tuple2<String, Integer>(s, 1);
        }
    });

    // Sum counts per word within each batch.
    JavaPairDStream<String, Integer> wordCounts = pairs.reduceByKey(new Function2<Integer, Integer, Integer>() {
        public Integer call(Integer i1, Integer i2) {
            return i1 + i2;
        }
    });

    // Print up to 1000 elements per batch; run for at most 10 seconds.
    wordCounts.print(1000);
    ssc.start();
    ssc.awaitTermination(1000 * 10);

}

From source file:SparkHome.java

License:Apache License

/**
 * Word-count variant that writes the tokenized words to a fixed local path.
 *
 * NOTE(review): the reduced `counts` RDD is built but never used — the job
 * saves `words` (the raw tokens), not the counts. Confirm whether
 * counts.saveAsTextFile(...) was intended.
 * NOTE(review): the output location is a hard-coded absolute Windows path.
 */
public static void main(String[] args) throws Exception {

    if (args.length < 1) {
        System.err.println("Usage: JavaWordCount <file>");
        System.exit(1);
    }

    SparkConf sparkConf = new SparkConf().setAppName("JavaWordCount");
    JavaSparkContext ctx = new JavaSparkContext(sparkConf);
    // Read the input file as 2 partitions.
    JavaRDD<String> lines = ctx.textFile(args[0], 2);

    // Split each line into words using the SPACE pattern.
    JavaRDD<String> words = lines.flatMap(new FlatMapFunction<String, String>() {
        @Override
        public Iterable<String> call(String s) {
            return Arrays.asList(SPACE.split(s));
        }
    });

    // Pair each word with an initial count of one.
    JavaPairRDD<String, Integer> ones = words.mapToPair(new PairFunction<String, String, Integer>() {
        @Override
        public Tuple2<String, Integer> call(String s) {
            return new Tuple2<String, Integer>(s, 1);
        }
    });

    // Sum counts per word (result currently unused — see NOTE above).
    JavaPairRDD<String, Integer> counts = ones.reduceByKey(new Function2<Integer, Integer, Integer>() {
        @Override
        public Integer call(Integer i1, Integer i2) {
            return i1 + i2;
        }
    });

    // Persists the raw words, not the aggregated counts.
    words.saveAsTextFile("C:\\Users\\i308124\\Desktop\\DB_Comparision_Output\\output_new1.txt");

    ctx.stop();
}

From source file:OurPi.java

License:Apache License

/**
 * Monte Carlo estimate of pi: distributes n samples across the cluster,
 * draws one random point in the [-1, 1] x [-1, 1] square per sample, counts
 * how many land inside the unit circle, and reports 4 * inside / n.
 */
public static void main(String[] args) throws Exception {
    if (args.length != 1) {
        // Emit a real usage line to stderr (the original printed just "n" to stdout).
        System.err.println("Usage: OurPi <number-of-samples>");
        return;
    }
    SparkConf sparkConf = new SparkConf().setAppName("JavaSparkPi");
    JavaSparkContext jsc = new JavaSparkContext(sparkConf);

    int n = Integer.parseInt(args[0]);
    System.out.println("n = " + n);

    // One list element per sample; the element values themselves are unused.
    List<Integer> samples = new ArrayList<Integer>(n);
    for (int i = 0; i < n; i++) {
        samples.add(i);
    }

    JavaRDD<Integer> dataSet = jsc.parallelize(samples);

    // Map each sample to 1 if a random point lands inside the unit circle,
    // else 0, then sum the hits.
    int count = dataSet.map(new Function<Integer, Integer>() {
        @Override
        public Integer call(Integer integer) {
            double x = Math.random() * 2 - 1;
            double y = Math.random() * 2 - 1;
            return (x * x + y * y < 1) ? 1 : 0;
        }
    }).reduce(new Function2<Integer, Integer, Integer>() {
        @Override
        public Integer call(Integer a, Integer b) {
            return a + b;
        }
    });

    System.out.println("Our Java Pi is roughly " + 4.0 * count / n);

    // Release Spark resources (stop() was missing in the original).
    jsc.stop();
}

From source file:NetworkWordCount.java

License:Apache License

/**
 * Streaming word count over newline-delimited text arriving on a socket
 * (e.g. produced by `nc`), printed once per 3-second batch.
 */
public static void main(String[] args) throws Exception {
    // Host and port are both required.
    if (args.length < 2) {
        System.err.println("Usage: JavaNetworkWordCount <hostname> <port>");
        System.exit(1);
    }

    SparkConf conf = new SparkConf().setAppName("JavaNetworkWordCount");
    // 3-second micro-batches.
    JavaStreamingContext streamingContext = new JavaStreamingContext(conf, Durations.seconds(3));

    // Receive text from host:port, stored serialized in memory and on disk.
    JavaReceiverInputDStream<String> textStream = streamingContext.socketTextStream(args[0],
            Integer.parseInt(args[1]), StorageLevels.MEMORY_AND_DISK_SER);

    // Tokenize each line on the SPACE pattern.
    JavaDStream<String> tokens = textStream.flatMap(new FlatMapFunction<String, String>() {
        @Override
        public Iterator<String> call(String line) {
            return Arrays.asList(SPACE.split(line)).iterator();
        }
    });

    // Count words within each batch.
    JavaPairDStream<String, Integer> totals = tokens.mapToPair(new PairFunction<String, String, Integer>() {
        @Override
        public Tuple2<String, Integer> call(String word) {
            return new Tuple2<>(word, 1);
        }
    }).reduceByKey(new Function2<Integer, Integer, Integer>() {
        @Override
        public Integer call(Integer a, Integer b) {
            return a + b;
        }
    });

    totals.print();
    streamingContext.start();
    streamingContext.awaitTermination();
}

From source file:SparkKMer.java

License:Apache License

/**
 * Counts k-mers (length-KMER_LENGTH substrings) across all reads of an SRA
 * accession, fetching reads on the executors through the NCBI NGS API.
 *
 * Usage: SparkKMer <accession> <kmer-length>
 */
public static void main(String[] args) throws Exception {
    // Setup
    SparkConf sparkConf = new SparkConf().setAppName("SparkKMer");
    JavaSparkContext jsc = new JavaSparkContext(sparkConf);
    // Argument parsing
    if (args.length < 2) {
        System.err.println("Usage: SparkKMer <accession> <kmer-length>");
        System.exit(1);
    }
    final String acc = args[0];
    final int KMER_LENGTH = Integer.parseInt(args[1]);

    // Open the accession once on the driver to learn the total read count.
    ReadCollection run = gov.nih.nlm.ncbi.ngs.NGS.openReadCollection(acc);
    long numreads = run.getReadCount();

    // Slice the job: each range of `chunk` reads becomes one task input.
    int chunk = 20000; /** amount of reads per 1 map operation **/
    int slices = (int) (numreads / chunk / 1);
    if (slices == 0)
        slices = 1;
    List<LongRange> sub = new ArrayList<LongRange>();
    for (long first = 1; first <= numreads;) {
        long last = first + chunk - 1;
        if (last > numreads)
            last = numreads;
        sub.add(new LongRange(first, last));
        first = last + 1;
    }
    System.err.println("Prepared ranges: \n" + sub);

    JavaRDD<LongRange> jobs = jsc.parallelize(sub, slices);

    // Map: expand each read range into the k-mers of its fragments.
    JavaRDD<String> kmers = jobs.flatMap(new FlatMapFunction<LongRange, String>() {
        // Opened lazily on first use and reused by this function instance.
        ReadCollection run = null;

        @Override
        public Iterable<String> call(LongRange s) {
            // Executes on task nodes.
            List<String> ret = new ArrayList<String>();
            try {
                long first = s.getMinimumLong();
                long last = s.getMaximumLong();
                if (run == null) {
                    run = gov.nih.nlm.ncbi.ngs.NGS.openReadCollection(acc);
                }
                ReadIterator it = run.getReadRange(first, last - first + 1, Read.all);
                while (it.nextRead()) {
                    // Iterate through the fragments of each read.
                    while (it.nextFragment()) {
                        String bases = it.getFragmentBases();
                        // NOTE(review): `i < bases.length() - KMER_LENGTH` skips the
                        // final window; `<=` would cover every k-mer. Confirm intent.
                        for (int i = 0; i < bases.length() - KMER_LENGTH; i++) {
                            ret.add(bases.substring(i, i + KMER_LENGTH));
                        }
                    }
                }
            } catch (ErrorMsg x) {
                // Best-effort: log the NGS error and return what was collected so far.
                System.err.println(x.toString());
                x.printStackTrace();
            }
            return ret;
        }
    });
    // Initiate kmer counting: pair each k-mer with an initial count of one.
    JavaPairRDD<String, Integer> kmer_ones = kmers.mapToPair(new PairFunction<String, String, Integer>() {
        @Override
        public Tuple2<String, Integer> call(String s) {
            return new Tuple2<String, Integer>(s, 1);
        }
    });
    // Reduce: sum the counts per k-mer.
    JavaPairRDD<String, Integer> counts = kmer_ones.reduceByKey(new Function2<Integer, Integer, Integer>() {
        @Override
        public Integer call(Integer i1, Integer i2) {
            return i1 + i2;
        }
    });
    // Collect the counts to the driver and print them.
    List<Tuple2<String, Integer>> output = counts.collect();
    for (Tuple2<String, Integer> tuple : output) {
        System.out.println(tuple._1() + ": " + tuple._2());
    }
    jsc.stop();
}

From source file:SortedWordCount.java

License:Apache License

/**
 * Word count whose output is sorted alphabetically by word before printing.
 */
public static void main(String[] args) throws Exception {
    // An input file path is required.
    if (args.length < 1) {
        System.err.println("Usage: SortedWordCount <file>");
        System.exit(1);
    }

    SparkConf conf = new SparkConf().setAppName("SortedWordCount");
    JavaSparkContext context = new JavaSparkContext(conf);

    // Read the input as 4 partitions.
    JavaRDD<String> inputLines = context.textFile(args[0], 4);

    // line -> words, split on the SPACE pattern.
    JavaRDD<String> tokens = inputLines.flatMap(new FlatMapFunction<String, String>() {
        @Override
        public Iterable<String> call(String line) {
            return Arrays.asList(SPACE.split(line));
        }
    });

    // word -> (word, 1)
    JavaPairRDD<String, Integer> pairs = tokens.mapToPair(new PairFunction<String, String, Integer>() {
        @Override
        public Tuple2<String, Integer> call(String word) {
            return new Tuple2<String, Integer>(word, 1);
        }
    });

    // Sum the counts per word.
    JavaPairRDD<String, Integer> totals = pairs.reduceByKey(new Function2<Integer, Integer, Integer>() {
        @Override
        public Integer call(Integer a, Integer b) {
            return a + b;
        }
    });

    // Order the results by key for deterministic, readable output.
    JavaPairRDD<String, Integer> ordered = totals.sortByKey();

    List<Tuple2<String, Integer>> collected = ordered.collect();
    for (Tuple2<?, ?> entry : collected) {
        System.out.println(entry._1() + ": " + entry._2());
    }
    context.stop();
}