Example usage for org.apache.spark.api.java.function FlatMapFunction FlatMapFunction

List of usage examples for org.apache.spark.api.java.function FlatMapFunction FlatMapFunction

Introduction

On this page you can find example usages of org.apache.spark.api.java.function FlatMapFunction.

Prototype

FlatMapFunction

Source Link

Usage

From source file:CacheWordCount.java

License:Apache License

/**
 * Word-count job that demonstrates RDD caching: the reduced counts are
 * cached and then counted twice, so the second count() is served from the
 * cache instead of recomputing the lineage.
 *
 * @param args args[0] is the input text file path
 * @throws Exception propagated from Spark job execution
 */
public static void main(String[] args) throws Exception {

    if (args.length < 1) {
        System.err.println("Usage: CacheWordCount <file>");
        System.exit(1);
    }

    SparkConf conf = new SparkConf().setAppName("CacheWordCount");
    JavaSparkContext context = new JavaSparkContext(conf);

    // Read the input with 4 partitions and split each line on the SPACE
    // pattern (declared elsewhere in this file).
    JavaRDD<String> inputLines = context.textFile(args[0], 4);
    JavaRDD<String> tokens = inputLines.flatMap(new FlatMapFunction<String, String>() {
        @Override
        public Iterable<String> call(String line) {
            return Arrays.asList(SPACE.split(line));
        }
    });

    // Pair each word with 1 and sum the pairs per word.
    JavaPairRDD<String, Integer> wordTotals = tokens
            .mapToPair(new PairFunction<String, String, Integer>() {
                @Override
                public Tuple2<String, Integer> call(String word) {
                    return new Tuple2<String, Integer>(word, 1);
                }
            }).reduceByKey(new Function2<Integer, Integer, Integer>() {
                @Override
                public Integer call(Integer a, Integer b) {
                    return a + b;
                }
            });
    wordTotals.cache();

    // The first count() materializes and caches the RDD; the second one
    // reads from the cache (this is the point of the example).
    long total = wordTotals.count();
    total = wordTotals.count();

    context.stop();
}

From source file:JavaKafkaWordCount_old.java

License:Apache License

/**
 * Streaming word count over messages from a local Kafka topic ("test"),
 * consumed via a Zookeeper quorum with 2-second micro-batches.
 *
 * @param args unused; all connection settings are hardcoded below
 */
public static void main(String[] args) {

    SparkConf conf = new SparkConf().setAppName("JavaKafkaWordCount");
    conf.setMaster("local[2]");
    // Create the streaming context with a 2-second batch interval.
    JavaStreamingContext streamingContext = new JavaStreamingContext(conf, new Duration(2000));

    int numThreads = 1;
    String zkQuorum = "localhost:5181";
    String group = "test-consumer-group";
    Map<String, Integer> topicMap = new HashMap<String, Integer>();
    topicMap.put("test", numThreads);

    JavaPairReceiverInputDStream<String, String> messages = KafkaUtils.createStream(streamingContext, zkQuorum,
            group, topicMap);

    // Keep only the payload (value) of each (key, value) Kafka record.
    JavaDStream<String> payloads = messages.map(new Function<Tuple2<String, String>, String>() {
        @Override
        public String call(Tuple2<String, String> record) {
            return record._2();
        }
    });

    // Tokenize each payload on the SPACE pattern (declared elsewhere).
    JavaDStream<String> tokens = payloads.flatMap(new FlatMapFunction<String, String>() {
        @Override
        public Iterable<String> call(String payload) {
            return Lists.newArrayList(SPACE.split(payload));
        }
    });

    // Sum (word, 1) pairs per word within each batch.
    JavaPairDStream<String, Integer> wordTotals = tokens.mapToPair(new PairFunction<String, String, Integer>() {
        @Override
        public Tuple2<String, Integer> call(String word) {
            return new Tuple2<String, Integer>(word, 1);
        }
    }).reduceByKey(new Function2<Integer, Integer, Integer>() {
        @Override
        public Integer call(Integer a, Integer b) {
            return a + b;
        }
    });

    wordTotals.print();
    streamingContext.start();
    streamingContext.awaitTermination();
}

From source file:SparkJavaWordCount.java

License:Apache License

/**
 * Classic batch word count: tokenize, pair each word with 1, sum per word,
 * and print every (word, count) pair to stdout.
 *
 * @param args args[0] is the input text file path
 * @throws Exception propagated from Spark job execution
 */
public static void main(String[] args) throws Exception {

    if (args.length < 1) {
        System.err.println("Usage: JavaWordCount <file>");
        System.exit(1);
    }

    SparkConf conf = new SparkConf().setAppName("JavaWordCount");
    JavaSparkContext context = new JavaSparkContext(conf);
    JavaRDD<String> inputLines = context.textFile(args[0], 1);

    // Split each line on the SPACE pattern (declared elsewhere in this file).
    JavaRDD<String> tokens = inputLines.flatMap(new FlatMapFunction<String, String>() {
        @Override
        public Iterable<String> call(String line) {
            return Arrays.asList(SPACE.split(line));
        }
    });

    // (word, 1) pairs reduced by key yield per-word totals.
    JavaPairRDD<String, Integer> totals = tokens.mapToPair(new PairFunction<String, String, Integer>() {
        @Override
        public Tuple2<String, Integer> call(String word) {
            return new Tuple2<String, Integer>(word, 1);
        }
    }).reduceByKey(new Function2<Integer, Integer, Integer>() {
        @Override
        public Integer call(Integer a, Integer b) {
            return a + b;
        }
    });

    // Collect to the driver and print each word with its count.
    List<Tuple2<String, Integer>> collected = totals.collect();
    for (Tuple2<?, ?> entry : collected) {
        System.out.println(entry._1() + ": " + entry._2());
    }
    context.stop();
}

From source file:JavaNetworkWordCount.java

License:Apache License

/**
 * Streaming word count over a socket text stream with a configurable batch
 * interval taken from args[2].
 *
 * @param args see the usage string below; note the argument-contract
 *             mismatch flagged before socketTextStream
 */
public static void main(String[] args) {
    if (args.length < 3) {
        System.err.println("Usage: JavaNetworkWordCount <app name> <code> <refresh rate in seconds>");
        System.exit(1);
    }

    String name = args[0];
    // Integer.parseInt replaces the deprecated `new Integer(...)` boxing
    // constructor; parsing behavior (and NumberFormatException) is identical.
    // The unused local `kernel` (= args[1]) was removed.
    int refreshRateSeconds = Integer.parseInt(args[2]);

    StreamingExamples.setStreamingLogLevels();

    // Create the context with a <refreshRateSeconds> second batch size
    SparkConf sparkConf = new SparkConf().setAppName(name);
    JavaStreamingContext ssc = new JavaStreamingContext(sparkConf, Durations.seconds(refreshRateSeconds));

    // NOTE(review): the usage string documents args[0]=<app name> and
    // args[1]=<code>, yet below they are consumed as hostname and port.
    // One of the two is wrong — confirm the intended contract.
    //
    // Note that no duplication in storage level only for running locally.
    // Replication necessary in distributed scenario for fault tolerance.
    JavaReceiverInputDStream<String> lines = ssc.socketTextStream(args[0], Integer.parseInt(args[1]),
            StorageLevels.MEMORY_AND_DISK_SER);
    // Tokenize each line on the DELIMITER pattern (declared elsewhere).
    JavaDStream<String> words = lines.flatMap(new FlatMapFunction<String, String>() {
        @Override
        public Iterable<String> call(String x) {
            return Lists.newArrayList(DELIMITER.split(x));
        }
    });
    // Per-batch word totals: (word, 1) pairs summed by key.
    JavaPairDStream<String, Integer> wordCounts = words.mapToPair(new PairFunction<String, String, Integer>() {
        @Override
        public Tuple2<String, Integer> call(String s) {
            return new Tuple2<String, Integer>(s, 1);
        }
    }).reduceByKey(new Function2<Integer, Integer, Integer>() {
        @Override
        public Integer call(Integer i1, Integer i2) {
            return i1 + i2;
        }
    });

    wordCounts.print();
    ssc.start();
    ssc.awaitTermination();
}

From source file:JavaCustomReceiver.java

License:Apache License

/**
 * Streaming word count fed by a custom receiver reading from
 * localhost:12344, with 1-second batches; runs for roughly 10 seconds.
 *
 * @param args ignored — the receiver endpoint is hardcoded below
 * @throws Exception propagated from Spark streaming execution
 */
public static void main(String[] args) throws Exception {
    // Removed dead code: `args` was unconditionally overwritten with
    // {"localhost", "12344"} and then never read — the receiver below
    // hardcodes the same endpoint.
    logger.error("hi error");
    System.out.println("logging started atleast");
    // Create the context with a 1 second batch size
    SparkConf sparkConf = new SparkConf().setAppName("JavaCustomReceiver").setMaster(SPARK_MASTER)
            .setSparkHome("local").setJars(new String[] { "target/SparkProject.0.0.1-SNAPSHOT.jar" });
    JavaStreamingContext ssc = new JavaStreamingContext(sparkConf, new Duration(1000));

    JavaReceiverInputDStream<String> lines = ssc.receiverStream(new JavaCustomReceiver("localhost", 12344));
    // NOTE(review): this prints the DStream object's toString(), not the
    // received data — confirm whether stream contents were intended here.
    System.out.println("Received Lines: " + lines.toString());

    // Split each incoming line on single spaces.
    JavaDStream<String> words = lines.flatMap(new FlatMapFunction<String, String>() {
        @Override
        public Iterable<String> call(String x) {
            System.out.println("Flat map processsing " + x);
            return Arrays.asList(x.split(" "));
        }
    });
    JavaPairDStream<String, Integer> pairs = words.mapToPair(new PairFunction<String, String, Integer>() {
        @Override
        public Tuple2<String, Integer> call(String s) {
            return new Tuple2<String, Integer>(s, 1);
        }
    });

    // Sum the (word, 1) pairs per word within each batch.
    JavaPairDStream<String, Integer> wordCounts = pairs.reduceByKey(new Function2<Integer, Integer, Integer>() {
        @Override
        public Integer call(Integer i1, Integer i2) {
            return i1 + i2;
        }
    });

    // Print up to 1000 elements per batch; stop waiting after 10 seconds.
    wordCounts.print(1000);
    ssc.start();
    ssc.awaitTermination(1000 * 10);

}

From source file:SparkHome.java

License:Apache License

/**
 * Word-count variant that tokenizes the input on the SPACE pattern and
 * writes the raw token RDD (not the counts) to a local Windows path.
 *
 * @param args args[0] is the input text file path
 * @throws Exception propagated from Spark job execution
 */
public static void main(String[] args) throws Exception {

    if (args.length < 1) {
        System.err.println("Usage: JavaWordCount <file>");
        System.exit(1);
    }

    SparkConf sparkConf = new SparkConf().setAppName("JavaWordCount");
    JavaSparkContext ctx = new JavaSparkContext(sparkConf);
    // Read the input split into 2 partitions.
    JavaRDD<String> lines = ctx.textFile(args[0], 2);

    // Tokenize each line on the SPACE pattern (declared elsewhere in this file).
    JavaRDD<String> words = lines.flatMap(new FlatMapFunction<String, String>() {
        @Override
        public Iterable<String> call(String s) {
            return Arrays.asList(SPACE.split(s));
        }
    });

    // (word, 1) pairs, to be summed per word below.
    JavaPairRDD<String, Integer> ones = words.mapToPair(new PairFunction<String, String, Integer>() {
        @Override
        public Tuple2<String, Integer> call(String s) {
            return new Tuple2<String, Integer>(s, 1);
        }
    });

    // NOTE(review): `counts` is a lazy transformation and no action is ever
    // applied to it, so this reduceByKey never executes; only `words` is
    // written out below. Presumably `counts` was meant to be saved — confirm.
    JavaPairRDD<String, Integer> counts = ones.reduceByKey(new Function2<Integer, Integer, Integer>() {
        @Override
        public Integer call(Integer i1, Integer i2) {
            return i1 + i2;
        }
    });

    // Writes the raw tokens (one per line) to the hardcoded local path.
    words.saveAsTextFile("C:\\Users\\i308124\\Desktop\\DB_Comparision_Output\\output_new1.txt");

    ctx.stop();
}

From source file:SparkHome.java

License:Apache License

public static void mySparkCode() {
    JavaSparkContext spark = new JavaSparkContext("localhost", "sparkwordcount");
    //======Using flatMap(RDD of words)==============
    JavaRDD<String> csvData = spark.textFile("C:\\Users\\i308124\\Desktop\\DB_Comparision_Output\\input.txt");

    JavaRDD<String> counts = csvData.flatMap(new FlatMapFunction<String, String>() {
        /**/*from w  w  w .ja  v a  2  s .  c  o  m*/
        * 
        */
        private static final long serialVersionUID = 1L;

        public Iterable<String> call(String s) {
            return Arrays.asList(s.split("\\s*,\\s*"));
        }

    });
    counts.saveAsTextFile("C:\\Users\\i308124\\Desktop\\DB_Comparision_Output\\output.txt");
}

From source file:SparkClosestPair.java

License:Apache License

/**
 * Two-pass closest-pair computation over "x,y" points read from HDFS.
 *
 * Pass 1 finds, per partition, the local closest pair and its distance
 * (delta), then emits only the points returned by
 * ConvexHullBuffer.findBoundaryPoints — presumably the candidates near
 * partition boundaries that could form a closer cross-partition pair
 * (TODO confirm against ConvexHullBuffer's implementation).
 * Pass 2 gathers all surviving points into a single partition and computes
 * the global closest pair, which is saved to HDFS and printed.
 *
 * @param ctx Spark context used to read the input and run both passes
 */
public static void closestPairFactory(JavaSparkContext ctx) {

    JavaRDD<String> lines = ctx.textFile("hdfs://master:54310/user/closestpair/cl_1.csv");

    // Pass 1: per-partition closest pair + boundary-candidate filtering.
    JavaRDD<ClosestPairPoint> points = lines
            .mapPartitions(new FlatMapFunction<Iterator<String>, ClosestPairPoint>() {
                public Iterable<ClosestPairPoint> call(Iterator<String> stringIterator) throws Exception {
                    // Parse every "x,y" line of this partition into a point.
                    ArrayList<ClosestPairPoint> pointList = new ArrayList<ClosestPairPoint>();
                    while (stringIterator.hasNext()) {
                        String nextString = stringIterator.next();
                        String[] coords = nextString.split(",");
                        ClosestPairPoint p = new ClosestPairPoint(Double.parseDouble(coords[0]),
                                Double.parseDouble(coords[1]));
                        pointList.add(p);
                    }
                    System.out.println(pointList.size());
                    // Local closest pair; delta is its Euclidean distance.
                    ClosestPair cP = ClosestPair.findClosestPair(pointList);
                    ClosestPairPoint leftPoint = cP.getLeftPoint();
                    ClosestPairPoint rightPoint = cP.getRightPoint();
                    System.out.println("Closest Pair = (" + leftPoint.getxCoord() + "," + leftPoint.getyCoord()
                            + ")" + " (" + rightPoint.getxCoord() + "," + rightPoint.getyCoord() + ")");
                    double xDiff = leftPoint.getxCoord() - rightPoint.getxCoord();
                    double yDiff = leftPoint.getyCoord() - rightPoint.getyCoord();
                    double delta = Math.sqrt((xDiff * xDiff) + (yDiff * yDiff));
                    System.out.println(delta);

                    // Emit the points ConvexHullBuffer considers boundary
                    // candidates for this delta and local pair.
                    return ConvexHullBuffer.findBoundaryPoints(pointList, delta, cP);
                }
            });

    // Collect survivors to the driver, then redistribute as one partition
    // so pass 2 sees every remaining point at once.
    List<ClosestPairPoint> output = points.collect();
    System.out.println(output.size());
    JavaRDD<ClosestPairPoint> finalPoints = ctx.parallelize(output).repartition(1);
    // Pass 2: global closest pair over all surviving points.
    JavaRDD<ClosestPairPoint> closestPair = finalPoints
            .mapPartitions(new FlatMapFunction<Iterator<ClosestPairPoint>, ClosestPairPoint>() {
                public Iterable<ClosestPairPoint> call(Iterator<ClosestPairPoint> pointIterator)
                        throws Exception {
                    ArrayList<ClosestPairPoint> pointList = new ArrayList<ClosestPairPoint>();
                    while (pointIterator.hasNext()) {
                        pointList.add(pointIterator.next());
                    }
                    ClosestPair cP = ClosestPair.findClosestPair(pointList);
                    ClosestPairPoint leftPoint = cP.getLeftPoint();
                    ClosestPairPoint rightPoint = cP.getRightPoint();
                    // Return exactly the two points of the global pair.
                    ArrayList<ClosestPairPoint> closestPair = new ArrayList<ClosestPairPoint>();
                    closestPair.add(leftPoint);
                    closestPair.add(rightPoint);
                    return closestPair;
                }
            });
    List<ClosestPairPoint> closesPairPoints = closestPair.collect();
    closestPair.saveAsTextFile("hdfs://master:54310/user/output/closestpair");
    for (ClosestPairPoint p : closesPairPoints) {
        System.out.println(p.getxCoord() + "," + p.getyCoord());
    }
    ctx.stop();
}

From source file:NetworkWordCount.java

License:Apache License

/**
 * Streaming word count over a TCP text socket using 3-second batches.
 * Uses the Spark 2.x FlatMapFunction contract (call returns an Iterator).
 *
 * @param args args[0] = hostname, args[1] = port
 * @throws Exception propagated from Spark streaming execution
 */
public static void main(String[] args) throws Exception {
    if (args.length < 2) {
        System.err.println("Usage: JavaNetworkWordCount <hostname> <port>");
        System.exit(1);
    }

    SparkConf conf = new SparkConf().setAppName("JavaNetworkWordCount");
    // 3-second micro-batches.
    JavaStreamingContext streamingContext = new JavaStreamingContext(conf, Durations.seconds(3));

    JavaReceiverInputDStream<String> socketLines = streamingContext.socketTextStream(args[0],
            Integer.parseInt(args[1]), StorageLevels.MEMORY_AND_DISK_SER);

    // Tokenize each line on the SPACE pattern (declared elsewhere).
    JavaDStream<String> tokens = socketLines.flatMap(new FlatMapFunction<String, String>() {
        @Override
        public Iterator<String> call(String line) {
            return Arrays.asList(SPACE.split(line)).iterator();
        }
    });

    // Sum (word, 1) pairs per word within each batch.
    JavaPairDStream<String, Integer> totals = tokens
            .mapToPair(new PairFunction<String, String, Integer>() {
                @Override
                public Tuple2<String, Integer> call(String word) {
                    return new Tuple2<>(word, 1);
                }
            }).reduceByKey(new Function2<Integer, Integer, Integer>() {
                @Override
                public Integer call(Integer a, Integer b) {
                    return a + b;
                }
            });

    totals.print();
    streamingContext.start();
    streamingContext.awaitTermination();
}

From source file:SparkKMer.java

License:Apache License

/**
 * Counts k-mer occurrences across all reads of an SRA accession with Spark:
 * slices the read range into chunks, expands each chunk into its k-mers on
 * task nodes, then reduces (kmer, 1) pairs to totals and prints them.
 *
 * @param args args[0] = SRA accession, args[1] = k-mer length
 * @throws Exception propagated from Spark job execution
 */
public static void main(String[] args) throws Exception {
    //Setup
    SparkConf sparkConf = new SparkConf().setAppName("SparkKMer");
    JavaSparkContext jsc = new JavaSparkContext(sparkConf);
    //Argument parsing
    if (args.length < 2) {
        System.err.println("Usage: SparkKMer <accession> <kmer-length>");
        System.exit(1);
    }
    final String acc = args[0];
    final int KMER_LENGTH = Integer.parseInt(args[1]);

    //Check accession (openReadCollection throws on bad input) and get size
    ReadCollection run = gov.nih.nlm.ncbi.ngs.NGS.openReadCollection(acc);
    long numreads = run.getReadCount();

    //Slice the job into [first, last] read ranges of `chunk` reads each
    int chunk = 20000; /** amount of reads per 1 map operation **/
    int slices = (int) (numreads / chunk / 1);
    if (slices == 0)
        slices = 1;
    List<LongRange> sub = new ArrayList<LongRange>();
    for (long first = 1; first <= numreads;) {
        long last = first + chunk - 1;
        if (last > numreads)
            last = numreads;
        sub.add(new LongRange(first, last));
        first = last + 1;
    }
    System.err.println("Prepared ranges: \n" + sub);

    JavaRDD<LongRange> jobs = jsc.parallelize(sub, slices);
    //Map: expand each read range into all k-mers of its fragments
    JavaRDD<String> kmers = jobs.flatMap(new FlatMapFunction<LongRange, String>() {
        // Lazily opened on the task node; reused across calls on the same task.
        ReadCollection run = null;

        @Override
        public Iterable<String> call(LongRange s) {
            //Executes on task nodes
            List<String> ret = new ArrayList<String>();
            try {
                long first = s.getMinimumLong();
                long last = s.getMaximumLong();
                if (run == null) {
                    run = gov.nih.nlm.ncbi.ngs.NGS.openReadCollection(acc);
                }
                ReadIterator it = run.getReadRange(first, last - first + 1, Read.all);
                while (it.nextRead()) {
                    //iterate through fragments
                    while (it.nextFragment()) {
                        String bases = it.getFragmentBases();
                        // Emit every k-mer. Fixed off-by-one: a sequence of
                        // length L holds L - K + 1 k-mers, so the last valid
                        // start index is L - K — the original `<` skipped it.
                        for (int i = 0; i <= bases.length() - KMER_LENGTH; i++) {
                            ret.add(bases.substring(i, i + KMER_LENGTH));
                        }
                    }
                }
            } catch (ErrorMsg x) {
                System.err.println(x.toString());
                x.printStackTrace();
            }
            return ret;
        }
    });
    //Initiate kmer counting: (kmer, 1) pairs
    JavaPairRDD<String, Integer> kmer_ones = kmers.mapToPair(new PairFunction<String, String, Integer>() {
        @Override
        public Tuple2<String, Integer> call(String s) {
            return new Tuple2<String, Integer>(s, 1);
        }
    });
    //Reduce counts per kmer
    JavaPairRDD<String, Integer> counts = kmer_ones.reduceByKey(new Function2<Integer, Integer, Integer>() {
        @Override
        public Integer call(Integer i1, Integer i2) {
            return i1 + i2;
        }
    });
    //Collect the output and print one "kmer: count" line per kmer
    List<Tuple2<String, Integer>> output = counts.collect();
    for (Tuple2<String, Integer> tuple : output) {
        System.out.println(tuple._1() + ": " + tuple._2());
    }
    jsc.stop();
}