Example usage for org.apache.spark.api.java.function.VoidFunction

List of usage examples for org.apache.spark.api.java.function.VoidFunction

Introduction

This page collects example usages of org.apache.spark.api.java.function.VoidFunction from open source projects.

Prototype

public interface VoidFunction<T> extends Serializable {
    void call(T t) throws Exception;
}
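
The interface declares a single method, call, which returns nothing and is used purely for its side effects. As a quick orientation, here is a minimal, self-contained sketch (the class name and sample data are illustrative, not taken from the projects below) that applies a VoidFunction to each element of a local RDD:

import java.util.Arrays;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.VoidFunction;

public class VoidFunctionSketch {
    public static void main(String[] args) {
        SparkConf conf = new SparkConf().setMaster("local").setAppName("VoidFunctionSketch");
        // JavaSparkContext implements Closeable, so try-with-resources closes it
        try (JavaSparkContext sc = new JavaSparkContext(conf)) {
            // foreach applies the VoidFunction to every element for its side effect;
            // with a local master the output goes to this JVM's console
            sc.parallelize(Arrays.asList("a", "b", "c")).foreach(new VoidFunction<String>() {
                @Override
                public void call(String s) {
                    System.out.println(s);
                }
            });
        }
    }
}

Because VoidFunction has a single abstract method, the same call can also be written as a lambda in Java 8+: foreach(s -> System.out.println(s)).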

Usage

From source file: TwitterHashTagCount.java

License: Apache License

private static void twitterStreaming(int window, int slide) {

    // Create the context with a 1 second batch size
    SparkConf sparkConf = new SparkConf().setAppName("JavaNetworkWordCount");
    JavaStreamingContext ssc = new JavaStreamingContext(sparkConf, Durations.seconds(1));
    JavaReceiverInputDStream<twitter4j.Status> stream = TwitterUtils.createStream(ssc);

    FlatMapFunction<twitter4j.Status, String> mapFunc = new FlatMapFunction<twitter4j.Status, String>() {
        @Override
        public Iterable<String> call(twitter4j.Status status) {
            ArrayList<String> hashTag = new ArrayList<String>();
            Pattern p = Pattern.compile("#(\\w+)\\b");
            Matcher m = p.matcher(status.getText());
            while (m.find()) {
                hashTag.add(m.group(1));
            }
            return hashTag;
        }
    };

    VoidFunction<JavaPairRDD<Integer, String>> outFunc = new VoidFunction<JavaPairRDD<Integer, String>>() {
        @Override
        public void call(JavaPairRDD<Integer, String> rdd) {
            List<Tuple2<Integer, String>> list = rdd.take(10);
            Iterator<Tuple2<Integer, String>> ite = list.iterator();
            System.out.println("-------------------------");
            String timeStamp = new SimpleDateFormat("yyyyMMdd_HHmmss").format(Calendar.getInstance().getTime());
            System.out.println("   " + timeStamp);
            System.out.println("-------------------------");
            while (ite.hasNext()) {
                Tuple2<Integer, String> tag = ite.next();
                System.out.println(tag.toString());
            }
        }
    };

    stream.flatMap(mapFunc).mapToPair((String s) -> {
        return new Tuple2<String, Integer>(s, 1);
    }).reduceByKeyAndWindow((Integer i1, Integer i2) -> {
        return i1 + i2;
    }, Durations.seconds(window), Durations.seconds(slide)).mapToPair((Tuple2<String, Integer> item) -> {
        return item.swap();
    }).transformToPair((JavaPairRDD<Integer, String> rdd) -> {
        return rdd.sortByKey(false);
    }).foreachRDD(outFunc);

    ssc.start();
    ssc.awaitTermination();
}
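
Because VoidFunction is a single-abstract-method interface, outFunc above could equally be written as a lambda, matching the lambdas already used in the transformation chain. A minimal equivalent sketch (it omits the timestamp banner for brevity):

    VoidFunction<JavaPairRDD<Integer, String>> outFunc = rdd -> {
        // take(10) brings the top entries back to the driver before printing
        for (Tuple2<Integer, String> tag : rdd.take(10)) {
            System.out.println(tag);
        }
    };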

From source file: cn.lhfei.spark.streaming.NginxlogSorter.java

License: Apache License

public static void main(String[] args) {
    JavaSparkContext sc = null;
    try {
        SparkConf conf = new SparkConf().setMaster("local").setAppName("NginxlogSorter");
        //conf.set("hadoop.home.dir", "/usr/hdp/2.4.0.0-169/hadoop");

        sc = new JavaSparkContext(conf);

        JavaRDD<String> lines = sc.textFile("src/test/resources/nginx_report.txt");

        JavaRDD<NginxLog> items = lines.map(new Function<String, NginxLog>() {
            private static final long serialVersionUID = -1530783780334450383L;

            @Override
            public NginxLog call(String v1) throws Exception {
                NginxLog item = new NginxLog();
                String[] arrays = v1.split("[\\t]");

                if (arrays.length == 3) {
                    item.setIp(arrays[0]);
                    item.setLiveTime(Long.parseLong(arrays[1]));
                    item.setAgent(arrays[2]);
                }
                return item;
            }
        });

        log.info("=================================Length: [{}]", items.count());

        JavaPairRDD<String, Iterable<NginxLog>> keyMaps = items.groupBy(new Function<NginxLog, String>() {

            @Override
            public String call(NginxLog v1) throws Exception {
                return v1.getIp();
            }
        });

        log.info("=================================Group by Key Length: [{}]", keyMaps.count());

        keyMaps.foreach(new VoidFunction<Tuple2<String, Iterable<NginxLog>>>() {

            @Override
            public void call(Tuple2<String, Iterable<NginxLog>> t) throws Exception {
                log.info("++++++++++++++++++++++++++++++++ key: {}", t._1);

                Iterator<NginxLog> ts = t._2().iterator();

                while (ts.hasNext()) {
                    log.info("=====================================[{}]", ts.next().toString());
                }
            }

        });

        keyMaps.saveAsTextFile("src/test/resources/nginx_report-result.txt");

    } catch (Exception e) {
        e.printStackTrace();
    } finally {
        // guard against the case where JavaSparkContext construction itself failed
        if (sc != null) {
            sc.close();
        }
    }
}

From source file: cn.lhfei.spark.streaming.NginxlogSorterApp.java

License: Apache License

public static void main(String[] args) {
    JavaSparkContext sc = null;
    try {
        SparkConf conf = new SparkConf().setMaster("local").setAppName("NginxlogSorterApp");
        sc = new JavaSparkContext(conf);
        JavaRDD<String> lines = sc.textFile(ORIGIN_PATH);

        JavaRDD<NginxLog> items = lines.map(new Function<String, NginxLog>() {
            private static final long serialVersionUID = -1530783780334450383L;

            @Override
            public NginxLog call(String v1) throws Exception {
                NginxLog item = new NginxLog();
                String[] arrays = v1.split("[\\t]");

                if (arrays.length == 3) {
                    item.setIp(arrays[0]);
                    item.setLiveTime(Long.parseLong(arrays[1]));
                    item.setAgent(arrays[2]);
                }
                return item;
            }
        });

        log.info("=================================Length: [{}]", items.count());

        JavaPairRDD<String, Iterable<NginxLog>> keyMaps = items.groupBy(new Function<NginxLog, String>() {

            @Override
            public String call(NginxLog v1) throws Exception {
                return v1.getIp();
            }
        });

        log.info("=================================Group by Key Length: [{}]", keyMaps.count());

        keyMaps.foreach(new VoidFunction<Tuple2<String, Iterable<NginxLog>>>() {

            @Override
            public void call(Tuple2<String, Iterable<NginxLog>> t) throws Exception {
                log.info("++++++++++++++++++++++++++++++++ key: {}", t._1);

                Iterator<NginxLog> ts = t._2().iterator();

                while (ts.hasNext()) {
                    log.info("=====================================[{}]", ts.next().toString());
                }
            }

        });

        FileUtils.deleteDirectory(new File(DESTI_PATH));
        keyMaps.saveAsTextFile(DESTI_PATH);

    } catch (Exception e) {
        e.printStackTrace();
    } finally {
        // guard against the case where JavaSparkContext construction itself failed
        if (sc != null) {
            sc.close();
        }
    }
}

From source file: com.andado.spark.examples.mllib.JavaChiSqSelectorExample.java

License: Apache License

public static void main(String[] args) {

    SparkConf conf = new SparkConf().setAppName("JavaChiSqSelectorExample");
    JavaSparkContext jsc = new JavaSparkContext(conf);

    // $example on$
    JavaRDD<LabeledPoint> points = MLUtils.loadLibSVMFile(jsc.sc(), "data/mllib/sample_libsvm_data.txt")
            .toJavaRDD().cache();

    // Discretize data in 16 equal bins since ChiSqSelector requires categorical features
    // Although features are doubles, the ChiSqSelector treats each unique value as a category
    JavaRDD<LabeledPoint> discretizedData = points.map(new Function<LabeledPoint, LabeledPoint>() {
        @Override
        public LabeledPoint call(LabeledPoint lp) {
            final double[] discretizedFeatures = new double[lp.features().size()];
            for (int i = 0; i < lp.features().size(); ++i) {
                discretizedFeatures[i] = Math.floor(lp.features().apply(i) / 16);
            }
            return new LabeledPoint(lp.label(), Vectors.dense(discretizedFeatures));
        }
    });

    // Create ChiSqSelector that will select top 50 of 692 features
    ChiSqSelector selector = new ChiSqSelector(50);
    // Create ChiSqSelector model (selecting features)
    final ChiSqSelectorModel transformer = selector.fit(discretizedData.rdd());
    // Filter the top 50 features from each feature vector
    JavaRDD<LabeledPoint> filteredData = discretizedData.map(new Function<LabeledPoint, LabeledPoint>() {
        @Override
        public LabeledPoint call(LabeledPoint lp) {
            return new LabeledPoint(lp.label(), transformer.transform(lp.features()));
        }
    });
    // $example off$

    System.out.println("filtered data: ");
    filteredData.foreach(new VoidFunction<LabeledPoint>() {
        @Override
        public void call(LabeledPoint labeledPoint) throws Exception {
            System.out.println(labeledPoint.toString());
        }
    });

    jsc.stop();
}

From source file: com.andado.spark.examples.mllib.JavaElementwiseProductExample.java

License: Apache License

public static void main(String[] args) {

    SparkConf conf = new SparkConf().setAppName("JavaElementwiseProductExample");
    JavaSparkContext jsc = new JavaSparkContext(conf);

    // $example on$
    // Create some vector data; also works for sparse vectors
    JavaRDD<Vector> data = jsc
            .parallelize(Arrays.asList(Vectors.dense(1.0, 2.0, 3.0), Vectors.dense(4.0, 5.0, 6.0)));
    Vector transformingVector = Vectors.dense(0.0, 1.0, 2.0);
    final ElementwiseProduct transformer = new ElementwiseProduct(transformingVector);

    // Batch transform and per-row transform give the same results:
    JavaRDD<Vector> transformedData = transformer.transform(data);
    JavaRDD<Vector> transformedData2 = data.map(new Function<Vector, Vector>() {
        @Override
        public Vector call(Vector v) {
            return transformer.transform(v);
        }
    });
    // $example off$

    System.out.println("transformedData: ");
    transformedData.foreach(new VoidFunction<Vector>() {
        @Override
        public void call(Vector vector) throws Exception {
            System.out.println(vector.toString());
        }
    });

    System.out.println("transformedData2: ");
    transformedData2.foreach(new VoidFunction<Vector>() {
        @Override
        public void call(Vector vector) throws Exception {
            System.out.println(vector.toString());
        }
    });

    jsc.stop();
}

From source file: com.andado.spark.examples.mllib.JavaStreamingTestExample.java

License: Apache License

public static void main(String[] args) throws Exception {
    if (args.length != 3) {
        System.err.println("Usage: JavaStreamingTestExample <dataDir> <batchDuration> <numBatchesTimeout>");
        System.exit(1);
    }

    String dataDir = args[0];
    Duration batchDuration = Seconds.apply(Long.parseLong(args[1]));
    int numBatchesTimeout = Integer.parseInt(args[2]);

    SparkConf conf = new SparkConf().setMaster("local").setAppName("StreamingTestExample");
    JavaStreamingContext ssc = new JavaStreamingContext(conf, batchDuration);

    ssc.checkpoint(Utils.createTempDir(System.getProperty("java.io.tmpdir"), "spark").toString());

    // $example on$
    JavaDStream<BinarySample> data = ssc.textFileStream(dataDir).map(new Function<String, BinarySample>() {
        @Override
        public BinarySample call(String line) {
            String[] ts = line.split(",");
            boolean label = Boolean.parseBoolean(ts[0]);
            double value = Double.parseDouble(ts[1]);
            return new BinarySample(label, value);
        }
    });

    StreamingTest streamingTest = new StreamingTest().setPeacePeriod(0).setWindowSize(0).setTestMethod("welch");

    JavaDStream<StreamingTestResult> out = streamingTest.registerStream(data);
    out.print();
    // $example off$

    // Stop processing if test becomes significant or we time out
    timeoutCounter = numBatchesTimeout;

    out.foreachRDD(new VoidFunction<JavaRDD<StreamingTestResult>>() {
        @Override
        public void call(JavaRDD<StreamingTestResult> rdd) {
            timeoutCounter -= 1;

            boolean anySignificant = !rdd.filter(new Function<StreamingTestResult, Boolean>() {
                @Override
                public Boolean call(StreamingTestResult v) {
                    return v.pValue() < 0.05;
                }
            }).isEmpty();

            if (timeoutCounter <= 0 || anySignificant) {
                rdd.context().stop();
            }
        }
    });

    ssc.start();
    ssc.awaitTermination();
}

From source file: com.anhth12.lambda.fn.Functions.java

public static <T> VoidFunction<T> noOp() {
    return new VoidFunction<T>() {

        @Override
        public void call(T t) throws Exception {
            // do nothing
        }
    };
}

From source file: com.cloudera.oryx.lambda.Functions.java

License: Open Source License

public static <T> VoidFunction<T> noOp() {
    return new VoidFunction<T>() {
        @Override
        public void call(T t) {
            // do nothing
        }
    };
}
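
Both noOp() variants above build a VoidFunction that ignores its argument. One plausible use, an assumption on my part rather than something shown in either source file, is registering an output operation so that a DStream's batches are actually computed without producing any output. A self-contained sketch:

import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.function.VoidFunction;
import org.apache.spark.streaming.api.java.JavaDStream;

public final class NoOpUsageSketch {

    // Mirrors the noOp() helpers above: a VoidFunction that does nothing
    public static <T> VoidFunction<T> noOp() {
        return new VoidFunction<T>() {
            @Override
            public void call(T t) {
                // do nothing
            }
        };
    }

    // foreachRDD registers an output operation, forcing each batch to be
    // computed even though nothing is done with the data
    public static void materialize(JavaDStream<String> stream) {
        stream.foreachRDD(NoOpUsageSketch.<JavaRDD<String>>noOp());
    }
}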

From source file: com.cts.kafkaspark.SparkConsumer.java

public static void main(String[] args) throws InterruptedException {

    //Set kafka consumer properties
    HashMap<String, String> kafkaParams = Configuration.setKafkaConsumerParameter();
    //Set spark properties
    SparkConf sparkConf = Configuration.setSparkParameter();
    // Note: Duration takes milliseconds, so this is a 1 ms batch interval
    JavaStreamingContext jssc = new JavaStreamingContext(sparkConf, new Duration(1));

    //Create stream for accessing data from kafka broker on topic jsondata
    JavaPairInputDStream<String, String> jsonData = StreamCreator.createJsonDataStream(jssc, kafkaParams,
            new HashSet<String>(Arrays.asList(Configuration.topic1.split(","))));
    //  JavaPairInputDStream<String, String> derbyData = StreamCreator.createJsonDataStream(jssc, kafkaParams, new HashSet<String>(Arrays.asList("derbydata".split(","))));
    //Read data from stream and store into disk
    jsonData.map(new Function<Tuple2<String, String>, String>() {
        @Override
        public String call(Tuple2<String, String> tuple2) {
            return tuple2._2();
        }
    }).foreachRDD(new VoidFunction<JavaRDD<String>>() {
        @Override
        public void call(JavaRDD<String> rdd) throws Exception {
            if (!rdd.isEmpty()) {
                //rdd.saveAsTextFile("C:\\Users\\Downloads\\file2.txt");
                rdd.collect();
            }

        }
    });

    //
    //        derbyData.map(new Function<Tuple2<String, String>, String>() {
    //            @Override
    //            public String call(Tuple2<String, String> tuple2) {
    //                return tuple2._2();
    //            }
    //        }).foreachRDD(new Function<JavaRDD<String>, Void>() {
    //            @Override
    //            public Void call(JavaRDD<String> rdd) throws Exception {
    //                if (!rdd.isEmpty()) {
    //                    rdd.saveAsTextFile("C:\\Users\\Downloads\\file3.txt");
    //                }
    //                return null;
    //            }
    //        });
    jssc.start();
    jssc.awaitTermination();

}

From source file: com.github.heuermh.adam.commands.JavaCountAlignments.java

License: Apache License

private void run(final SparkContext sc) {
    ADAMContext ac = new ADAMContext(sc);
    JavaADAMContext javaAdamContext = new JavaADAMContext(ac);
    AlignmentRecordRDD alignments = javaAdamContext.loadAlignments(inputPath);
    JavaRDD<AlignmentRecord> jrdd = alignments.jrdd();

    JavaRDD<String> contigNames = jrdd.map(new Function<AlignmentRecord, String>() {
        @Override
        public String call(final AlignmentRecord rec) {
            return rec.getReadMapped() ? rec.getContigName() : "unmapped";
        }
    });

    JavaPairRDD<String, Integer> counts = contigNames.mapToPair(new PairFunction<String, String, Integer>() {
        @Override
        public Tuple2<String, Integer> call(final String contigName) {
            return new Tuple2<String, Integer>(contigName, Integer.valueOf(1));
        }
    });

    JavaPairRDD<String, Integer> reducedCounts = counts.reduceByKey(new Function2<Integer, Integer, Integer>() {
        @Override
        public Integer call(final Integer value0, final Integer value1) {
            return Integer.valueOf(value0.intValue() + value1.intValue());
        }
    });

    reducedCounts.foreach(new VoidFunction<Tuple2<String, Integer>>() {
        @Override
        public void call(final Tuple2<String, Integer> value) {
            System.out.println(value.toString());
        }
    });
}
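
One caveat that applies to several foreach examples on this page, including the one above: a System.out.println inside a VoidFunction runs in the executor JVMs, so on a real cluster the output lands in executor logs rather than the driver console (local-mode runs print to the driver because everything shares one JVM). To print on the driver instead, bring a bounded sample back first; a sketch reusing reducedCounts from the method above:

    // Bring a bounded number of results back to the driver and print there
    for (Tuple2<String, Integer> value : reducedCounts.take(100)) {
        System.out.println(value);
    }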