List of usage examples for org.apache.spark.api.java.function.VoidFunction
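VoidFunction<T> is the interface Spark's Java API uses for operations invoked purely for their side effects, such as JavaRDD.foreach and DStream.foreachRDD; its single method, void call(T t) throws Exception, returns nothing. Before the full examples below, a minimal self-contained sketch (the local master, inline data, and class name are illustrative, not taken from any of the listed sources):

import java.util.Arrays;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.VoidFunction;

public class VoidFunctionSketch {
    public static void main(String[] args) {
        SparkConf conf = new SparkConf().setMaster("local").setAppName("VoidFunctionSketch");
        JavaSparkContext sc = new JavaSparkContext(conf);
        // foreach takes a VoidFunction: it performs a side effect and returns nothing
        sc.parallelize(Arrays.asList("a", "b", "c")).foreach(new VoidFunction<String>() {
            @Override
            public void call(String s) throws Exception {
                System.out.println(s);
            }
        });
        sc.stop();
    }
}

Because VoidFunction has a single abstract method, applications built with Java 8+ can pass a lambda instead of an anonymous class, e.g. rdd.foreach(s -> System.out.println(s)); the examples below use the anonymous-class form.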
From source file:TwitterHashTagCount.java
License:Apache License
private static void twitterStreaming(int window, int slide) {
    // Create the context with a 1 second batch size
    SparkConf sparkConf = new SparkConf().setAppName("JavaNetworkWordCount");
    JavaStreamingContext ssc = new JavaStreamingContext(sparkConf, Durations.seconds(1));
    JavaReceiverInputDStream<twitter4j.Status> stream = TwitterUtils.createStream(ssc);

    FlatMapFunction<twitter4j.Status, String> mapFunc = new FlatMapFunction<twitter4j.Status, String>() {
        @Override
        public Iterable<String> call(twitter4j.Status status) {
            ArrayList<String> hashTag = new ArrayList<String>();
            Pattern p = Pattern.compile("#(\\w+)\\b");
            Matcher m = p.matcher(status.getText());
            while (m.find()) {
                hashTag.add(m.group(1));
            }
            return hashTag;
        }
    };

    VoidFunction<JavaPairRDD<Integer, String>> outFunc = new VoidFunction<JavaPairRDD<Integer, String>>() {
        @Override
        public void call(JavaPairRDD<Integer, String> rdd) {
            List<Tuple2<Integer, String>> list = rdd.take(10);
            Iterator<Tuple2<Integer, String>> ite = list.iterator();
            System.out.println("-------------------------");
            String timeStamp = new SimpleDateFormat("yyyyMMdd_HHmmss").format(Calendar.getInstance().getTime());
            System.out.println(" " + timeStamp);
            System.out.println("-------------------------");
            while (ite.hasNext()) {
                Tuple2<Integer, String> tag = ite.next();
                System.out.println(tag.toString());
            }
        }
    };

    stream.flatMap(mapFunc)
        .mapToPair((String s) -> {
            return new Tuple2<String, Integer>(s, 1);
        })
        .reduceByKeyAndWindow((Integer i1, Integer i2) -> {
            return i1 + i2;
        }, Durations.seconds(window), Durations.seconds(slide))
        .mapToPair((Tuple2<String, Integer> item) -> {
            return item.swap();
        })
        .transformToPair((JavaPairRDD<Integer, String> rdd) -> {
            return rdd.sortByKey(false);
        })
        .foreachRDD(outFunc);

    ssc.start();
    ssc.awaitTermination();
}
From source file:cn.lhfei.spark.streaming.NginxlogSorter.java
License:Apache License
public static void main(String[] args) {
    JavaSparkContext sc = null;
    try {
        SparkConf conf = new SparkConf().setMaster("local").setAppName("NginxlogSorter");
        //conf.set("hadoop.home.dir", "/usr/hdp/2.4.0.0-169/hadoop");
        sc = new JavaSparkContext(conf);
        JavaRDD<String> lines = sc.textFile("src/test/resources/nginx_report.txt");
        JavaRDD<NginxLog> items = lines.map(new Function<String, NginxLog>() {
            private static final long serialVersionUID = -1530783780334450383L;

            @Override
            public NginxLog call(String v1) throws Exception {
                NginxLog item = new NginxLog();
                String[] arrays = v1.split("[\\t]");
                if (arrays.length == 3) {
                    item.setIp(arrays[0]);
                    item.setLiveTime(Long.parseLong(arrays[1]));
                    item.setAgent(arrays[2]);
                }
                return item;
            }
        });
        log.info("=================================Length: [{}]", items.count());

        JavaPairRDD<String, Iterable<NginxLog>> keyMaps = items.groupBy(new Function<NginxLog, String>() {
            @Override
            public String call(NginxLog v1) throws Exception {
                return v1.getIp();
            }
        });
        log.info("=================================Group by Key Length: [{}]", keyMaps.count());

        keyMaps.foreach(new VoidFunction<Tuple2<String, Iterable<NginxLog>>>() {
            @Override
            public void call(Tuple2<String, Iterable<NginxLog>> t) throws Exception {
                log.info("++++++++++++++++++++++++++++++++ key: {}", t._1);
                Iterator<NginxLog> ts = t._2().iterator();
                while (ts.hasNext()) {
                    log.info("=====================================[{}]", ts.next().toString());
                }
            }
        });

        keyMaps.saveAsTextFile("src/test/resources/nginx_report-result.txt");
    } catch (Exception e) {
        e.printStackTrace();
    } finally {
        sc.close();
    }
}
From source file:cn.lhfei.spark.streaming.NginxlogSorterApp.java
License:Apache License
public static void main(String[] args) {
    JavaSparkContext sc = null;
    try {
        SparkConf conf = new SparkConf().setMaster("local").setAppName("NginxlogSorterApp");
        sc = new JavaSparkContext(conf);
        JavaRDD<String> lines = sc.textFile(ORIGIN_PATH);
        JavaRDD<NginxLog> items = lines.map(new Function<String, NginxLog>() {
            private static final long serialVersionUID = -1530783780334450383L;

            @Override
            public NginxLog call(String v1) throws Exception {
                NginxLog item = new NginxLog();
                String[] arrays = v1.split("[\\t]");
                if (arrays.length == 3) {
                    item.setIp(arrays[0]);
                    item.setLiveTime(Long.parseLong(arrays[1]));
                    item.setAgent(arrays[2]);
                }
                return item;
            }
        });
        log.info("=================================Length: [{}]", items.count());

        JavaPairRDD<String, Iterable<NginxLog>> keyMaps = items.groupBy(new Function<NginxLog, String>() {
            @Override
            public String call(NginxLog v1) throws Exception {
                return v1.getIp();
            }
        });
        log.info("=================================Group by Key Length: [{}]", keyMaps.count());

        keyMaps.foreach(new VoidFunction<Tuple2<String, Iterable<NginxLog>>>() {
            @Override
            public void call(Tuple2<String, Iterable<NginxLog>> t) throws Exception {
                log.info("++++++++++++++++++++++++++++++++ key: {}", t._1);
                Iterator<NginxLog> ts = t._2().iterator();
                while (ts.hasNext()) {
                    log.info("=====================================[{}]", ts.next().toString());
                }
            }
        });

        FileUtils.deleteDirectory(new File(DESTI_PATH));
        keyMaps.saveAsTextFile(DESTI_PATH);
    } catch (Exception e) {
        e.printStackTrace();
    } finally {
        sc.close();
    }
}
From source file:com.andado.spark.examples.mllib.JavaChiSqSelectorExample.java
License:Apache License
public static void main(String[] args) {
    SparkConf conf = new SparkConf().setAppName("JavaChiSqSelectorExample");
    JavaSparkContext jsc = new JavaSparkContext(conf);
    // $example on$
    JavaRDD<LabeledPoint> points = MLUtils.loadLibSVMFile(jsc.sc(), "data/mllib/sample_libsvm_data.txt")
            .toJavaRDD().cache();

    // Discretize data in 16 equal bins since ChiSqSelector requires categorical features
    // Although features are doubles, the ChiSqSelector treats each unique value as a category
    JavaRDD<LabeledPoint> discretizedData = points.map(new Function<LabeledPoint, LabeledPoint>() {
        @Override
        public LabeledPoint call(LabeledPoint lp) {
            final double[] discretizedFeatures = new double[lp.features().size()];
            for (int i = 0; i < lp.features().size(); ++i) {
                discretizedFeatures[i] = Math.floor(lp.features().apply(i) / 16);
            }
            return new LabeledPoint(lp.label(), Vectors.dense(discretizedFeatures));
        }
    });

    // Create ChiSqSelector that will select top 50 of 692 features
    ChiSqSelector selector = new ChiSqSelector(50);
    // Create ChiSqSelector model (selecting features)
    final ChiSqSelectorModel transformer = selector.fit(discretizedData.rdd());
    // Filter the top 50 features from each feature vector
    JavaRDD<LabeledPoint> filteredData = discretizedData.map(new Function<LabeledPoint, LabeledPoint>() {
        @Override
        public LabeledPoint call(LabeledPoint lp) {
            return new LabeledPoint(lp.label(), transformer.transform(lp.features()));
        }
    });
    // $example off$

    System.out.println("filtered data: ");
    filteredData.foreach(new VoidFunction<LabeledPoint>() {
        @Override
        public void call(LabeledPoint labeledPoint) throws Exception {
            System.out.println(labeledPoint.toString());
        }
    });

    jsc.stop();
}
From source file:com.andado.spark.examples.mllib.JavaElementwiseProductExample.java
License:Apache License
public static void main(String[] args) {
    SparkConf conf = new SparkConf().setAppName("JavaElementwiseProductExample");
    JavaSparkContext jsc = new JavaSparkContext(conf);
    // $example on$
    // Create some vector data; also works for sparse vectors
    JavaRDD<Vector> data = jsc
            .parallelize(Arrays.asList(Vectors.dense(1.0, 2.0, 3.0), Vectors.dense(4.0, 5.0, 6.0)));
    Vector transformingVector = Vectors.dense(0.0, 1.0, 2.0);
    final ElementwiseProduct transformer = new ElementwiseProduct(transformingVector);

    // Batch transform and per-row transform give the same results:
    JavaRDD<Vector> transformedData = transformer.transform(data);
    JavaRDD<Vector> transformedData2 = data.map(new Function<Vector, Vector>() {
        @Override
        public Vector call(Vector v) {
            return transformer.transform(v);
        }
    });
    // $example off$

    System.out.println("transformedData: ");
    transformedData.foreach(new VoidFunction<Vector>() {
        @Override
        public void call(Vector vector) throws Exception {
            System.out.println(vector.toString());
        }
    });
    System.out.println("transformedData2: ");
    transformedData2.foreach(new VoidFunction<Vector>() {
        @Override
        public void call(Vector vector) throws Exception {
            System.out.println(vector.toString());
        }
    });

    jsc.stop();
}
From source file:com.andado.spark.examples.mllib.JavaStreamingTestExample.java
License:Apache License
public static void main(String[] args) throws Exception {
    if (args.length != 3) {
        System.err.println("Usage: JavaStreamingTestExample " + "<dataDir> <batchDuration> <numBatchesTimeout>");
        System.exit(1);
    }

    String dataDir = args[0];
    Duration batchDuration = Seconds.apply(Long.parseLong(args[1]));
    int numBatchesTimeout = Integer.parseInt(args[2]);

    SparkConf conf = new SparkConf().setMaster("local").setAppName("StreamingTestExample");
    JavaStreamingContext ssc = new JavaStreamingContext(conf, batchDuration);
    ssc.checkpoint(Utils.createTempDir(System.getProperty("java.io.tmpdir"), "spark").toString());

    // $example on$
    JavaDStream<BinarySample> data = ssc.textFileStream(dataDir).map(new Function<String, BinarySample>() {
        @Override
        public BinarySample call(String line) {
            String[] ts = line.split(",");
            boolean label = Boolean.parseBoolean(ts[0]);
            double value = Double.parseDouble(ts[1]);
            return new BinarySample(label, value);
        }
    });

    StreamingTest streamingTest = new StreamingTest().setPeacePeriod(0).setWindowSize(0).setTestMethod("welch");

    JavaDStream<StreamingTestResult> out = streamingTest.registerStream(data);
    out.print();
    // $example off$

    // Stop processing if test becomes significant or we time out
    timeoutCounter = numBatchesTimeout;

    out.foreachRDD(new VoidFunction<JavaRDD<StreamingTestResult>>() {
        @Override
        public void call(JavaRDD<StreamingTestResult> rdd) {
            timeoutCounter -= 1;

            boolean anySignificant = !rdd.filter(new Function<StreamingTestResult, Boolean>() {
                @Override
                public Boolean call(StreamingTestResult v) {
                    return v.pValue() < 0.05;
                }
            }).isEmpty();

            if (timeoutCounter <= 0 || anySignificant) {
                rdd.context().stop();
            }
        }
    });

    ssc.start();
    ssc.awaitTermination();
}
From source file:com.anhth12.lambda.fn.Functions.java
public static <T> VoidFunction<T> noOp() {
    return new VoidFunction<T>() {
        @Override
        public void call(T t) throws Exception {
            // do nothing
        }
    };
}
From source file:com.cloudera.oryx.lambda.Functions.java
License:Open Source License
public static <T> VoidFunction<T> noOp() {
    return new VoidFunction<T>() {
        @Override
        public void call(T t) {
            // do nothing
        }
    };
}
From source file:com.cts.kafkaspark.SparkConsumer.java
public static void main(String[] args) throws InterruptedException {
    //Set kafka consumer properties
    HashMap<String, String> kafkaParams = Configuration.setKafkaConsumerParameter();
    //Set spark properties
    SparkConf sparkConf = Configuration.setSparkParameter();
    JavaStreamingContext jssc = new JavaStreamingContext(sparkConf, new Duration(1));
    //Create stream for accessing data from kafka broker on topic jsondata
    JavaPairInputDStream<String, String> jsonData = StreamCreator.createJsonDataStream(jssc, kafkaParams,
            new HashSet<String>(Arrays.asList(Configuration.topic1.split(","))));
    // JavaPairInputDStream<String, String> derbyData = StreamCreator.createJsonDataStream(jssc, kafkaParams,
    //         new HashSet<String>(Arrays.asList("derbydata".split(","))));

    //Read data from stream and store into disk
    jsonData.map(new Function<Tuple2<String, String>, String>() {
        @Override
        public String call(Tuple2<String, String> tuple2) {
            return tuple2._2();
        }
    }).foreachRDD(new VoidFunction<JavaRDD<String>>() {
        @Override
        public void call(JavaRDD<String> rdd) throws Exception {
            if (!rdd.isEmpty()) {
                //rdd.saveAsTextFile("C:\\Users\\Downloads\\file2.txt");
                rdd.collect();
            }
        }
    });

    // derbyData.map(new Function<Tuple2<String, String>, String>() {
    //     @Override
    //     public String call(Tuple2<String, String> tuple2) {
    //         return tuple2._2();
    //     }
    // }).foreachRDD(new Function<JavaRDD<String>, Void>() {
    //     @Override
    //     public Void call(JavaRDD<String> rdd) throws Exception {
    //         if (!rdd.isEmpty()) {
    //             rdd.saveAsTextFile("C:\\Users\\Downloads\\file3.txt");
    //         }
    //         return null;
    //     }
    // });

    jssc.start();
    jssc.awaitTermination();
}
From source file:com.github.heuermh.adam.commands.JavaCountAlignments.java
License:Apache License
private void run(final SparkContext sc) {
    ADAMContext ac = new ADAMContext(sc);
    JavaADAMContext javaAdamContext = new JavaADAMContext(ac);
    AlignmentRecordRDD alignments = javaAdamContext.loadAlignments(inputPath);
    JavaRDD<AlignmentRecord> jrdd = alignments.jrdd();

    JavaRDD<String> contigNames = jrdd.map(new Function<AlignmentRecord, String>() {
        @Override
        public String call(final AlignmentRecord rec) {
            return rec.getReadMapped() ? rec.getContigName() : "unmapped";
        }
    });

    JavaPairRDD<String, Integer> counts = contigNames.mapToPair(new PairFunction<String, String, Integer>() {
        @Override
        public Tuple2<String, Integer> call(final String contigName) {
            return new Tuple2<String, Integer>(contigName, Integer.valueOf(1));
        }
    });

    JavaPairRDD<String, Integer> reducedCounts = counts.reduceByKey(new Function2<Integer, Integer, Integer>() {
        @Override
        public Integer call(final Integer value0, final Integer value1) {
            return Integer.valueOf(value0.intValue() + value1.intValue());
        }
    });

    reducedCounts.foreach(new VoidFunction<Tuple2<String, Integer>>() {
        @Override
        public void call(final Tuple2<String, Integer> value) {
            System.out.println(value.toString());
        }
    });
}