List of usage examples for com.google.common.collect.Lists.newArrayList
@GwtCompatible(serializable = true) public static <E> ArrayList<E> newArrayList(Iterator<? extends E> elements)
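Before the full source-file examples below, here is a minimal, self-contained sketch of this overload; the class and variable names are illustrative, not taken from any of the source files:

import com.google.common.collect.Lists;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Iterator;

public class NewArrayListFromIterator {
    public static void main(String[] args) {
        // Any Iterator<? extends E> will do; here we borrow one from a fixed list.
        Iterator<String> source = Arrays.asList("spark", "kafka", "guava").iterator();

        // newArrayList(Iterator) drains the iterator into a new, mutable ArrayList.
        ArrayList<String> copy = Lists.newArrayList(source);

        copy.add("extra");            // the returned list is freely mutable
        System.out.println(copy);     // [spark, kafka, guava, extra]
    }
}

This overload is useful because ArrayList has no constructor that accepts an Iterator. Note that most of the examples below actually hit the sibling varargs and Iterable overloads (e.g. wrapping the String[] returned by Pattern.split), which behave analogously.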
From source file:com.weibangong.spark.streaming.JavaSqlNetworkWordCount.java
public static void main(String[] args) {
    if (args.length < 2) {
        System.err.println("Usage: JavaSqlNetworkWordCount <hostname> <port>");
        System.exit(1);
    }

    // Create the context with a 1 second batch size
    SparkConf sparkConf = new SparkConf().setAppName("JavaSqlNetworkWordCount");
    JavaStreamingContext ssc = new JavaStreamingContext(sparkConf, Durations.seconds(1));

    // Create a JavaReceiverInputDStream on the target ip:port and count the
    // words in the input stream of \n delimited text (e.g. generated by 'nc').
    // Note: a storage level without replication is only suitable for local runs;
    // replication is necessary in a distributed scenario for fault tolerance.
    JavaReceiverInputDStream<String> lines = ssc.socketTextStream(args[0], Integer.parseInt(args[1]),
            StorageLevels.MEMORY_AND_DISK_SER);
    JavaDStream<String> words = lines.flatMap(new FlatMapFunction<String, String>() {
        @Override
        public Iterable<String> call(String x) {
            return Lists.newArrayList(SPACE.split(x));
        }
    });

    // Convert RDDs of the words DStream to DataFrames and run a SQL query
    words.foreachRDD(new Function2<JavaRDD<String>, Time, Void>() {
        @Override
        public Void call(JavaRDD<String> rdd, Time time) {
            SQLContext sqlContext = JavaSQLContextSingleton.getInstance(rdd.context());

            // Convert JavaRDD[String] to JavaRDD[bean class] to DataFrame
            JavaRDD<JavaRecord> rowRDD = rdd.map(new Function<String, JavaRecord>() {
                public JavaRecord call(String word) {
                    JavaRecord record = new JavaRecord();
                    record.setWord(word);
                    return record;
                }
            });
            DataFrame wordsDataFrame = sqlContext.createDataFrame(rowRDD, JavaRecord.class);

            // Register as a table
            wordsDataFrame.registerTempTable("words");

            // Do a word count on the table using SQL and print it
            DataFrame wordCountsDataFrame = sqlContext
                    .sql("select word, count(*) as total from words group by word");
            System.out.println("========= " + time + "=========");
            wordCountsDataFrame.show();
            return null;
        }
    });

    ssc.start();
    ssc.awaitTermination();
}
From source file:JavaNetworkWordCount.java
public static void main(String[] args) {
    if (args.length < 3) {
        System.err.println("Usage: JavaNetworkWordCount <app name> <code> <refresh rate in seconds>");
        System.exit(1);
    }

    // Note: further down, args[0] doubles as both the application name and the
    // hostname to connect to, and args[1] (<code>) must be a port number.
    String name = args[0];
    String kernel = args[1];
    int refreshRateSeconds = Integer.parseInt(args[2]);

    StreamingExamples.setStreamingLogLevels();

    // Create the context with a <refreshRateSeconds> second batch size
    SparkConf sparkConf = new SparkConf().setAppName(name);
    JavaStreamingContext ssc = new JavaStreamingContext(sparkConf, Durations.seconds(refreshRateSeconds));

    // Create a JavaReceiverInputDStream on the target ip:port and count the
    // words in the input stream of \n delimited text (e.g. generated by 'nc').
    // Note: a storage level without replication is only suitable for local runs;
    // replication is necessary in a distributed scenario for fault tolerance.
    JavaReceiverInputDStream<String> lines = ssc.socketTextStream(args[0], Integer.parseInt(args[1]),
            StorageLevels.MEMORY_AND_DISK_SER);
    JavaDStream<String> words = lines.flatMap(new FlatMapFunction<String, String>() {
        @Override
        public Iterable<String> call(String x) {
            return Lists.newArrayList(DELIMITER.split(x));
        }
    });
    JavaPairDStream<String, Integer> wordCounts = words.mapToPair(new PairFunction<String, String, Integer>() {
        @Override
        public Tuple2<String, Integer> call(String s) {
            return new Tuple2<String, Integer>(s, 1);
        }
    }).reduceByKey(new Function2<Integer, Integer, Integer>() {
        @Override
        public Integer call(Integer i1, Integer i2) {
            return i1 + i2;
        }
    });

    wordCounts.print();
    ssc.start();
    ssc.awaitTermination();
}
From source file:com.spectralogic.ds3client.samples.RecoverJobExample.java
public static void main(final String[] args)
        throws IOException, URISyntaxException, SignatureException, XmlProcessingException {
    // Get a client builder and then build a client instance. This is the main entry point to the SDK.
    try (final Ds3Client client = Ds3ClientBuilder.fromEnv().withHttps(false).build()) {
        // The bucket we are interested in getting objects from.
        final String bucketName = "recover_get_books_job_bucket";

        final Ds3ClientHelpers helper = Ds3ClientHelpers.wrap(client);
        helper.ensureBucketExists(bucketName);

        // Our local path which contains all the files that we want to transfer.
        // This example assumes that there are at least two files in the "input" directory.
        final String inputDir = "input/";
        final Path inputPath = Paths.get(inputDir);

        // Get the list of files that are contained in the inputPath
        final Iterable<Ds3Object> objects = helper.listObjectsForDirectory(inputPath);

        // Create the write job with the bucket we want to write to and the list
        // of objects that will be written
        final Ds3ClientHelpers.Job job = helper.startWriteJob(bucketName, objects);

        // Start the write job using an Object Putter that will read the files
        // from the local file system.
        job.transfer(new FileObjectPutter(inputPath));

        // Create a local output directory to place retrieved objects into
        final Path downloadPath = FileSystems.getDefault().getPath("output/");
        if (!Files.exists(downloadPath)) {
            Files.createDirectory(downloadPath);
        }

        // Get the first object
        final List<Ds3Object> objectsList = Lists.newArrayList(objects);
        final Ds3ClientHelpers.Job readJob = helper.startReadJob(bucketName, objectsList);

        // Explicitly get only the 1st object for this example, in order to "recover" the job while in progress.
        final Ds3Object object1 = objectsList.get(0);
        final FileChannel channel1 = FileChannel.open(downloadPath.resolve(object1.getName()),
                StandardOpenOption.WRITE, StandardOpenOption.CREATE, StandardOpenOption.TRUNCATE_EXISTING);
        client.getObject(new GetObjectRequest(bucketName, object1.getName(), channel1, readJob.getJobId(), 0));

        // Here is where we attempt to recover from a hypothetical interruption - before we get
        // the 2nd object, and while the job is still "In Progress".
        try {
            // Ask the server for all unsent chunks from readJob
            final Ds3ClientHelpers.Job recoverJob = helper.recoverReadJob(readJob.getJobId());

            // Use the transfer() method for multithreaded parallel transfer.
            recoverJob.transfer(new FileObjectGetter(downloadPath));
        } catch (final JobRecoveryException e) {
            System.out.println("Could not recover ReadJob " + readJob.getJobId().toString());
            e.printStackTrace();
        }
    } catch (final IOException e) {
        System.out.println(
                "Unable to create a client from ENV. Please verify that DS3_ENDPOINT, DS3_ACCESS_KEY, and DS3_SECRET_KEY are defined.");
    }
}
From source file:JavaKafkaWordCount_old.java
public static void main(String[] args) {
    SparkConf sparkConf = new SparkConf().setAppName("JavaKafkaWordCount");
    sparkConf.setMaster("local[2]");

    // Create the context with a 2 second batch size
    JavaStreamingContext jssc = new JavaStreamingContext(sparkConf, new Duration(2000));

    int numThreads = 1;
    String zkQuorum = "localhost:5181";
    String group = "test-consumer-group";
    Map<String, Integer> topicMap = new HashMap<String, Integer>();
    topicMap.put("test", numThreads);

    JavaPairReceiverInputDStream<String, String> messages = KafkaUtils.createStream(jssc, zkQuorum, group,
            topicMap);

    JavaDStream<String> lines = messages.map(new Function<Tuple2<String, String>, String>() {
        @Override
        public String call(Tuple2<String, String> tuple2) {
            return tuple2._2();
        }
    });

    JavaDStream<String> words = lines.flatMap(new FlatMapFunction<String, String>() {
        @Override
        public Iterable<String> call(String x) {
            return Lists.newArrayList(SPACE.split(x));
        }
    });

    JavaPairDStream<String, Integer> wordCounts = words.mapToPair(new PairFunction<String, String, Integer>() {
        @Override
        public Tuple2<String, Integer> call(String s) {
            return new Tuple2<String, Integer>(s, 1);
        }
    }).reduceByKey(new Function2<Integer, Integer, Integer>() {
        @Override
        public Integer call(Integer i1, Integer i2) {
            return i1 + i2;
        }
    });

    wordCounts.print();
    jssc.start();
    jssc.awaitTermination();
}
From source file:cn.com.warlock.streaming.JavaDirectKafkaWordCount.java
public static void main(String[] args) {
    if (args.length < 2) {
        System.err.println("Usage: JavaDirectKafkaWordCount <brokers> <topics>\n"
                + "  <brokers> is a list of one or more Kafka brokers\n"
                + "  <topics> is a list of one or more kafka topics to consume from\n\n");
        System.exit(1);
    }

    String brokers = args[0];
    String topics = args[1];

    // Create the StreamingContext with a 2 second batch interval
    SparkConf sparkConf = new SparkConf().setAppName("JavaDirectKafkaWordCount");
    JavaStreamingContext jssc = new JavaStreamingContext(sparkConf, Durations.seconds(2));

    HashSet<String> topicsSet = new HashSet<String>(Arrays.asList(topics.split(",")));
    HashMap<String, String> kafkaParams = new HashMap<String, String>();
    kafkaParams.put("metadata.broker.list", brokers);
    kafkaParams.put("auto.offset.reset", "smallest");

    // Create a direct Kafka stream from the given brokers and topics
    JavaPairInputDStream<String, String> messages = KafkaUtils.createDirectStream(jssc, String.class,
            String.class, StringDecoder.class, StringDecoder.class, kafkaParams, topicsSet);

    // Get the lines from the (key, value) messages
    JavaDStream<String> lines = messages.map(new Function<Tuple2<String, String>, String>() {
        @Override
        public String call(Tuple2<String, String> tuple2) {
            return tuple2._2();
        }
    });

    // Split each line into words
    JavaDStream<String> words = lines.flatMap(new FlatMapFunction<String, String>() {
        @Override
        public Iterable<String> call(String x) {
            return Lists.newArrayList(SPACE.split(x));
        }
    });

    // Count the words
    JavaPairDStream<String, Integer> wordCounts = words.mapToPair(new PairFunction<String, String, Integer>() {
        @Override
        public Tuple2<String, Integer> call(String s) {
            return new Tuple2<String, Integer>(s, 1);
        }
    }).reduceByKey(new Function2<Integer, Integer, Integer>() {
        @Override
        public Integer call(Integer i1, Integer i2) {
            return i1 + i2;
        }
    });

    wordCounts.print();

    // Start the computation
    jssc.start();
    jssc.awaitTermination();
}
From source file:com.naltel.spark.JavaSqlNetworkWordCount.java
public static void main(String[] args) {
    if (args.length < 2) {
        System.err.println("Usage: JavaSqlNetworkWordCount <hostname> <port>");
        System.exit(1);
    }

    StreamingExamples.setStreamingLogLevels();

    // Create the context with a 1 second batch size
    SparkConf sparkConf = new SparkConf().setAppName("JavaSqlNetworkWordCount");
    JavaStreamingContext ssc = new JavaStreamingContext(sparkConf, Durations.seconds(1));

    // Create a JavaReceiverInputDStream on the target ip:port and count the
    // words in the input stream of \n delimited text (e.g. generated by 'nc').
    // Note: a storage level without replication is only suitable for local runs;
    // replication is necessary in a distributed scenario for fault tolerance.
    JavaReceiverInputDStream<String> lines = ssc.socketTextStream(args[0], Integer.parseInt(args[1]),
            StorageLevels.MEMORY_AND_DISK_SER);
    JavaDStream<String> words = lines.flatMap(new FlatMapFunction<String, String>() {
        @Override
        public Iterable<String> call(String x) {
            return Lists.newArrayList(SPACE.split(x));
        }
    });

    // Convert RDDs of the words DStream to DataFrames and run a SQL query
    words.foreachRDD(new Function2<JavaRDD<String>, Time, Void>() {
        @Override
        public Void call(JavaRDD<String> rdd, Time time) {
            SQLContext sqlContext = JavaSQLContextSingleton.getInstance(rdd.context());

            // Convert JavaRDD[String] to JavaRDD[bean class] to DataFrame
            JavaRDD<JavaRecord> rowRDD = rdd.map(new Function<String, JavaRecord>() {
                public JavaRecord call(String word) {
                    JavaRecord record = new JavaRecord();
                    record.setWord(word);
                    return record;
                }
            });
            DataFrame wordsDataFrame = sqlContext.createDataFrame(rowRDD, JavaRecord.class);

            // Register as a table
            wordsDataFrame.registerTempTable("words");

            // Do a word count on the table using SQL and print it
            DataFrame wordCountsDataFrame = sqlContext
                    .sql("select word, count(*) as total from words group by word");
            System.out.println("========= " + time + "=========");
            wordCountsDataFrame.show();
            return null;
        }
    });

    ssc.start();
    ssc.awaitTermination();
}
From source file:org.apache.spark.streaming.examples.JavaCustomReceiver.java
public static void main(String[] args) {
    if (args.length < 3) {
        System.err.println("Usage: JavaCustomReceiver <master> <hostname> <port>\n"
                + "In local mode, <master> should be 'local[n]' with n > 1");
        System.exit(1);
    }

    StreamingExamples.setStreamingLogLevels();

    // Create the context with a 1 second batch size
    JavaStreamingContext ssc = new JavaStreamingContext(args[0], "JavaNetworkWordCount", new Duration(1000),
            System.getenv("SPARK_HOME"), JavaStreamingContext.jarOfClass(JavaNetworkWordCount.class));

    // Create an input stream with the custom receiver on the target ip:port and count the
    // words in the input stream of \n delimited text (e.g. generated by 'nc')
    JavaDStream<String> lines = ssc.receiverStream(new JavaCustomReceiver(args[1], Integer.parseInt(args[2])));
    JavaDStream<String> words = lines.flatMap(new FlatMapFunction<String, String>() {
        @Override
        public Iterable<String> call(String x) {
            return Lists.newArrayList(SPACE.split(x));
        }
    });
    JavaPairDStream<String, Integer> wordCounts = words.mapToPair(new PairFunction<String, String, Integer>() {
        @Override
        public Tuple2<String, Integer> call(String s) {
            return new Tuple2<String, Integer>(s, 1);
        }
    }).reduceByKey(new Function2<Integer, Integer, Integer>() {
        @Override
        public Integer call(Integer i1, Integer i2) {
            return i1 + i2;
        }
    });

    wordCounts.print();
    ssc.start();
    ssc.awaitTermination();
}
From source file:com.weibangong.spark.streaming.JavaCustomReceiver.java
public static void main(String[] args) {
    if (args.length < 2) {
        System.err.println("Usage: JavaCustomReceiver <hostname> <port>");
        System.exit(1);
    }

    // StreamingExamples.setStreamingLogLevels();

    // Create the context with a 1 second batch size
    SparkConf sparkConf = new SparkConf().setAppName("JavaCustomReceiver");
    JavaStreamingContext ssc = new JavaStreamingContext(sparkConf, new Duration(1000));

    // Create an input stream with the custom receiver on the target ip:port and count the
    // words in the input stream of \n delimited text (e.g. generated by 'nc')
    JavaReceiverInputDStream<String> lines = ssc
            .receiverStream(new JavaCustomReceiver(args[0], Integer.parseInt(args[1])));
    JavaDStream<String> words = lines.flatMap(new FlatMapFunction<String, String>() {
        @Override
        public Iterable<String> call(String x) {
            return Lists.newArrayList(SPACE.split(x));
        }
    });
    JavaPairDStream<String, Integer> wordCounts = words.mapToPair(new PairFunction<String, String, Integer>() {
        @Override
        public Tuple2<String, Integer> call(String s) {
            return new Tuple2<String, Integer>(s, 1);
        }
    }).reduceByKey(new Function2<Integer, Integer, Integer>() {
        @Override
        public Integer call(Integer i1, Integer i2) {
            return i1 + i2;
        }
    });

    wordCounts.print();
    ssc.start();
    ssc.awaitTermination();
}
From source file:com.naltel.spark.JavaCustomReceiver.java
public static void main(String[] args) {
    if (args.length < 2) {
        System.err.println("Usage: JavaCustomReceiver <hostname> <port>");
        System.exit(1);
    }

    StreamingExamples.setStreamingLogLevels();

    // Create the context with a 1 second batch size
    SparkConf sparkConf = new SparkConf().setAppName("JavaCustomReceiver");
    JavaStreamingContext ssc = new JavaStreamingContext(sparkConf, new Duration(1000));

    // Create an input stream with the custom receiver on the target ip:port and count the
    // words in the input stream of \n delimited text (e.g. generated by 'nc')
    JavaReceiverInputDStream<String> lines = ssc
            .receiverStream(new JavaCustomReceiver(args[0], Integer.parseInt(args[1])));
    JavaDStream<String> words = lines.flatMap(new FlatMapFunction<String, String>() {
        @Override
        public Iterable<String> call(String x) {
            return Lists.newArrayList(SPACE.split(x));
        }
    });
    JavaPairDStream<String, Integer> wordCounts = words.mapToPair(new PairFunction<String, String, Integer>() {
        @Override
        public Tuple2<String, Integer> call(String s) {
            return new Tuple2<String, Integer>(s, 1);
        }
    }).reduceByKey(new Function2<Integer, Integer, Integer>() {
        @Override
        public Integer call(Integer i1, Integer i2) {
            return i1 + i2;
        }
    });

    wordCounts.print();
    ssc.start();
    ssc.awaitTermination();
}
From source file:com.sdw.dream.spark.examples.streaming.JavaDirectKafkaWordCount.java
public static void main(String[] args) {
    if (args.length < 2) {
        System.err.println("Usage: JavaDirectKafkaWordCount <brokers> <topics>\n"
                + "  <brokers> is a list of one or more Kafka brokers\n"
                + "  <topics> is a list of one or more kafka topics to consume from\n\n");
        System.exit(1);
    }

    StreamingExamples.setStreamingLogLevels();

    String brokers = args[0];
    String topics = args[1];

    // Create the context with a 2 second batch interval
    SparkConf sparkConf = new SparkConf().setAppName("JavaDirectKafkaWordCount");
    JavaStreamingContext jssc = new JavaStreamingContext(sparkConf, Durations.seconds(2));

    HashSet<String> topicsSet = new HashSet<String>(Arrays.asList(topics.split(",")));
    HashMap<String, String> kafkaParams = new HashMap<String, String>();
    kafkaParams.put("metadata.broker.list", brokers);

    // Create a direct Kafka stream with the given brokers and topics
    JavaPairInputDStream<String, String> messages = KafkaUtils.createDirectStream(jssc, String.class,
            String.class, StringDecoder.class, StringDecoder.class, kafkaParams, topicsSet);

    // Get the lines, split them into words, count the words and print
    JavaDStream<String> lines = messages.map(new Function<Tuple2<String, String>, String>() {
        @Override
        public String call(Tuple2<String, String> tuple2) {
            return tuple2._2();
        }
    });
    JavaDStream<String> words = lines.flatMap(new FlatMapFunction<String, String>() {
        @Override
        public Iterable<String> call(String x) {
            return Lists.newArrayList(SPACE.split(x));
        }
    });
    JavaPairDStream<String, Integer> wordCounts = words.mapToPair(new PairFunction<String, String, Integer>() {
        @Override
        public Tuple2<String, Integer> call(String s) {
            return new Tuple2<String, Integer>(s, 1);
        }
    }).reduceByKey(new Function2<Integer, Integer, Integer>() {
        @Override
        public Integer call(Integer i1, Integer i2) {
            return i1 + i2;
        }
    });

    wordCounts.print();

    // Start the computation
    jssc.start();
    jssc.awaitTermination();
}