List of usage examples for com.google.common.collect.Lists.newArrayList
@GwtCompatible(serializable = true) public static <E> ArrayList<E> newArrayList(Iterator<? extends E> elements)
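For quick reference, here is a minimal self-contained sketch of this overload (the class name NewArrayListDemo is illustrative, not taken from any source file below). newArrayList(Iterator) drains the iterator's remaining elements into a new mutable ArrayList. Several examples below actually hit the Iterable or varargs overloads instead (e.g. via Splitter.on(",").split(...) or Pattern.split(...)), which behave the same way.

import com.google.common.collect.Lists;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Iterator;

public class NewArrayListDemo {
    public static void main(String[] args) {
        Iterator<String> source = Arrays.asList("a", "b", "c").iterator();

        // Copies the iterator's remaining elements into a new, mutable ArrayList
        ArrayList<String> copy = Lists.newArrayList(source);
        copy.add("d"); // unlike Arrays.asList, the returned list supports add/remove

        System.out.println(copy); // prints [a, b, c, d]
    }
}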
From source file:com.twitterfeed.consumer.KafkaSparkConsumer.java
public static void main(String[] args) {
    String brokers = "localhost:9092";
    String topics = "test";

    // Create context with 2 second batch interval
    SparkConf sparkConf = new SparkConf().setAppName("TwitterAnalysis");
    JavaStreamingContext jssc = new JavaStreamingContext(sparkConf, Durations.seconds(2));

    HashSet<String> topicsSet = new HashSet<String>(Arrays.asList(topics.split(",")));
    HashMap<String, String> kafkaParams = new HashMap<String, String>();
    kafkaParams.put("metadata.broker.list", brokers);

    // Create direct kafka stream with brokers and topics
    JavaPairInputDStream<String, String> messages = KafkaUtils.createDirectStream(jssc, String.class,
            String.class, StringDecoder.class, StringDecoder.class, kafkaParams, topicsSet);

    // Get the lines, split them into words, count the words and print
    JavaDStream<String> lines = messages.map(new Function<Tuple2<String, String>, String>() {
        @Override
        public String call(Tuple2<String, String> tuple2) {
            return tuple2._2();
        }
    });
    JavaDStream<String> words = lines.flatMap(new FlatMapFunction<String, String>() {
        @Override
        public Iterable<String> call(String x) {
            // SPACE is assumed to be a Pattern field defined elsewhere in the class,
            // e.g. Pattern.compile(" ")
            return Lists.newArrayList(SPACE.split(x));
        }
    });
    JavaPairDStream<String, Integer> wordCounts = words.mapToPair(new PairFunction<String, String, Integer>() {
        @Override
        public Tuple2<String, Integer> call(String s) {
            return new Tuple2<String, Integer>(s, 1);
        }
    }).reduceByKey(new Function2<Integer, Integer, Integer>() {
        @Override
        public Integer call(Integer i1, Integer i2) {
            return i1 + i2;
        }
    });
    wordCounts.print();

    // Start the computation
    jssc.start();
    jssc.awaitTermination();
}
From source file:com.google.cloud.genomics.gettingstarted.MainExample.java
public static void main(String[] args) throws IOException {
    Arguments arguments = new Arguments();
    JCommander parser = new JCommander(arguments);

    try {
        // Parse the command line
        parser.parse(args);

        // Authorization
        VerificationCodeReceiver receiver = arguments.noLocalServer ? new GooglePromptReceiver()
                : new LocalServerReceiver();
        GenomicsFactory genomicsFactory = GenomicsFactory.builder("getting_started_java")
                .setScopes(Lists.newArrayList(GenomicsScopes.GENOMICS))
                .setVerificationCodeReceiver(Suppliers.ofInstance(receiver)).build();

        File clientSecrets = new File(arguments.clientSecretsFilename);
        if (!clientSecrets.exists()) {
            System.err.println("Client secrets file " + arguments.clientSecretsFilename + " does not exist."
                    + " Visit https://cloud.google.com/genomics/install-genomics-tools#authenticate to learn how"
                    + " to install a client_secrets.json file. If you have installed a client_secrets.json"
                    + " in a specific location, use --client_secrets_filename <path>/client_secrets.json.");
            return;
        }
        Genomics genomics = genomicsFactory.fromClientSecretsFile(clientSecrets);

        //
        // This example gets the read bases for a sample at a specific position
        //
        String datasetId = "10473108253681171589"; // This is the 1000 Genomes dataset ID
        String sample = "NA12872";
        String referenceName = "22";
        final Long referencePosition = 51003835L;

        // 1. First find the read group set ID for the sample
        SearchReadGroupSetsRequest readsetsReq = new SearchReadGroupSetsRequest()
                .setDatasetIds(Lists.newArrayList(datasetId)).setName(sample);
        List<ReadGroupSet> readGroupSets = genomics.readgroupsets().search(readsetsReq)
                .setFields("readGroupSets(id)").execute().getReadGroupSets();
        if (readGroupSets == null || readGroupSets.size() != 1) {
            System.err.println("Searching for " + sample + " didn't return the right number of read group sets");
            return;
        }
        String readGroupSetId = readGroupSets.get(0).getId();

        // 2. Once we have the read group set ID,
        // lookup the reads at the position we are interested in
        SearchReadsRequest readsReq = new SearchReadsRequest()
                .setReadGroupSetIds(Lists.newArrayList(readGroupSetId)).setReferenceName(referenceName)
                .setStart(referencePosition).setEnd(referencePosition + 1);
        List<Read> reads = genomics.reads().search(readsReq).setFields("alignments(alignment,alignedSequence)")
                .execute().getAlignments();

        Map<Character, Integer> baseCounts = Maps.newHashMap();
        for (Read read : reads) {
            int index = (int) (referencePosition - read.getAlignment().getPosition().getPosition());
            // Note: This is simplistic - the cigar should be considered for real code
            Character base = read.getAlignedSequence().charAt(index);
            if (!baseCounts.containsKey(base)) {
                baseCounts.put(base, 0);
            }
            baseCounts.put(base, baseCounts.get(base) + 1);
        }

        System.out.println(sample + " bases on " + referenceName + " at " + referencePosition + " are");
        for (Map.Entry<Character, Integer> entry : baseCounts.entrySet()) {
            System.out.println(entry.getKey() + ": " + entry.getValue());
        }

        //
        // This example gets the variants for a sample at a specific position
        //

        // 1. First find the call set ID for the sample
        SearchCallSetsRequest callSetsReq = new SearchCallSetsRequest()
                .setVariantSetIds(Lists.newArrayList(datasetId)).setName(sample);
        List<CallSet> callSets = genomics.callsets().search(callSetsReq).setFields("callSets(id)").execute()
                .getCallSets();
        if (callSets == null || callSets.size() != 1) {
            System.err.println("Searching for " + sample + " didn't return the right number of call sets");
            return;
        }
        String callSetId = callSets.get(0).getId();

        // 2. Once we have the call set ID,
        // lookup the variants that overlap the position we are interested in
        SearchVariantsRequest variantsReq = new SearchVariantsRequest()
                .setCallSetIds(Lists.newArrayList(callSetId)).setReferenceName(referenceName)
                .setStart(referencePosition).setEnd(referencePosition + 1);
        Variant variant = genomics.variants().search(variantsReq)
                .setFields("variants(names,referenceBases,alternateBases,calls(genotype))").execute()
                .getVariants().get(0);

        String variantName = variant.getNames().get(0);
        List<String> genotype = Lists.newArrayList();
        for (Integer g : variant.getCalls().get(0).getGenotype()) {
            if (g == 0) {
                genotype.add(variant.getReferenceBases());
            } else {
                genotype.add(variant.getAlternateBases().get(g - 1));
            }
        }
        System.out.println("the called genotype is " + Joiner.on(',').join(genotype) + " at " + variantName);
    } catch (ParameterException e) {
        System.err.append(e.getMessage()).append("\n");
        parser.usage();
    } catch (IllegalStateException e) {
        System.err.println(e.getMessage());
    } catch (Throwable t) {
        t.printStackTrace();
    }
}
From source file:example.JavaNetworkWordCount.java
public static void main(String[] args) { if (args.length < 2) { System.err.println("Usage: JavaNetworkWordCount <hostname> <port>"); System.exit(1);/*from w w w .j a v a2s . c o m*/ } // StreamingExamples.setStreamingLogLevels(); // Create the context with a 1 second batch size SparkConf sparkConf = new SparkConf().setAppName("JavaNetworkWordCount"); JavaStreamingContext ssc = new JavaStreamingContext(sparkConf, new Duration(1000)); // Create a JavaReceiverInputDStream on target ip:port and count the // words in input stream of \n delimited text (eg. generated by 'nc') // Note that no duplication in storage level only for running locally. // Replication necessary in distributed scenario for fault tolerance. JavaReceiverInputDStream<String> lines = ssc.socketTextStream(args[0], Integer.parseInt(args[1]), StorageLevels.MEMORY_AND_DISK_SER); JavaDStream<String> words = lines.flatMap(new FlatMapFunction<String, String>() { @Override public Iterable<String> call(String x) { return Lists.newArrayList(SPACE.split(x)); } }); JavaPairDStream<String, Integer> wordCounts = words.mapToPair(new PairFunction<String, String, Integer>() { @Override public Tuple2<String, Integer> call(String s) { return new Tuple2<String, Integer>(s, 1); } }).reduceByKey(new Function2<Integer, Integer, Integer>() { @Override public Integer call(Integer i1, Integer i2) { return i1 + i2; } }); wordCounts.print(); ssc.start(); ssc.awaitTermination(); }
From source file:mobicloud.examples.streaming.JavaNetworkWordCount.java
public static void main(String[] args) { if (args.length < 2) { System.err.println("Usage: JavaNetworkWordCount <hostname> <port>"); System.exit(1);/*w w w . j ava 2 s .c om*/ } StreamingExamples.setStreamingLogLevels(); // Create the context with a 1 second batch size SparkConf sparkConf = new SparkConf().setAppName("JavaNetworkWordCount"); JavaStreamingContext ssc = new JavaStreamingContext(sparkConf, new Duration(1000)); // Create a JavaReceiverInputDStream on target ip:port and count the // words in input stream of \n delimited text (eg. generated by 'nc') // Note that no duplication in storage level only for running locally. // Replication necessary in distributed scenario for fault tolerance. JavaReceiverInputDStream<String> lines = ssc.socketTextStream(args[0], Integer.parseInt(args[1]), StorageLevels.MEMORY_AND_DISK_SER); JavaDStream<String> words = lines.flatMap(new FlatMapFunction<String, String>() { @Override public Iterable<String> call(String x) { return Lists.newArrayList(SPACE.split(x)); } }); JavaPairDStream<String, Integer> wordCounts = words.mapToPair(new PairFunction<String, String, Integer>() { @Override public Tuple2<String, Integer> call(String s) { return new Tuple2<String, Integer>(s, 1); } }).reduceByKey(new Function2<Integer, Integer, Integer>() { @Override public Integer call(Integer i1, Integer i2) { return i1 + i2; } }); wordCounts.print(); ssc.start(); ssc.awaitTermination(); }
From source file:com.weibangong.spark.streaming.JavaStatefulNetworkWordCount.java
public static void main(String[] args) { if (args.length < 2) { System.err.println("Usage: JavaStatefulNetworkWordCount <hostname> <port>"); System.exit(1);//from www. ja v a2 s . c om } // Create the context with a 1 second batch size SparkConf sparkConf = new SparkConf().setAppName("JavaStatefulNetworkWordCount"); JavaStreamingContext ssc = new JavaStreamingContext(sparkConf, Durations.seconds(1)); ssc.checkpoint("."); // Initial state RDD input to mapWithState @SuppressWarnings("unchecked") List<Tuple2<String, Integer>> tuples = Arrays.asList(new Tuple2<String, Integer>("hello", 1), new Tuple2<String, Integer>("world", 1)); JavaPairRDD<String, Integer> initialRDD = ssc.sparkContext().parallelizePairs(tuples); JavaReceiverInputDStream<String> lines = ssc.socketTextStream(args[0], Integer.parseInt(args[1]), StorageLevels.MEMORY_AND_DISK_SER_2); JavaDStream<String> words = lines.flatMap(new FlatMapFunction<String, String>() { @Override public Iterable<String> call(String x) { return Lists.newArrayList(SPACE.split(x)); } }); JavaPairDStream<String, Integer> wordsDstream = words .mapToPair(new PairFunction<String, String, Integer>() { @Override public Tuple2<String, Integer> call(String s) { return new Tuple2<String, Integer>(s, 1); } }); // Update the cumulative count function final Function3<String, Optional<Integer>, State<Integer>, Tuple2<String, Integer>> mappingFunc = new Function3<String, Optional<Integer>, State<Integer>, Tuple2<String, Integer>>() { @Override public Tuple2<String, Integer> call(String word, Optional<Integer> one, State<Integer> state) { int sum = one.or(0) + (state.exists() ? state.get() : 0); Tuple2<String, Integer> output = new Tuple2<String, Integer>(word, sum); state.update(sum); return output; } }; // DStream made of get cumulative counts that get updated in every batch JavaMapWithStateDStream<String, Integer, Integer, Tuple2<String, Integer>> stateDstream = wordsDstream .mapWithState(StateSpec.function(mappingFunc).initialState(initialRDD)); stateDstream.print(); ssc.start(); ssc.awaitTermination(); }
From source file:com.aliyun.emr.example.JavaONSWordCount.java
public static void main(String[] args) {
    if (args.length < 5) {
        System.err.println(
                "Usage: bin/spark-submit --class ONSSample examples-1.0-SNAPSHOT-shaded.jar <accessKeyId> "
                        + "<accessKeySecret> <consumerId> <topic> <subExpression>");
        System.exit(1);
    }

    String accessKeyId = args[0];
    String accessKeySecret = args[1];
    String consumerId = args[2];
    String topic = args[3];
    String subExpression = args[4];

    SparkConf sparkConf = new SparkConf().setAppName("JavaONSWordCount");
    // Create the context with a 2 second batch size
    JavaStreamingContext jssc = new JavaStreamingContext(sparkConf, new Duration(2000));

    JavaReceiverInputDStream<byte[]> lines = OnsUtils.createStream(jssc, consumerId, topic, subExpression,
            accessKeyId, accessKeySecret, StorageLevel.MEMORY_AND_DISK(), new Function<Message, byte[]>() {
                @Override
                public byte[] call(Message msg) throws Exception {
                    return msg.getBody();
                }
            });

    JavaDStream<String> words = lines.map(new Function<byte[], String>() {
        @Override
        public String call(byte[] v1) throws Exception {
            return new String(v1);
        }
    }).flatMap(new FlatMapFunction<String, String>() {
        @Override
        public Iterable<String> call(String x) {
            return Lists.newArrayList(SPACE.split(x));
        }
    });

    JavaPairDStream<String, Integer> wordCounts = words.mapToPair(new PairFunction<String, String, Integer>() {
        @Override
        public Tuple2<String, Integer> call(String s) {
            return new Tuple2<String, Integer>(s, 1);
        }
    }).reduceByKey(new Function2<Integer, Integer, Integer>() {
        @Override
        public Integer call(Integer i1, Integer i2) {
            return i1 + i2;
        }
    });

    wordCounts.print();
    jssc.start();
    jssc.awaitTermination();
}
From source file:tv.icntv.grade.film.recommend.TopNJob.java
public static void main(String[] args) throws Exception {
    final Configuration configuration = HBaseConfiguration.create();
    configuration.addResource("grade.xml");
    String tables = configuration.get("hbase.cdn.tables");
    if (Strings.isNullOrEmpty(tables)) {
        return;
    }
    List<String> list = Lists.newArrayList(Splitter.on(",").split(tables));
    List<String> results = Lists.transform(list, new Function<String, String>() {
        @Override
        public String apply(@Nullable java.lang.String input) {
            return String.format(configuration.get("hdfs.directory.base.db"), new Date(), input);
        }
    });
    String[] arrays = new String[] { Joiner.on(",").join(results),
            String.format(configuration.get("hdfs.directory.num.middle"), new Date()),
            String.format(configuration.get("hdfs.directory.num.result"), new Date()) };
    AbstractJob job = new TopNJob();
    // job.setStart(true);
    int i = ToolRunner.run(configuration, job, arrays);
    System.exit(i);
}
From source file:brooklyn.demo.WebClusterExample.java
public static void main(String[] argv) {
    List<String> args = Lists.newArrayList(argv);
    String port = CommandLineUtil.getCommandLineOption(args, "--port", "8081+");
    String location = CommandLineUtil.getCommandLineOption(args, "--location", DEFAULT_LOCATION);
    // TODO Want to parse, to handle multiple locations

    BrooklynLauncher launcher = BrooklynLauncher.newInstance()
            .application(EntitySpec.create(WebClusterExample.class)
                    .displayName("Brooklyn WebApp Cluster example"))
            .webconsolePort(port)
            .location(location)
            .start();

    Entities.dumpInfo(launcher.getApplications());
}
From source file:spark.test.JavaCustomReceiver.java
public static void main(String[] args) {
    // Create the context with a 1 second batch size
    SparkConf sparkConf = new SparkConf().setAppName("JavaCustomReceiver");
    JavaStreamingContext ssc = new JavaStreamingContext(sparkConf, new Duration(1000));

    // Create an input stream with the custom receiver on target ip:port and
    // count the words in the input stream of \n delimited text (eg. generated by 'nc')
    JavaReceiverInputDStream<String> lines = ssc.receiverStream(new JavaCustomReceiver(hostname, portNumber));
    JavaDStream<String> words = lines.flatMap(new FlatMapFunction<String, String>() {
        @Override
        public Iterable<String> call(String x) {
            return Lists.newArrayList(SPACE.split(x));
        }
    });
    JavaPairDStream<String, Integer> wordCounts = words.mapToPair(new PairFunction<String, String, Integer>() {
        @Override
        public Tuple2<String, Integer> call(String s) {
            return new Tuple2<String, Integer>(s, 1);
        }
    }).reduceByKey(new Function2<Integer, Integer, Integer>() {
        @Override
        public Integer call(Integer i1, Integer i2) {
            return i1 + i2;
        }
    });

    wordCounts.print();
    ssc.start();
    ssc.awaitTermination();
}
From source file:com.sdw.dream.spark.examples.streaming.JavaStatefulNetworkWordCount.java
public static void main(String[] args) { if (args.length < 2) { System.err.println("Usage: JavaStatefulNetworkWordCount <hostname> <port>"); System.exit(1);/* w ww . j a v a2 s.c o m*/ } StreamingExamples.setStreamingLogLevels(); // Create the context with a 1 second batch size SparkConf sparkConf = new SparkConf().setAppName("JavaStatefulNetworkWordCount"); JavaStreamingContext ssc = new JavaStreamingContext(sparkConf, Durations.seconds(1)); ssc.checkpoint("."); // Initial state RDD input to mapWithState @SuppressWarnings("unchecked") List<Tuple2<String, Integer>> tuples = Arrays.asList(new Tuple2<String, Integer>("hello", 1), new Tuple2<String, Integer>("world", 1)); JavaPairRDD<String, Integer> initialRDD = ssc.sparkContext().parallelizePairs(tuples); JavaReceiverInputDStream<String> lines = ssc.socketTextStream(args[0], Integer.parseInt(args[1]), StorageLevels.MEMORY_AND_DISK_SER_2); JavaDStream<String> words = lines.flatMap(new FlatMapFunction<String, String>() { @Override public Iterable<String> call(String x) { return Lists.newArrayList(SPACE.split(x)); } }); JavaPairDStream<String, Integer> wordsDstream = words .mapToPair(new PairFunction<String, String, Integer>() { @Override public Tuple2<String, Integer> call(String s) { return new Tuple2<String, Integer>(s, 1); } }); // Update the cumulative count function final Function3<String, Optional<Integer>, State<Integer>, Tuple2<String, Integer>> mappingFunc = new Function3<String, Optional<Integer>, State<Integer>, Tuple2<String, Integer>>() { @Override public Tuple2<String, Integer> call(String word, Optional<Integer> one, State<Integer> state) { int sum = one.or(0) + (state.exists() ? state.get() : 0); Tuple2<String, Integer> output = new Tuple2<String, Integer>(word, sum); state.update(sum); return output; } }; // DStream made of get cumulative counts that get updated in every batch JavaMapWithStateDStream<String, Integer, Integer, Tuple2<String, Integer>> stateDstream = wordsDstream .mapWithState(StateSpec.function(mappingFunc).initialState(initialRDD)); stateDstream.print(); ssc.start(); ssc.awaitTermination(); }