List of usage examples for com.google.common.collect.Lists.newArrayList
@GwtCompatible(serializable = true) public static <E> ArrayList<E> newArrayList(Iterator<? extends E> elements)
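For quick reference, here is a minimal, self-contained sketch of this overload in isolation (the iterator-based factory). The class name, variable names, and sample data are illustrative only and are not taken from the example projects that follow.

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Iterator;

import com.google.common.collect.Lists;

public class NewArrayListFromIteratorDemo {
    public static void main(String[] args) {
        // Any Iterator<? extends E> can be used; here it is backed by a fixed list.
        Iterator<String> source = Arrays.asList("alpha", "beta", "gamma").iterator();

        // Copies the iterator's remaining elements into a new, mutable ArrayList.
        ArrayList<String> copy = Lists.newArrayList(source);

        copy.add("delta"); // the returned list is a plain ArrayList, so it can be modified
        System.out.println(copy); // prints: [alpha, beta, gamma, delta]
    }
}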
From source file:com.kakfa.spark.JavaKafkaWordCount.java
public static void main(String[] args) {
    if (args.length < 4) {
        System.err.println("Usage: JavaKafkaWordCount <zkQuorum> <group> <topics> <numThreads>");
        System.exit(1);
    }

    StreamingExamples.setStreamingLogLevels();

    // SparkConf sparkConf = new SparkConf().setAppName("JavaKafkaWordCount");
    // sparkConf.setMaster("spark://60f81dc6426c:7077");
    // SparkConf sparkConf = new SparkConf().setAppName("JavaKafkaWordCount").setMaster("spark://60f81dc6426c:7077");

    // Create the context with a 2 second batch interval
    JavaStreamingContext jssc = new JavaStreamingContext("local[4]", "JavaKafkaWordCount", new Duration(2000));

    int numThreads = Integer.parseInt(args[3]);
    Logger.getLogger("org").setLevel(Level.OFF);
    Logger.getLogger("akka").setLevel(Level.OFF);

    Map<String, Integer> topicMap = new HashMap<String, Integer>();
    String[] topics = args[2].split(",");
    for (String topic : topics) {
        topicMap.put(topic, numThreads);
    }

    // for (String t : topic) { topicMap.put(t, new Integer(3)); }

    // NotSerializable notSerializable = new NotSerializable();
    // JavaRDD<String> rdd = sc.textFile("/tmp/myfile");
    // rdd.map(s -> notSerializable.doSomething(s)).collect();

    JavaPairReceiverInputDStream<String, String> messages =
            KafkaUtils.createStream(jssc, args[0], args[1], topicMap);

    // JavaPairReceiverInputDStream<String, String> kafkaStream =
    //     KafkaUtils.createStream(jssc, "localhost:2181", "streamingContext", topicMap);

    System.out.println("Connection !!!!");

    // JavaDStream<String> data = messages.map(new Function<Tuple2<String, String>, String>() {
    //     public String call(Tuple2<String, String> message) { return message._2(); }
    // });

    JavaDStream<String> lines = messages.map(new Function<Tuple2<String, String>, String>() {
        @Override
        public String call(Tuple2<String, String> tuple2) {
            return tuple2._2();
        }
    });

    JavaDStream<String> words = lines.flatMap(new FlatMapFunction<String, String>() {
        @Override
        public Iterable<String> call(String x) {
            return Lists.newArrayList(SPACE.split(x));
        }
    });

    JavaPairDStream<String, Integer> wordCounts = words.mapToPair(new PairFunction<String, String, Integer>() {
        @Override
        public Tuple2<String, Integer> call(String s) {
            return new Tuple2<String, Integer>(s, 1);
        }
    }).reduceByKey(new Function2<Integer, Integer, Integer>() {
        @Override
        public Integer call(Integer i1, Integer i2) {
            return i1 + i2;
        }
    });

    wordCounts.print();
    jssc.start();
    jssc.awaitTermination();
}
From source file:com.google.api.services.samples.youtube.cmdline.youtube_cmdline_uploadvideo_sample.UploadVideo.java
/**
 * Uploads user selected video in the project folder to the user's YouTube account using OAuth2
 * for authentication.
 *
 * @param args command line args (not used).
 */
public static void main(String[] args) {
    // Scope required to upload to YouTube.
    List<String> scopes = Lists.newArrayList("https://www.googleapis.com/auth/youtube.upload");

    try {
        // Authorization.
        Credential credential = authorize(scopes);

        // YouTube object used to make all API requests.
        youtube = new YouTube.Builder(HTTP_TRANSPORT, JSON_FACTORY, credential)
                .setApplicationName("youtube-cmdline-uploadvideo-sample").build();

        // We get the user selected local video file to upload.
        File videoFile = getVideoFromUser();
        System.out.println("You chose " + videoFile + " to upload.");

        // Add extra information to the video before uploading.
        Video videoObjectDefiningMetadata = new Video();

        /*
         * Set the video to public, so it is available to everyone (what most people want). This is
         * actually the default, but I wanted you to see what it looked like in case you need to set
         * it to "unlisted" or "private" via API.
         */
        VideoStatus status = new VideoStatus();
        status.setPrivacyStatus("public");
        videoObjectDefiningMetadata.setStatus(status);

        // We set a majority of the metadata with the VideoSnippet object.
        VideoSnippet snippet = new VideoSnippet();

        /*
         * The Calendar instance is used to create a unique name and description for test purposes, so
         * you can see multiple files being uploaded. You will want to remove this from your project
         * and use your own standard names.
         */
        Calendar cal = Calendar.getInstance();
        snippet.setTitle("Test Upload via Java on " + cal.getTime());
        snippet.setDescription("Video uploaded via YouTube Data API V3 using the Java library "
                + "on " + cal.getTime());

        // Set your keywords.
        List<String> tags = new ArrayList<String>();
        tags.add("test");
        tags.add("example");
        tags.add("java");
        tags.add("YouTube Data API V3");
        tags.add("erase me");
        snippet.setTags(tags);

        // Set completed snippet to the video object.
        videoObjectDefiningMetadata.setSnippet(snippet);

        InputStreamContent mediaContent = new InputStreamContent(VIDEO_FILE_FORMAT,
                new BufferedInputStream(new FileInputStream(videoFile)));
        mediaContent.setLength(videoFile.length());

        /*
         * The upload command includes: 1. Information we want returned after file is successfully
         * uploaded. 2. Metadata we want associated with the uploaded video. 3. Video file itself.
         */
        YouTube.Videos.Insert videoInsert = youtube.videos().insert("snippet,statistics,status",
                videoObjectDefiningMetadata, mediaContent);

        // Set the upload type and add event listener.
        MediaHttpUploader uploader = videoInsert.getMediaHttpUploader();

        /*
         * Sets whether direct media upload is enabled or disabled. True = whole media content is
         * uploaded in a single request. False (default) = resumable media upload protocol to upload
         * in data chunks.
         */
        uploader.setDirectUploadEnabled(false);

        MediaHttpUploaderProgressListener progressListener = new MediaHttpUploaderProgressListener() {
            public void progressChanged(MediaHttpUploader uploader) throws IOException {
                switch (uploader.getUploadState()) {
                case INITIATION_STARTED:
                    System.out.println("Initiation Started");
                    break;
                case INITIATION_COMPLETE:
                    System.out.println("Initiation Completed");
                    break;
                case MEDIA_IN_PROGRESS:
                    System.out.println("Upload in progress");
                    System.out.println("Upload percentage: " + uploader.getProgress());
                    break;
                case MEDIA_COMPLETE:
                    System.out.println("Upload Completed!");
                    break;
                case NOT_STARTED:
                    System.out.println("Upload Not Started!");
                    break;
                }
            }
        };
        uploader.setProgressListener(progressListener);

        // Execute upload.
        Video returnedVideo = videoInsert.execute();

        // Print out returned results.
        System.out.println("\n================== Returned Video ==================\n");
        System.out.println("  - Id: " + returnedVideo.getId());
        System.out.println("  - Title: " + returnedVideo.getSnippet().getTitle());
        System.out.println("  - Tags: " + returnedVideo.getSnippet().getTags());
        System.out.println("  - Privacy Status: " + returnedVideo.getStatus().getPrivacyStatus());
        System.out.println("  - Video Count: " + returnedVideo.getStatistics().getViewCount());

    } catch (GoogleJsonResponseException e) {
        System.err.println("GoogleJsonResponseException code: " + e.getDetails().getCode() + " : "
                + e.getDetails().getMessage());
        e.printStackTrace();
    } catch (IOException e) {
        System.err.println("IOException: " + e.getMessage());
        e.printStackTrace();
    } catch (Throwable t) {
        System.err.println("Throwable: " + t.getMessage());
        t.printStackTrace();
    }
}
From source file:org.ctrlr.kom.examples.JavaDirectKafkaWordCount.java
public static void main(String[] args) {
    if (args.length < 2) {
        System.err.println("Usage: DirectKafkaWordCount <brokers> <topics>\n"
                + "  <brokers> is a list of one or more Kafka brokers\n"
                + "  <topics> is a list of one or more kafka topics to consume from\n\n");
        System.exit(1);
    }

    String brokers = args[0];
    String topics = args[1];

    /** Setup Hbase configuration. 1.4 will support SPARK-6918 */
    final Configuration hbaseConfiguration = HBaseConfiguration.create();

    /** Keep track of offsets */
    final AtomicReference<OffsetRange[]> offsetRanges = new AtomicReference<>();

    final IOffsetDao dao = new Hbase1OffsetStore.Builder().setHbaseConfiguration(hbaseConfiguration)
            .setOffsetTable("kafkaoffsettable").build();

    final KafkaOffsetManager osm = new KafkaOffsetManager.Builder().setOffsetManager(dao)
            .setKafkaBrokerList("localhost:9092").setGroupID("testGroupID").setTopic("kafkaTestTopic").build();

    /** Get offsets or start at beginning. getLatestOffsets is also an option. */
    Map<TopicAndPartition, Long> offsetMap = osm.getOffsets();
    if (offsetMap.isEmpty()) {
        offsetMap = osm.getEarliestOffsets();
    }

    // Create context with 2 second batch interval
    SparkConf sparkConf = new SparkConf().setAppName("JavaDirectKafkaWordCount");
    JavaStreamingContext jssc = new JavaStreamingContext(sparkConf, Durations.seconds(2));

    HashSet<String> topicsSet = new HashSet<>(Arrays.asList(topics.split(",")));
    HashMap<String, String> kafkaParams = new HashMap<>();
    kafkaParams.put("metadata.broker.list", brokers);

    /** Create direct Kafka stream using the acquired offsets */
    JavaInputDStream<byte[]> messages = KafkaUtils.createDirectStream(jssc, byte[].class, byte[].class,
            DefaultDecoder.class, DefaultDecoder.class, byte[].class, kafkaParams, offsetMap,
            new Function<MessageAndMetadata<byte[], byte[]>, byte[]>() {
                @Override
                public byte[] call(MessageAndMetadata<byte[], byte[]> messageAndMetadata) throws Exception {
                    return messageAndMetadata.message();
                }
            });

    /** Get Kafka offsets via a transform; this needs to go first or the HasOffsetRanges cast will not work */
    JavaDStream<byte[]> lines = messages.transform(new Function<JavaRDD<byte[]>, JavaRDD<byte[]>>() {
        @Override
        public JavaRDD<byte[]> call(JavaRDD<byte[]> javaRDD) throws Exception {
            OffsetRange[] offsets = ((HasOffsetRanges) javaRDD.rdd()).offsetRanges();
            offsetRanges.set(offsets);
            return javaRDD;
        }
    });

    JavaDStream<String> words = lines.flatMap(new FlatMapFunction<byte[], String>() {
        @Override
        public Iterable<String> call(byte[] x) {
            return Lists.newArrayList(SPACE.split(new String(x)));
        }
    });

    JavaPairDStream<String, Integer> wordCounts = words.mapToPair(new PairFunction<String, String, Integer>() {
        @Override
        public Tuple2<String, Integer> call(String s) {
            return new Tuple2<>(s, 1);
        }
    }).reduceByKey(new Function2<Integer, Integer, Integer>() {
        @Override
        public Integer call(Integer i1, Integer i2) {
            return i1 + i2;
        }
    });

    /** Write offsets to the OffsetManager */
    lines.foreachRDD(new Function<JavaRDD<byte[]>, Void>() {
        @Override
        public Void call(JavaRDD<byte[]> javaRDD) throws Exception {
            for (OffsetRange o : offsetRanges.get()) {
                if (o.fromOffset() < o.untilOffset()) {
                    Map<TopicAndPartition, Long> offsets = new HashMap<>();
                    offsets.put(new TopicAndPartition(o.topic(), o.partition()), o.untilOffset());
                    osm.setOffsets(offsets);
                }
            }
            return null;
        }
    });

    wordCounts.print();

    // Start the computation
    jssc.start();
    jssc.awaitTermination();
}
From source file:cn.dreampie.ClosureMinifier.java
public static void main(String[] args) {
    ClosureMinifier minifier = new ClosureMinifier();
    Compiler compiler = new Compiler();
    Result result = compiler.compile(minifier.getExterns(),
            Lists.newArrayList(SourceFile.fromCode("a", "function a(){\nvar a=new Array();\nconsole.log(a)\n}")),
            minifier.getCompilerOptions());
    System.out.println(compiler.toSource());
}
From source file:brooklyn.qa.load.SimulatedTheeTierApp.java
public static void main(String[] argv) {
    List<String> args = Lists.newArrayList(argv);
    String port = CommandLineUtil.getCommandLineOption(args, "--port", "8081+");
    String location = CommandLineUtil.getCommandLineOption(args, "--location", "localhost");

    BrooklynLauncher launcher = BrooklynLauncher.newInstance()
            .application(EntitySpec.create(StartableApplication.class, SimulatedTheeTierApp.class)
                    .displayName("Brooklyn WebApp Cluster with Database example"))
            .webconsolePort(port).location(location).start();

    Entities.dumpInfo(launcher.getApplications());
}
From source file:com.kit.MsgFromkfkToHbase.java
public static void main(String[] args) {
    // Kafka broker list
    String brokers = "172.16.19.151:9092,172.16.19.152:9092,172.16.19.153:9092";
    // Topics to consume from
    final String topics = "system,Disconnector,Breaker";

    // Kerberos authentication for Kafka (disabled here):
    // AuthenticationManager.setAuthMethod("kerberos");
    // "/usr/lib/kafka/kafka.keytab"
    // AuthenticationManager.login("kafka@TDH", TestProducer.class.getClassLoader().getResource("kafka.keytab").getPath());

    // Create context with a 2 seconds batch interval
    SparkConf sparkConf = new SparkConf().setAppName("JavaDirectKafkaWordCount"); //.setMaster("local[1]");
    JavaStreamingContext jssc = new JavaStreamingContext(sparkConf, Durations.seconds(2));

    HashSet<String> topicsSet = new HashSet<String>(Arrays.asList(topics.split(",")));
    HashMap<String, String> kafkaParams = new HashMap<String, String>();
    kafkaParams.put("metadata.broker.list", brokers);

    // Create direct kafka stream with brokers and topics
    JavaPairInputDStream<String, String> messages = KafkaUtils.createDirectStream(jssc, String.class,
            String.class, StringDecoder.class, StringDecoder.class, kafkaParams, topicsSet);

    // Get the lines, split them into words, count the words and print
    JavaDStream<String> lines = messages.map(new Function<Tuple2<String, String>, String>() {
        public String call(Tuple2<String, String> tuple2) {
            return tuple2._2();
        }
    });

    JavaDStream<String> words = lines.flatMap(new FlatMapFunction<String, String>() {
        // Split each Kafka record into fields and persist them to HBase
        @SuppressWarnings("unchecked")
        public Iterable<String> call(String x) {
            String[] kafka = SPACE.split(x);
            pareDataToHabse(kafka);
            // HbaseTest.insertData("test-kit", topics, Bytes.toBytes(num).toString());

            /* system
            Map map = new HashMap();
            Map infomap = new HashMap();
            infomap.put("Area", kafka[0]);
            infomap.put("System", kafka[1]);
            infomap.put("Time", kafka[2]);
            infomap.put("Readtime", kafka[3]);
            map.put("info", infomap);
            HbaseUtil.add("system", topics + UUID.randomUUID(), map);
            */

            // Disconnector
            // Map map = new HashMap();
            // Map infomap = new HashMap();
            // infomap.put("factoryname", kafka[0]);
            // infomap.put("dzname", kafka[1]);
            // infomap.put("dzid", kafka[2]);
            // infomap.put("dzstatus", kafka[3]);
            // infomap.put("dissecnode", kafka[4]);
            // map.put("info", infomap);
            // HbaseUtil.add("Disconnector", topics + UUID.randomUUID(), map);

            // Breaker
            // Map map = new HashMap();
            // Map infomap = new HashMap();
            // infomap.put("factoryname", kafka[0]);
            // infomap.put("Ocname", kafka[1]);
            // infomap.put("ocid", kafka[2]);
            // infomap.put("ocstatus", kafka[3]);
            // infomap.put("oscsecnode", kafka[4]);
            // infomap.put("Elea", kafka[5]);
            // infomap.put("Eleb", kafka[6]);
            // infomap.put("Elec", kafka[7]);
            // infomap.put("pname", kafka[8]);
            // map.put("info", infomap);
            // HbaseUtil.add("Breaker", topics + UUID.randomUUID(), map);

            // Return the split words for the downstream word count
            return Lists.newArrayList(SPACE.split(x));
        }
    });

    JavaPairDStream<String, Integer> wordCounts = words.mapToPair(new PairFunction<String, String, Integer>() {
        public Tuple2<String, Integer> call(String s) {
            return new Tuple2<String, Integer>(s, 1);
        }
    }).reduceByKey(new Function2<Integer, Integer, Integer>() {
        public Integer call(Integer i1, Integer i2) {
            return i1 + i2;
        }
    });

    // Print the word counts computed from the Kafka messages
    wordCounts.print();

    // Start the computation
    jssc.start();
    jssc.awaitTermination();
}
From source file:com.example.JavaKinesisWordCountASL.java
public static void main(String[] args) {
    // Populate the appropriate variables from the given args
    String kinesisAppName = "testApp";
    String streamName = "test";
    String endpointUrl = "kinesis.us-east-1.amazonaws.com";

    // Create a Kinesis client in order to determine the number of shards for the given stream
    AmazonKinesisClient kinesisClient = new AmazonKinesisClient(
            CredentialsProvider.getAwsSessionCredentialsProvider());
    kinesisClient.setEndpoint(endpointUrl);
    int numShards = kinesisClient.describeStream(streamName).getStreamDescription().getShards().size();

    // In this example, we're going to create 1 Kinesis Receiver/input DStream for each shard.
    // This is not a necessity; if there are fewer receivers/DStreams than the number of shards,
    // then the shards will be automatically distributed among the receivers and each receiver
    // will receive data from multiple shards.
    int numStreams = numShards;

    // Spark Streaming batch interval
    Duration batchInterval = new Duration(2000);

    // Kinesis checkpoint interval. Same as batchInterval for this example.
    Duration kinesisCheckpointInterval = batchInterval;

    // Get the region name from the endpoint URL to save Kinesis Client Library metadata in
    // DynamoDB of the same region as the Kinesis stream
    String regionName = RegionUtils.getRegionByEndpoint(endpointUrl).getName();

    // Setup the Spark config and StreamingContext
    SparkConf sparkConfig = new SparkConf().setAppName("JavaKinesisWordCountASL").setMaster("local[2]");
    JavaStreamingContext jssc = new JavaStreamingContext(sparkConfig, batchInterval);

    // Create the Kinesis DStreams
    List<JavaDStream<byte[]>> streamsList = new ArrayList<JavaDStream<byte[]>>(numStreams);
    for (int i = 0; i < numStreams; i++) {
        streamsList.add(KinesisUtils.createStream(jssc, kinesisAppName, streamName, endpointUrl, regionName,
                InitialPositionInStream.TRIM_HORIZON, kinesisCheckpointInterval,
                StorageLevel.MEMORY_AND_DISK_2()));
    }

    // Union all the streams if there is more than 1 stream
    JavaDStream<byte[]> unionStreams;
    if (streamsList.size() > 1) {
        unionStreams = jssc.union(streamsList.get(0), streamsList.subList(1, streamsList.size()));
    } else {
        // Otherwise, just use the 1 stream
        unionStreams = streamsList.get(0);
    }

    // Convert each line of Array[Byte] to String, and split into words
    JavaDStream<String> words = unionStreams.flatMap(new FlatMapFunction<byte[], String>() {
        public Iterable<String> call(byte[] line) {
            return Lists.newArrayList(WORD_SEPARATOR.split(new String(line)));
        }
    });

    // Map each word to a (word, 1) tuple so we can reduce by key to count the words
    JavaPairDStream<String, Integer> wordCounts = words.mapToPair(new PairFunction<String, String, Integer>() {
        public Tuple2<String, Integer> call(String s) {
            return new Tuple2<String, Integer>(s, 1);
        }
    }).reduceByKey(new Function2<Integer, Integer, Integer>() {
        public Integer call(Integer i1, Integer i2) {
            return i1 + i2;
        }
    });

    // Print the first 10 wordCounts
    wordCounts.print();

    // Start the streaming context and await termination
    jssc.start();
    jssc.awaitTermination();
}
From source file:org.ros.internal.message.GenerateInterfaces.java
public static void main(String[] args) {
    List<String> arguments = Lists.newArrayList(args);
    if (arguments.size() == 0) {
        arguments.add(".");
    }

    String rosPackagePath = System.getenv(EnvironmentVariables.ROS_PACKAGE_PATH);
    Collection<File> packagePath = Lists.newArrayList();
    for (String path : rosPackagePath.split(File.pathSeparator)) {
        File packageDirectory = new File(path);
        if (packageDirectory.exists()) {
            packagePath.add(packageDirectory);
        }
    }

    GenerateInterfaces generateInterfaces = new GenerateInterfaces();
    File outputDirectory = new File(arguments.remove(0));
    generateInterfaces.generate(outputDirectory, arguments, packagePath);
}
From source file:io.druid.server.sql.SQLRunner.java
public static void main(String[] args) throws Exception {
    Options options = new Options();
    options.addOption("h", "help", false, "help");
    options.addOption("v", false, "verbose");
    options.addOption("e", "host", true, "endpoint [hostname:port]");

    CommandLine cmd = new GnuParser().parse(options, args);
    if (cmd.hasOption("h")) {
        HelpFormatter formatter = new HelpFormatter();
        formatter.printHelp("SQLRunner", options);
        System.exit(2);
    }

    String hostname = cmd.getOptionValue("e", "localhost:8080");
    String sql = cmd.getArgs().length > 0 ? cmd.getArgs()[0] : STATEMENT;

    ObjectMapper objectMapper = new DefaultObjectMapper();
    ObjectWriter jsonWriter = objectMapper.writerWithDefaultPrettyPrinter();

    CharStream stream = new ANTLRInputStream(sql);
    DruidSQLLexer lexer = new DruidSQLLexer(stream);
    TokenStream tokenStream = new CommonTokenStream(lexer);
    DruidSQLParser parser = new DruidSQLParser(tokenStream);
    lexer.removeErrorListeners();
    parser.removeErrorListeners();

    lexer.addErrorListener(ConsoleErrorListener.INSTANCE);
    parser.addErrorListener(ConsoleErrorListener.INSTANCE);

    try {
        DruidSQLParser.QueryContext queryContext = parser.query();
        if (parser.getNumberOfSyntaxErrors() > 0)
            throw new IllegalStateException();
        // parser.setBuildParseTree(true);
        // System.err.println(q.toStringTree(parser));
    } catch (Exception e) {
        String msg = e.getMessage();
        if (msg != null)
            System.err.println(e);
        System.exit(1);
    }

    final Query query;
    final TypeReference typeRef;
    boolean groupBy = false;

    if (parser.groupByDimensions.isEmpty()) {
        query = Druids.newTimeseriesQueryBuilder().dataSource(parser.getDataSource())
                .aggregators(new ArrayList<AggregatorFactory>(parser.aggregators.values()))
                .postAggregators(parser.postAggregators).intervals(parser.intervals)
                .granularity(parser.granularity).filters(parser.filter).build();

        typeRef = new TypeReference<List<Result<TimeseriesResultValue>>>() {
        };
    } else {
        query = GroupByQuery.builder().setDataSource(parser.getDataSource())
                .setAggregatorSpecs(new ArrayList<AggregatorFactory>(parser.aggregators.values()))
                .setPostAggregatorSpecs(parser.postAggregators).setInterval(parser.intervals)
                .setGranularity(parser.granularity).setDimFilter(parser.filter)
                .setDimensions(new ArrayList<DimensionSpec>(parser.groupByDimensions.values())).build();

        typeRef = new TypeReference<List<Row>>() {
        };
        groupBy = true;
    }

    String queryStr = jsonWriter.writeValueAsString(query);
    if (cmd.hasOption("v"))
        System.err.println(queryStr);

    URL url = new URL(String.format("http://%s/druid/v2/?pretty", hostname));
    final URLConnection urlConnection = url.openConnection();
    urlConnection.addRequestProperty("content-type", MediaType.APPLICATION_JSON);
    urlConnection.getOutputStream().write(StringUtils.toUtf8(queryStr));

    BufferedReader stdInput = new BufferedReader(
            new InputStreamReader(urlConnection.getInputStream(), Charsets.UTF_8));

    Object res = objectMapper.readValue(stdInput, typeRef);

    Joiner tabJoiner = Joiner.on("\t");

    if (groupBy) {
        List<Row> rows = (List<Row>) res;
        Iterable<String> dimensions = Iterables.transform(parser.groupByDimensions.values(),
                new Function<DimensionSpec, String>() {
                    @Override
                    public String apply(@Nullable DimensionSpec input) {
                        return input.getOutputName();
                    }
                });

        System.out.println(
                tabJoiner.join(Iterables.concat(Lists.newArrayList("timestamp"), dimensions, parser.fields)));

        for (final Row r : rows) {
            System.out.println(tabJoiner.join(Iterables.concat(
                    Lists.newArrayList(parser.granularity.toDateTime(r.getTimestampFromEpoch())),
                    Iterables.transform(parser.groupByDimensions.values(),
                            new Function<DimensionSpec, String>() {
                                @Override
                                public String apply(@Nullable DimensionSpec input) {
                                    return Joiner.on(",").join(r.getDimension(input.getOutputName()));
                                }
                            }),
                    Iterables.transform(parser.fields, new Function<String, Object>() {
                        @Override
                        public Object apply(@Nullable String input) {
                            return r.getFloatMetric(input);
                        }
                    }))));
        }
    } else {
        List<Result<TimeseriesResultValue>> rows = (List<Result<TimeseriesResultValue>>) res;
        System.out.println(tabJoiner.join(Iterables.concat(Lists.newArrayList("timestamp"), parser.fields)));

        for (final Result<TimeseriesResultValue> r : rows) {
            System.out.println(tabJoiner.join(Iterables.concat(Lists.newArrayList(r.getTimestamp()),
                    Lists.transform(parser.fields, new Function<String, Object>() {
                        @Override
                        public Object apply(@Nullable String input) {
                            return r.getValue().getMetric(input);
                        }
                    }))));
        }
    }

    CloseQuietly.close(stdInput);
}
From source file:org.apache.spark.examples.streaming.JavaKinesisWordCountASL.java
public static void main(String[] args) {
    /* Check that all required args were passed in. */
    if (args.length < 2) {
        System.err.println("Usage: JavaKinesisWordCountASL <stream-name> <endpoint-url>\n"
                + "    <stream-name> is the name of the Kinesis stream\n"
                + "    <endpoint-url> is the endpoint of the Kinesis service\n"
                + "                   (e.g. https://kinesis.us-east-1.amazonaws.com)\n");
        System.exit(1);
    }

    StreamingExamples.setStreamingLogLevels();

    /* Populate the appropriate variables from the given args */
    String streamName = args[0];
    String endpointUrl = args[1];

    /* Set the batch interval to a fixed 2000 millis (2 seconds) */
    Duration batchInterval = new Duration(2000);

    /* Create a Kinesis client in order to determine the number of shards for the given stream */
    AmazonKinesisClient kinesisClient = new AmazonKinesisClient(new DefaultAWSCredentialsProviderChain());
    kinesisClient.setEndpoint(endpointUrl);

    /* Determine the number of shards from the stream */
    int numShards = kinesisClient.describeStream(streamName).getStreamDescription().getShards().size();

    /* In this example, we're going to create 1 Kinesis Worker/Receiver/DStream for each shard */
    int numStreams = numShards;

    /* Must add 1 more thread than the number of receivers or the output won't show properly from the driver */
    int numSparkThreads = numStreams + 1;

    /* Setup the Spark config. */
    SparkConf sparkConfig = new SparkConf().setAppName("KinesisWordCount")
            .setMaster("local[" + numSparkThreads + "]");

    /* Kinesis checkpoint interval. Same as batchInterval for this example. */
    Duration checkpointInterval = batchInterval;

    /* Setup the StreamingContext */
    JavaStreamingContext jssc = new JavaStreamingContext(sparkConfig, batchInterval);

    /* Create the same number of Kinesis DStreams/Receivers as the Kinesis stream's shards */
    List<JavaDStream<byte[]>> streamsList = new ArrayList<JavaDStream<byte[]>>(numStreams);
    for (int i = 0; i < numStreams; i++) {
        streamsList.add(KinesisUtils.createStream(jssc, streamName, endpointUrl, checkpointInterval,
                InitialPositionInStream.LATEST, StorageLevel.MEMORY_AND_DISK_2()));
    }

    /* Union all the streams if there is more than 1 stream */
    JavaDStream<byte[]> unionStreams;
    if (streamsList.size() > 1) {
        unionStreams = jssc.union(streamsList.get(0), streamsList.subList(1, streamsList.size()));
    } else {
        /* Otherwise, just use the 1 stream */
        unionStreams = streamsList.get(0);
    }

    /*
     * Split each line of the union'd DStreams into multiple words using flatMap to produce the collection.
     * Convert lines of byte[] to multiple Strings by first converting to String, then splitting on WORD_SEPARATOR.
     */
    JavaDStream<String> words = unionStreams.flatMap(new FlatMapFunction<byte[], String>() {
        @Override
        public Iterable<String> call(byte[] line) {
            return Lists.newArrayList(WORD_SEPARATOR.split(new String(line)));
        }
    });

    /* Map each word to a (word, 1) tuple, then reduce/aggregate by word. */
    JavaPairDStream<String, Integer> wordCounts = words.mapToPair(new PairFunction<String, String, Integer>() {
        @Override
        public Tuple2<String, Integer> call(String s) {
            return new Tuple2<String, Integer>(s, 1);
        }
    }).reduceByKey(new Function2<Integer, Integer, Integer>() {
        @Override
        public Integer call(Integer i1, Integer i2) {
            return i1 + i2;
        }
    });

    /* Print the first 10 wordCounts */
    wordCounts.print();

    /* Start the streaming context and await termination */
    jssc.start();
    jssc.awaitTermination();
}