List of usage examples for org.joda.time Duration standardSeconds
public static Duration standardSeconds(long seconds)
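For orientation, here is a minimal, self-contained sketch of what the factory method produces. This snippet is illustrative only and is not taken from any of the projects listed below; it assumes a "standard" second of exactly 1000 milliseconds, which is how Joda-Time defines it.

import org.joda.time.Duration;

public class StandardSecondsExample {
    public static void main(String[] args) {
        // A standard second is always 1000 milliseconds, independent of chronology or time zone.
        Duration thirtySeconds = Duration.standardSeconds(30);
        System.out.println(thirtySeconds.getMillis());          // prints 30000
        System.out.println(thirtySeconds.getStandardSeconds()); // prints 30
    }
}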
From source file:com.dataartisans.flink.dataflow.examples.streaming.KafkaWindowedWordCountExample.java
License:Apache License
public static void main(String[] args) {
    PipelineOptionsFactory.register(KafkaStreamingWordCountOptions.class);
    KafkaStreamingWordCountOptions options = PipelineOptionsFactory.fromArgs(args)
            .as(KafkaStreamingWordCountOptions.class);
    options.setJobName("KafkaExample - WindowSize: " + options.getWindowSize() + " seconds");
    options.setStreaming(true);
    options.setCheckpointingInterval(1000L);
    options.setNumberOfExecutionRetries(5);
    options.setExecutionRetryDelay(3000L);
    options.setRunner(FlinkPipelineRunner.class);

    System.out.println(options.getKafkaTopic() + " " + options.getZookeeper() + " " + options.getBroker() + " "
            + options.getGroup());
    Pipeline pipeline = Pipeline.create(options);

    Properties p = new Properties();
    p.setProperty("zookeeper.connect", options.getZookeeper());
    p.setProperty("bootstrap.servers", options.getBroker());
    p.setProperty("group.id", options.getGroup());

    // This is the Flink consumer that reads the input to
    // the program from a Kafka topic.
    FlinkKafkaConsumer08<String> kafkaConsumer = new FlinkKafkaConsumer08<>(options.getKafkaTopic(),
            new SimpleStringSchema(), p);

    PCollection<String> words = pipeline
            .apply(Read.from(new UnboundedFlinkSource<>(kafkaConsumer)).named("StreamingWordCount"))
            .apply(ParDo.of(new ExtractWordsFn()))
            .apply(Window.<String>into(FixedWindows.of(Duration.standardSeconds(options.getWindowSize())))
                    .triggering(AfterWatermark.pastEndOfWindow()).withAllowedLateness(Duration.ZERO)
                    .discardingFiredPanes());

    PCollection<KV<String, Long>> wordCounts = words.apply(Count.<String>perElement());

    wordCounts.apply(ParDo.of(new FormatAsStringFn())).apply(TextIO.Write.to("./outputKafka.txt"));

    pipeline.run();
}
From source file:com.dataartisans.flink.dataflow.examples.streaming.WindowedWordCount.java
License:Apache License
public static void main(String[] args) throws IOException {
    StreamingWordCountOptions options = PipelineOptionsFactory.fromArgs(args).withValidation()
            .as(StreamingWordCountOptions.class);
    options.setStreaming(true);
    options.setWindowSize(10L);
    options.setSlide(5L);
    options.setCheckpointingInterval(1000L);
    options.setNumberOfExecutionRetries(5);
    options.setExecutionRetryDelay(3000L);
    options.setRunner(FlinkPipelineRunner.class);

    LOG.info("Windowed WordCount with Sliding Windows of " + options.getWindowSize()
            + " sec. and a slide of " + options.getSlide());

    Pipeline pipeline = Pipeline.create(options);

    PCollection<String> words = pipeline
            .apply(Read.from(new UnboundedSocketSource<>("localhost", 9999, '\n', 3))
                    .named("StreamingWordCount"))
            .apply(ParDo.of(new ExtractWordsFn()))
            .apply(Window
                    .<String>into(SlidingWindows.of(Duration.standardSeconds(options.getWindowSize()))
                            .every(Duration.standardSeconds(options.getSlide())))
                    .triggering(AfterWatermark.pastEndOfWindow()).withAllowedLateness(Duration.ZERO)
                    .discardingFiredPanes());

    PCollection<KV<String, Long>> wordCounts = words.apply(Count.<String>perElement());

    wordCounts.apply(ParDo.of(new FormatAsStringFn())).apply(TextIO.Write.to("./outputWordCount.txt"));

    pipeline.run();
}
From source file:com.datatorrent.benchmark.window.AbstractWindowedOperatorBenchmarkApp.java
License:Apache License
protected O createWindowedOperator(Configuration conf) {
    SpillableStateStore store = createStore(conf);
    try {
        O windowedOperator = this.windowedOperatorClass.newInstance();

        SpillableComplexComponentImpl sccImpl = new SpillableComplexComponentImpl(store);
        windowedOperator.addComponent("SpillableComplexComponent", sccImpl);
        windowedOperator.setDataStorage(createDataStorage(sccImpl));
        windowedOperator.setRetractionStorage(createRetractionStorage(sccImpl));
        windowedOperator.setWindowStateStorage(new InMemoryWindowedStorage());
        setUpdatedKeyStorage(windowedOperator, conf, sccImpl);
        windowedOperator.setAccumulation(createAccumulation());

        windowedOperator.setAllowedLateness(Duration.millis(ALLOWED_LATENESS));
        windowedOperator.setWindowOption(new WindowOption.TimeWindows(Duration.standardMinutes(1)));
        // accumulating mode
        windowedOperator.setTriggerOption(
                TriggerOption.AtWatermark().withEarlyFiringsAtEvery(Duration.standardSeconds(1))
                        .accumulatingFiredPanes().firingOnlyUpdatedPanes());
        windowedOperator.setFixedWatermark(30000);
        //windowedOperator.setTriggerOption(TriggerOption.AtWatermark());

        return windowedOperator;
    } catch (Exception e) {
        throw Throwables.propagate(e);
    }
}
From source file:com.example.vision.Detect.java
License:Apache License
/**
 * Detects faces in the specified remote image.
 *
 * @param gcsPath The path to the remote file to perform face detection on.
 * @param out A {@link PrintStream} to write detected features to.
 * @throws IOException on Input/Output errors.
 */
public static void detectFacesGcs(String gcsPath, PrintStream out) throws IOException {
    List<AnnotateImageRequest> requests = new ArrayList<>();
    ImageAnnotatorSettings.Builder imageAnnotatorSettingsBuilder = ImageAnnotatorSettings.defaultBuilder();
    imageAnnotatorSettingsBuilder.batchAnnotateImagesSettings().getRetrySettingsBuilder()
            .setTotalTimeout(Duration.standardSeconds(30));
    ImageAnnotatorSettings settings = imageAnnotatorSettingsBuilder.build();

    ImageSource imgSource = ImageSource.newBuilder().setGcsImageUri(gcsPath).build();
    Image img = Image.newBuilder().setSource(imgSource).build();
    Feature feat = Feature.newBuilder().setType(Type.FACE_DETECTION).build();
    AnnotateImageRequest request = AnnotateImageRequest.newBuilder().addFeatures(feat).setImage(img).build();
    requests.add(request);

    ImageAnnotatorClient client = ImageAnnotatorClient.create(settings);
    BatchAnnotateImagesResponse response = client.batchAnnotateImages(requests);
    List<AnnotateImageResponse> responses = response.getResponsesList();

    for (AnnotateImageResponse res : responses) {
        if (res.hasError()) {
            out.printf("Error: %s\n", res.getError().getMessage());
            return;
        }

        // For full list of available annotations, see http://g.co/cloud/vision/docs
        for (FaceAnnotation annotation : res.getFaceAnnotationsList()) {
            out.printf("anger: %s\njoy: %s\nsurprise: %s\nposition: %s", annotation.getAngerLikelihood(),
                    annotation.getJoyLikelihood(), annotation.getSurpriseLikelihood(),
                    annotation.getBoundingPoly());
        }
    }
}
From source file:com.github.jcustenborder.kafka.connect.cdc.docker.DockerExtension.java
License:Apache License
@Override
public void beforeAll(ContainerExtensionContext containerExtensionContext) throws Exception {
    Class<?> testClass = containerExtensionContext.getTestClass().get();
    ExtensionContext.Namespace namespace = namespace(containerExtensionContext);
    DockerCompose dockerCompose = findDockerComposeAnnotation(containerExtensionContext);
    ExtensionContext.Store store = containerExtensionContext.getStore(namespace);

    DockerComposeRule.Builder builder = DockerComposeRule.builder();
    builder.file(dockerCompose.dockerComposePath());

    File logPathRoot = new File(dockerCompose.logPath());
    File testClassLogPath = new File(logPathRoot, testClass.getName());
    log.trace("Setting log path for docker compose to {}", testClassLogPath.getAbsolutePath());
    builder.saveLogsTo(testClassLogPath.getAbsolutePath());

    ClusterHealthCheck clusterHealthCheck = dockerCompose.clusterHealthCheck().newInstance();
    ClusterWait clusterWait = new ClusterWait(clusterHealthCheck,
            Duration.standardSeconds(dockerCompose.clusterHealthCheckTimeout()));
    builder.addClusterWait(clusterWait);

    DockerComposeRule dockerComposeRule = builder.build();
    store.put(STORE_SLOT_RULE, dockerComposeRule);
    dockerComposeRule.before();
}
From source file:com.google.cloud.dataflow.examples.AutoComplete.java
License:Apache License
public static void main(String[] args) throws IOException {
    Options options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class);

    if (options.isStreaming()) {
        // In order to cancel the pipelines automatically,
        // {@literal DataflowPipelineRunner} is forced to be used.
        options.setRunner(DataflowPipelineRunner.class);
    }

    options.setBigQuerySchema(FormatForBigquery.getSchema());
    DataflowExampleUtils dataflowUtils = new DataflowExampleUtils(options);

    // We support running the same pipeline in either
    // batch or windowed streaming mode.
    PTransform<? super PBegin, PCollection<String>> readSource;
    WindowFn<Object, ?> windowFn;
    if (options.isStreaming()) {
        Preconditions.checkArgument(!options.getOutputToDatastore(),
                "DatastoreIO is not supported in streaming.");
        dataflowUtils.setupPubsubTopic();

        readSource = PubsubIO.Read.topic(options.getPubsubTopic());
        windowFn = SlidingWindows.of(Duration.standardMinutes(30)).every(Duration.standardSeconds(5));
    } else {
        readSource = TextIO.Read.from(options.getInputFile());
        windowFn = new GlobalWindows();
    }

    // Create the pipeline.
    Pipeline p = Pipeline.create(options);
    PCollection<KV<String, List<CompletionCandidate>>> toWrite = p.apply(readSource)
            .apply(ParDo.of(new ExtractHashtags())).apply(Window.<String>into(windowFn))
            .apply(ComputeTopCompletions.top(10, options.getRecursive()));

    if (options.getOutputToDatastore()) {
        toWrite.apply(ParDo.named("FormatForDatastore").of(new FormatForDatastore(options.getKind())))
                .apply(DatastoreIO.writeTo(options.getProject()));
    }
    if (options.getOutputToBigQuery()) {
        dataflowUtils.setupBigQueryTable();

        TableReference tableRef = new TableReference();
        tableRef.setProjectId(options.getProject());
        tableRef.setDatasetId(options.getBigQueryDataset());
        tableRef.setTableId(options.getBigQueryTable());

        toWrite.apply(ParDo.of(new FormatForBigquery()))
                .apply(BigQueryIO.Write.to(tableRef).withSchema(FormatForBigquery.getSchema())
                        .withCreateDisposition(BigQueryIO.Write.CreateDisposition.CREATE_IF_NEEDED)
                        .withWriteDisposition(BigQueryIO.Write.WriteDisposition.WRITE_TRUNCATE));
    }

    // Run the pipeline.
    PipelineResult result = p.run();

    if (options.isStreaming() && !options.getInputFile().isEmpty()) {
        // Inject the data into the Pub/Sub topic with a Dataflow batch pipeline.
        dataflowUtils.runInjectorPipeline(options.getInputFile(), options.getPubsubTopic());
    }

    // dataflowUtils will try to cancel the pipeline and the injector before the program exits.
    dataflowUtils.waitToFinish(result);
}
From source file:com.google.cloud.dataflow.examples.complete.AutoComplete.java
License:Apache License
public static void main(String[] args) throws IOException {
    Options options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class);

    if (options.isStreaming()) {
        // In order to cancel the pipelines automatically,
        // {@literal DataflowPipelineRunner} is forced to be used.
        options.setRunner(DataflowPipelineRunner.class);
    }

    options.setBigQuerySchema(FormatForBigquery.getSchema());
    DataflowExampleUtils dataflowUtils = new DataflowExampleUtils(options);

    // We support running the same pipeline in either
    // batch or windowed streaming mode.
    PTransform<? super PBegin, PCollection<String>> readSource;
    WindowFn<Object, ?> windowFn;
    if (options.isStreaming()) {
        Preconditions.checkArgument(!options.getOutputToDatastore(),
                "DatastoreIO is not supported in streaming.");
        dataflowUtils.setupPubsub();

        readSource = PubsubIO.Read.topic(options.getPubsubTopic());
        windowFn = SlidingWindows.of(Duration.standardMinutes(30)).every(Duration.standardSeconds(5));
    } else {
        readSource = TextIO.Read.from(options.getInputFile());
        windowFn = new GlobalWindows();
    }

    // Create the pipeline.
    Pipeline p = Pipeline.create(options);
    PCollection<KV<String, List<CompletionCandidate>>> toWrite = p.apply(readSource)
            .apply(ParDo.of(new ExtractHashtags())).apply(Window.<String>into(windowFn))
            .apply(ComputeTopCompletions.top(10, options.getRecursive()));

    if (options.getOutputToDatastore()) {
        toWrite.apply(ParDo.named("FormatForDatastore")
                .of(new FormatForDatastore(options.getKind(), options.getDatastoreAncestorKey())))
                .apply(DatastoreIO.v1().write().withProjectId(
                        MoreObjects.firstNonNull(options.getOutputDataset(), options.getProject())));
    }
    if (options.getOutputToBigQuery()) {
        dataflowUtils.setupBigQueryTable();

        TableReference tableRef = new TableReference();
        tableRef.setProjectId(options.getProject());
        tableRef.setDatasetId(options.getBigQueryDataset());
        tableRef.setTableId(options.getBigQueryTable());

        toWrite.apply(ParDo.of(new FormatForBigquery()))
                .apply(BigQueryIO.Write.to(tableRef).withSchema(FormatForBigquery.getSchema())
                        .withCreateDisposition(BigQueryIO.Write.CreateDisposition.CREATE_IF_NEEDED)
                        .withWriteDisposition(
                                options.isStreaming() ? BigQueryIO.Write.WriteDisposition.WRITE_APPEND
                                        : BigQueryIO.Write.WriteDisposition.WRITE_TRUNCATE));
    }

    // Run the pipeline.
    PipelineResult result = p.run();

    if (options.isStreaming() && !options.getInputFile().isEmpty()) {
        // Inject the data into the Pub/Sub topic with a Dataflow batch pipeline.
        dataflowUtils.runInjectorPipeline(options.getInputFile(), options.getPubsubTopic());
    }

    // dataflowUtils will try to cancel the pipeline and the injector before the program exits.
    dataflowUtils.waitToFinish(result);
}
From source file:com.google.cloud.dataflow.sdk.transforms.windowing.SlidingWindows.java
License:Apache License
static Duration getDefaultPeriod(Duration size) {
    if (size.isLongerThan(Duration.standardHours(1))) {
        return Duration.standardHours(1);
    }
    if (size.isLongerThan(Duration.standardMinutes(1))) {
        return Duration.standardMinutes(1);
    }
    if (size.isLongerThan(Duration.standardSeconds(1))) {
        return Duration.standardSeconds(1);
    }
    return Duration.millis(1);
}
From source file:com.google.cloud.dataflow.tutorials.game.Exercise7.java
License:Apache License
public static void main(String[] args) throws Exception {
    Exercise7Options options = PipelineOptionsFactory.fromArgs(args).withValidation()
            .as(Exercise7Options.class);
    // Enforce that this pipeline is always run in streaming mode.
    options.setStreaming(true);
    // Allow the pipeline to be cancelled automatically.
    options.setRunner(DataflowPipelineRunner.class);
    Pipeline pipeline = Pipeline.create(options);

    TableReference badUserTable = new TableReference();
    badUserTable.setDatasetId(options.getOutputDataset());
    badUserTable.setProjectId(options.getProject());
    badUserTable.setTableId(options.getOutputTableName() + "_bad_users");

    // 1. Read game events with message id and timestamp
    // 2. Parse events
    // 3. Key by event id
    // 4. Sessionize.
    PCollection<KV<String, GameEvent>> sessionedEvents = null; /* TODO: YOUR CODE GOES HERE */

    // 1. Read play events with message id and timestamp
    // 2. Parse events
    // 3. Key by event id
    // 4. Sessionize.
    PCollection<KV<String, PlayEvent>> sessionedPlayEvents = null; /* TODO: YOUR CODE GOES HERE */

    // 1. Join events
    // 2. Compute latency using ComputeLatencyFn
    PCollection<KV<String, Long>> userLatency = null; /* TODO: YOUR CODE GOES HERE */

    // 1. Get the values of userLatencies
    // 2. Re-window into GlobalWindows with periodic repeated triggers
    // 3. Compute global approximate quantiles with fanout
    PCollectionView<List<Long>> globalQuantiles = null; /* TODO: YOUR CODE GOES HERE */

    userLatency
            // Use the computed latency distribution as a side-input to filter out likely bad users.
            .apply("DetectBadUsers",
                    ParDo.withSideInputs(globalQuantiles).of(new DoFn<KV<String, Long>, String>() {
                        public void processElement(ProcessContext c) {
                            /* TODO: YOUR CODE GOES HERE */
                            throw new RuntimeException("Not implemented");
                        }
                    }))
            // We want to only emit a single BigQuery row for every bad user. To do this, we
            // re-key by user, then window globally and trigger on the first element for each key.
            .apply("KeyByUser",
                    WithKeys.of((String user) -> user).withKeyType(TypeDescriptor.of(String.class)))
            .apply("GlobalWindowsTriggerOnFirst",
                    Window.<KV<String, String>>into(new GlobalWindows())
                            .triggering(AfterProcessingTime.pastFirstElementInPane()
                                    .plusDelayOf(Duration.standardSeconds(10)))
                            .accumulatingFiredPanes())
            .apply("GroupByUser", GroupByKey.<String, String>create())
            .apply("FormatBadUsers", ParDo.of(new FormatBadUserFn()))
            .apply("WriteBadUsers",
                    BigQueryIO.Write.to(badUserTable).withSchema(FormatBadUserFn.getSchema())
                            .withCreateDisposition(CreateDisposition.CREATE_IF_NEEDED)
                            .withWriteDisposition(WriteDisposition.WRITE_APPEND));

    // Run the pipeline and wait for the pipeline to finish; capture cancellation requests from the
    // command line.
    PipelineResult result = pipeline.run();
}
From source file:com.google.cloud.dataflow.tutorials.game.Exercise8.java
License:Apache License
public static void main(String[] args) throws Exception {
    Exercise8Options options = PipelineOptionsFactory.fromArgs(args).withValidation()
            .as(Exercise8Options.class);
    // Enforce that this pipeline is always run in streaming mode.
    options.setStreaming(true);
    // Allow the pipeline to be cancelled automatically.
    options.setRunner(DataflowPipelineRunner.class);
    Pipeline pipeline = Pipeline.create(options);

    TableReference badUserTable = new TableReference();
    badUserTable.setDatasetId(options.getOutputDataset());
    badUserTable.setProjectId(options.getProject());
    badUserTable.setTableId(options.getOutputTableName() + "_bad_users");

    // Read Events from Pub/Sub using custom timestamps and custom message id label.
    PCollection<KV<String, GameEvent>> sessionedEvents = pipeline
            .apply("ReadGameScoreEvents",
                    PubsubIO.Read.timestampLabel(TIMESTAMP_ATTRIBUTE).idLabel(MESSAGE_ID_ATTRIBUTE)
                            .topic(options.getTopic()))
            .apply("ParseGameScoreEvents", ParDo.of(new BuggyParseEventFn()))
            .apply("KeyGameScoreByEventId",
                    WithKeys.of((GameEvent event) -> event.getEventId())
                            .withKeyType(TypeDescriptor.of(String.class)))
            .apply("SessionizeGameScoreEvents",
                    Window.<KV<String, GameEvent>>into(
                            Sessions.withGapDuration(Duration.standardMinutes(options.getSessionGap())))
                            .withOutputTimeFn(OutputTimeFns.outputAtEndOfWindow()));

    // Read PlayEvents from Pub/Sub using custom timestamps and custom message id label.
    PCollection<KV<String, PlayEvent>> sessionedPlayEvents = pipeline
            .apply("ReadGamePlayEvents",
                    PubsubIO.Read.timestampLabel(TIMESTAMP_ATTRIBUTE).idLabel(MESSAGE_ID_ATTRIBUTE)
                            .topic(options.getPlayEventsTopic()))
            .apply("ParseGamePlayEvents", ParDo.of(new BuggyParsePlayEventFn()))
            .apply("KeyGamePlayByEventId",
                    WithKeys.of((PlayEvent play) -> play.getEventId())
                            .withKeyType(TypeDescriptor.of(String.class)))
            .apply("SessionizeGamePlayEvents",
                    Window.<KV<String, PlayEvent>>into(
                            Sessions.withGapDuration(Duration.standardMinutes(options.getSessionGap())))
                            .withOutputTimeFn(OutputTimeFns.outputAtEndOfWindow()));

    // Compute per-user latency.
    PCollection<KV<String, Long>> userLatency = KeyedPCollectionTuple.of(playTag, sessionedPlayEvents)
            .and(eventTag, sessionedEvents).apply("JoinScorePlayEvents", CoGroupByKey.create())
            .apply("ComputeLatency", ParDo.of(new ComputeLatencyFn()));

    // Create a view onto quantiles of the global latency distribution.
    PCollectionView<List<Long>> globalQuantiles = userLatency.apply("GetLatencies", Values.create())
            // Re-window session results into a global window, and trigger periodically making sure
            // to use the full accumulated window contents.
            .apply("GlobalWindowRetrigger",
                    Window.<Long>into(new GlobalWindows())
                            .triggering(Repeatedly.forever(AfterProcessingTime.pastFirstElementInPane()
                                    .plusDelayOf(Duration.standardSeconds(GLOBAL_AGGREGATE_TRIGGER_SEC))))
                            .accumulatingFiredPanes())
            .apply(((Combine.Globally<Long, List<Long>>) ApproximateQuantiles
                    .<Long>globally(GLOBAL_LATENCY_QUANTILES)).withFanout(GLOBAL_AGGREGATE_FANOUT)
                    .asSingletonView());

    userLatency
            // Use the computed latency distribution as a side-input to filter out likely bad users.
            .apply("DetectBadUsers",
                    ParDo.withSideInputs(globalQuantiles).of(new DoFn<KV<String, Long>, String>() {
                        public void processElement(ProcessContext c) {
                            String user = c.element().getKey();
                            Long latency = c.element().getValue();
                            List<Long> quantiles = c.sideInput(globalQuantiles);
                            // Users in the first quantile are considered spammers, since their
                            // score to play event latency is too low, suggesting a robot.
                            if (latency < quantiles.get(1)) {
                                c.output(user);
                            }
                        }
                    }))
            // We want to only emit a single BigQuery row for every bad user. To do this, we
            // re-key by user, then window globally and trigger on the first element for each key.
            .apply("KeyByUser",
                    WithKeys.of((String user) -> user).withKeyType(TypeDescriptor.of(String.class)))
            .apply("GlobalWindowsTriggerOnFirst",
                    Window.<KV<String, String>>into(new GlobalWindows())
                            .triggering(AfterProcessingTime.pastFirstElementInPane()
                                    .plusDelayOf(Duration.standardSeconds(10)))
                            .accumulatingFiredPanes())
            .apply("GroupByUser", GroupByKey.<String, String>create())
            .apply("FormatBadUsers", ParDo.of(new FormatBadUserFn()))
            .apply("WriteBadUsers",
                    BigQueryIO.Write.to(badUserTable).withSchema(FormatBadUserFn.getSchema())
                            .withCreateDisposition(CreateDisposition.CREATE_IF_NEEDED)
                            .withWriteDisposition(WriteDisposition.WRITE_APPEND));

    userLatency.apply("ReKeyFn",
            // BUG4: We have a hot key. Especially when the cost of the downstream fn is high, we must
            // ensure we have good sharding.
            WithKeys.of((KV<String, Long> item) -> "").withKeyType(TypeDescriptor.of(String.class)))
            .apply("WindowAndTriggerOften",
                    Window.<KV<String, KV<String, Long>>>into(new GlobalWindows())
                            .triggering(Repeatedly.forever(AfterProcessingTime.pastFirstElementInPane()
                                    .plusDelayOf(Duration.standardSeconds(10))))
                            .discardingFiredPanes())
            .apply("GroupByNewKey", GroupByKey.<String, KV<String, Long>>create())
            .apply("DoExpensiveWork", ParDo.of(new ExpensiveWorkPerElement()));

    // Run the pipeline and wait for the pipeline to finish; capture cancellation requests from the
    // command line.
    PipelineResult result = pipeline.run();
}