List of usage examples for org.joda.time.Duration.standardSeconds
public static Duration standardSeconds(long seconds)
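The method builds a fixed-length Duration of the given number of seconds, where each standard second is exactly 1000 milliseconds. Before the project-specific usages below, here is a minimal standalone sketch; the class name StandardSecondsDemo is illustrative and not taken from any of the listed projects:

import org.joda.time.Duration;

public class StandardSecondsDemo {
    public static void main(String[] args) {
        // A fixed duration of 10 standard seconds (10 * 1000 ms).
        Duration tenSeconds = Duration.standardSeconds(10);

        System.out.println(tenSeconds.getMillis());          // prints 10000
        System.out.println(tenSeconds.getStandardSeconds()); // prints 10
    }
}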
From source file:org.apache.apex.malhar.stream.sample.complete.TwitterAutoComplete.java
License:Apache License
/**
 * Populate the dag with High-Level API.
 * @param dag
 * @param conf
 */
@Override
public void populateDAG(DAG dag, Configuration conf) {
    TwitterSampleInput input = new TwitterSampleInput();

    WindowOption windowOption = new WindowOption.GlobalWindow();
    ApexStream<String> tags = StreamFactory.fromInput(input, input.text, name("tweetSampler"))
        .filter(new ASCIIFilter(), name("ACSII Filter"))
        .flatMap(new ExtractHashtags(), name("Extract Hashtags"));

    ApexStream<Tuple.WindowedTuple<KeyValPair<String, List<CompletionCandidate>>>> s = tags
        .window(windowOption, new TriggerOption().accumulatingFiredPanes()
            .withEarlyFiringsAtEvery(Duration.standardSeconds(10)))
        .addCompositeStreams(ComputeTopCompletions.top(10, true)).print();

    s.populateDag(dag);
}
From source file:org.apache.apex.malhar.stream.sample.cookbook.DeDupExample.java
License:Apache License
@Override
public void populateDAG(DAG dag, Configuration conf) {
    Collector collector = new Collector();

    // Create a stream that reads from files in a local folder and output lines one by one to downstream.
    ApexStream<String> stream = StreamFactory.fromFolder("./src/test/resources/wordcount", name("textInput"))
        // Extract all the words from the input line of text.
        .flatMap(new Function.FlatMapFunction<String, String>() {
            @Override
            public Iterable<String> f(String input) {
                return Arrays.asList(input.split("[\\p{Punct}\\s]+"));
            }
        }, name("ExtractWords"))
        // Change the words to lower case, also shutdown the app when the word "bye" is detected.
        .map(new Function.MapFunction<String, String>() {
            @Override
            public String f(String input) {
                return input.toLowerCase();
            }
        }, name("ToLowerCase"));

    // Apply window and trigger option.
    stream.window(new WindowOption.GlobalWindow(),
            new TriggerOption().accumulatingFiredPanes().withEarlyFiringsAtEvery(Duration.standardSeconds(1)))
        // Remove the duplicate words and print out the result.
        .accumulate(new RemoveDuplicates<String>(), name("RemoveDuplicates")).print(name("console"))
        .endWith(collector, collector.input).populateDag(dag);
}
From source file:org.apache.beam.examples.complete.AutoComplete.java
License:Apache License
public static void runAutocompletePipeline(Options options) throws IOException {
    options.setBigQuerySchema(FormatForBigquery.getSchema());
    ExampleUtils exampleUtils = new ExampleUtils(options);

    // We support running the same pipeline in either
    // batch or windowed streaming mode.
    WindowFn<Object, ?> windowFn;
    if (options.isStreaming()) {
        checkArgument(!options.getOutputToDatastore(), "DatastoreIO is not supported in streaming.");
        windowFn = SlidingWindows.of(Duration.standardMinutes(30)).every(Duration.standardSeconds(5));
    } else {
        windowFn = new GlobalWindows();
    }

    // Create the pipeline.
    Pipeline p = Pipeline.create(options);
    PCollection<KV<String, List<CompletionCandidate>>> toWrite = p
        .apply(TextIO.read().from(options.getInputFile()))
        .apply(ParDo.of(new ExtractHashtags()))
        .apply(Window.into(windowFn))
        .apply(ComputeTopCompletions.top(10, options.getRecursive()));

    if (options.getOutputToDatastore()) {
        toWrite
            .apply("FormatForDatastore",
                ParDo.of(new FormatForDatastore(options.getKind(), options.getDatastoreAncestorKey())))
            .apply(DatastoreIO.v1().write().withProjectId(
                MoreObjects.firstNonNull(options.getOutputProject(), options.getProject())));
    }
    if (options.getOutputToBigQuery()) {
        exampleUtils.setupBigQueryTable();

        TableReference tableRef = new TableReference();
        tableRef.setProjectId(options.getProject());
        tableRef.setDatasetId(options.getBigQueryDataset());
        tableRef.setTableId(options.getBigQueryTable());

        toWrite.apply(ParDo.of(new FormatForBigquery()))
            .apply(BigQueryIO.writeTableRows().to(tableRef).withSchema(FormatForBigquery.getSchema())
                .withCreateDisposition(BigQueryIO.Write.CreateDisposition.CREATE_IF_NEEDED)
                .withWriteDisposition(
                    options.isStreaming() ? BigQueryIO.Write.WriteDisposition.WRITE_APPEND
                        : BigQueryIO.Write.WriteDisposition.WRITE_TRUNCATE));
    }

    if (options.getOutputToChecksum()) {
        PCollection<Long> checksum = toWrite
            .apply(ParDo.of(new DoFn<KV<String, List<CompletionCandidate>>, Long>() {
                @ProcessElement
                public void process(ProcessContext c) {
                    KV<String, List<CompletionCandidate>> elm = c.element();
                    Long listHash = c.element().getValue().stream().mapToLong(cc -> cc.hashCode()).sum();
                    c.output(Long.valueOf(elm.getKey().hashCode()) + listHash);
                }
            }))
            .apply(Sum.longsGlobally());

        PAssert.that(checksum).containsInAnyOrder(options.getExpectedChecksum());
    }

    // Run the pipeline.
    PipelineResult result = p.run();

    // ExampleUtils will try to cancel the pipeline and the injector before the program exits.
    exampleUtils.waitToFinish(result);
}
From source file:org.apache.beam.examples.snippets.Snippets.java
License:Apache License
public static void fileProcessPattern() throws Exception {
    Pipeline p = Pipeline.create();

    // [START FileProcessPatternProcessNewFilesSnip1]
    // This produces PCollection<MatchResult.Metadata>
    p.apply(FileIO.match().filepattern("...").continuously(Duration.standardSeconds(30),
        Watch.Growth.afterTimeSinceNewOutput(Duration.standardHours(1))));
    // [END FileProcessPatternProcessNewFilesSnip1]

    // [START FileProcessPatternProcessNewFilesSnip2]
    // This produces PCollection<String>
    p.apply(TextIO.read().from("<path-to-files>/*").watchForNewFiles(
        // Check for new files every minute.
        Duration.standardMinutes(1),
        // Stop watching the file pattern if no new files appear for an hour.
        Watch.Growth.afterTimeSinceNewOutput(Duration.standardHours(1))));
    // [END FileProcessPatternProcessNewFilesSnip2]

    // [START FileProcessPatternAccessMetadataSnip1]
    p.apply(FileIO.match().filepattern("hdfs://path/to/*.gz"))
        // The withCompression method is optional. By default, the Beam SDK detects compression from
        // the filename.
        .apply(FileIO.readMatches().withCompression(Compression.GZIP))
        .apply(ParDo.of(new DoFn<FileIO.ReadableFile, String>() {
            @ProcessElement
            public void process(@Element FileIO.ReadableFile file) {
                // We can now access the file and its metadata.
                LOG.info("File Metadata resourceId is {} ", file.getMetadata().resourceId());
            }
        }));
    // [END FileProcessPatternAccessMetadataSnip1]
}
From source file:org.apache.beam.examples.snippets.Snippets.java
License:Apache License
public static void sideInputPatterns() {
    // This pipeline uses View.asSingleton for a placeholder external service.
    // Run in debug mode to see the output.
    Pipeline p = Pipeline.create();

    // Create a side input that updates each second.
    PCollectionView<Map<String, String>> map = p
        .apply(GenerateSequence.from(0).withRate(1, Duration.standardSeconds(5L)))
        .apply(Window.<Long>into(new GlobalWindows())
            .triggering(Repeatedly.forever(AfterProcessingTime.pastFirstElementInPane()))
            .discardingFiredPanes())
        .apply(ParDo.of(new DoFn<Long, Map<String, String>>() {
            @ProcessElement
            public void process(@Element Long input, OutputReceiver<Map<String, String>> o) {
                // Replace map with test data from the placeholder external service.
                // Add external reads here.
                o.output(PlaceholderExternalService.readTestData());
            }
        }))
        .apply(View.asSingleton());

    // Consume side input. GenerateSequence generates test data.
    // Use a real source (like PubSubIO or KafkaIO) in production.
    p.apply(GenerateSequence.from(0).withRate(1, Duration.standardSeconds(1L)))
        .apply(Window.into(FixedWindows.of(Duration.standardSeconds(1))))
        .apply(Sum.longsGlobally().withoutDefaults())
        .apply(ParDo.of(new DoFn<Long, KV<Long, Long>>() {
            @ProcessElement
            public void process(ProcessContext c) {
                Map<String, String> keyMap = c.sideInput(map);
                c.outputWithTimestamp(KV.of(1L, c.element()), Instant.now());

                LOG.debug("Value is {}, key A is {}, and key B is {}.", c.element(),
                    keyMap.get("Key_A"), keyMap.get("Key_B"));
            }
        }).withSideInputs(map));
}
From source file:org.apache.beam.fn.harness.SplittableProcessElementsRunner.java
License:Apache License
private <PositionT> void processElementTyped(WindowedValue<KV<InputT, RestrictionT>> elem) {
    checkArgument(elem.getWindows().size() == 1,
        "SPLITTABLE_PROCESS_ELEMENTS expects its input to be in 1 window, but got %s windows",
        elem.getWindows().size());
    WindowedValue<InputT> element = elem.withValue(elem.getValue().getKey());
    BoundedWindow window = elem.getWindows().iterator().next();
    this.stateAccessor = new FnApiStateAccessor(context.pipelineOptions, context.ptransformId,
        context.processBundleInstructionId, context.tagToSideInputSpecMap, context.beamFnStateClient,
        context.keyCoder, (Coder<BoundedWindow>) context.windowCoder, () -> elem, () -> window);

    RestrictionTracker<RestrictionT, PositionT> tracker = doFnInvoker
        .invokeNewTracker(elem.getValue().getValue());
    OutputAndTimeBoundedSplittableProcessElementInvoker<InputT, OutputT, RestrictionT, PositionT> processElementInvoker =
        new OutputAndTimeBoundedSplittableProcessElementInvoker<>(
            context.doFn, context.pipelineOptions, new OutputWindowedValue<OutputT>() {
                @Override
                public void outputWindowedValue(OutputT output, Instant timestamp,
                        Collection<? extends BoundedWindow> windows, PaneInfo pane) {
                    outputTo(mainOutputConsumers, WindowedValue.of(output, timestamp, windows, pane));
                }

                @Override
                public <AdditionalOutputT> void outputWindowedValue(TupleTag<AdditionalOutputT> tag,
                        AdditionalOutputT output, Instant timestamp,
                        Collection<? extends BoundedWindow> windows, PaneInfo pane) {
                    Collection<FnDataReceiver<WindowedValue<AdditionalOutputT>>> consumers =
                        (Collection) context.localNameToConsumer.get(tag.getId());
                    if (consumers == null) {
                        throw new IllegalArgumentException(String.format("Unknown output tag %s", tag));
                    }
                    outputTo(consumers, WindowedValue.of(output, timestamp, windows, pane));
                }
            },
            stateAccessor, executor, 10000, Duration.standardSeconds(10));

    SplittableProcessElementInvoker<InputT, OutputT, RestrictionT, PositionT>.Result result =
        processElementInvoker.invokeProcessElement(doFnInvoker, element, tracker);
    this.stateAccessor = null;

    if (result.getContinuation().shouldResume()) {
        WindowedValue<KV<InputT, RestrictionT>> primary = element
            .withValue(KV.of(element.getValue(), tracker.currentRestriction()));
        WindowedValue<KV<InputT, RestrictionT>> residual = element
            .withValue(KV.of(element.getValue(), result.getResidualRestriction()));
        ByteString.Output primaryBytes = ByteString.newOutput();
        ByteString.Output residualBytes = ByteString.newOutput();
        try {
            inputCoder.encode(primary, primaryBytes);
            inputCoder.encode(residual, residualBytes);
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
        BundleApplication primaryApplication = BundleApplication.newBuilder()
            .setPtransformId(context.ptransformId).setInputId(mainInputId)
            .setElement(primaryBytes.toByteString()).build();
        BundleApplication residualApplication = BundleApplication.newBuilder()
            .setPtransformId(context.ptransformId).setInputId(mainInputId)
            .setElement(residualBytes.toByteString()).build();
        context.splitListener.split(ImmutableList.of(primaryApplication),
            ImmutableList.of(DelayedBundleApplication.newBuilder().setApplication(residualApplication)
                .setRequestedExecutionTime(Timestamps.fromMillis(
                    System.currentTimeMillis() + result.getContinuation().resumeDelay().getMillis()))
                .build()));
    }
}
From source file:org.apache.beam.learning.katas.triggers.earlytriggers.GenerateEvent.java
License:Apache License
public PCollection<String> expand(PBegin input) {
    return input
        .apply(GenerateSequence.from(1).withRate(1, Duration.standardSeconds(1)))
        .apply(MapElements.into(strings()).via(num -> "event"));
}
From source file:org.apache.beam.learning.katas.triggers.eventtimetriggers.Task.java
License:Apache License
static PCollection<Long> applyTransform(PCollection<String> events) {
    return events
        .apply(Window.<String>into(FixedWindows.of(Duration.standardSeconds(5)))
            .triggering(AfterWatermark.pastEndOfWindow())
            .withAllowedLateness(Duration.ZERO)
            .discardingFiredPanes())
        .apply(Combine.globally(Count.<String>combineFn()).withoutDefaults());
}
From source file:org.apache.beam.runners.apex.translation.operators.ApexParDoOperator.java
License:Apache License
@Override
public void setup(OperatorContext context) {
    this.traceTuples = ApexStreamTuple.Logging
        .isDebugEnabled(pipelineOptions.get().as(ApexPipelineOptions.class), this);
    SideInputReader sideInputReader = NullSideInputReader.of(sideInputs);
    if (!Iterables.isEmpty(sideInputs)) {
        sideInputHandler = new SideInputHandler(Lists.newArrayList(sideInputs), sideInputStateInternals);
        sideInputReader = sideInputHandler;
    }

    for (int i = 0; i < additionalOutputTags.size(); i++) {
        @SuppressWarnings("unchecked")
        DefaultOutputPort<ApexStreamTuple<?>> port =
            (DefaultOutputPort<ApexStreamTuple<?>>) additionalOutputPorts[i];
        additionalOutputPortMapping.put(additionalOutputTags.get(i), port);
    }

    NoOpStepContext stepContext = new NoOpStepContext() {
        @Override
        public StateInternals stateInternals() {
            return currentKeyStateInternals;
        }

        @Override
        public TimerInternals timerInternals() {
            return currentKeyTimerInternals;
        }
    };

    DoFnRunner<InputT, OutputT> doFnRunner = DoFnRunners.simpleRunner(pipelineOptions.get(), doFn,
        sideInputReader, this, mainOutputTag, additionalOutputTags, stepContext, inputCoder, outputCoders,
        windowingStrategy, doFnSchemaInformation, sideInputMapping);

    doFnInvoker = DoFnInvokers.invokerFor(doFn);
    doFnInvoker.invokeSetup();

    if (this.currentKeyStateInternals != null) {
        StatefulDoFnRunner.CleanupTimer cleanupTimer = new StatefulDoFnRunner.TimeInternalsCleanupTimer(
            stepContext.timerInternals(), windowingStrategy);
        @SuppressWarnings({ "rawtypes" })
        Coder windowCoder = windowingStrategy.getWindowFn().windowCoder();
        @SuppressWarnings({ "unchecked" })
        StatefulDoFnRunner.StateCleaner<?> stateCleaner = new StatefulDoFnRunner.StateInternalsStateCleaner<>(
            doFn, stepContext.stateInternals(), windowCoder);

        doFnRunner = DoFnRunners.defaultStatefulDoFnRunner(doFn, doFnRunner, windowingStrategy, cleanupTimer,
            stateCleaner);
    }

    pushbackDoFnRunner = SimplePushbackSideInputDoFnRunner.create(doFnRunner, Lists.newArrayList(sideInputs),
        sideInputHandler);

    if (doFn instanceof ProcessFn) {
        @SuppressWarnings("unchecked")
        StateInternalsFactory<byte[]> stateInternalsFactory =
            (StateInternalsFactory<byte[]>) this.currentKeyStateInternals.getFactory();
        @SuppressWarnings({ "rawtypes", "unchecked" })
        ProcessFn<InputT, OutputT, Object, RestrictionTracker<Object, Object>> splittableDoFn = (ProcessFn) doFn;
        splittableDoFn.setStateInternalsFactory(stateInternalsFactory);
        TimerInternalsFactory<byte[]> timerInternalsFactory = key -> currentKeyTimerInternals;
        splittableDoFn.setTimerInternalsFactory(timerInternalsFactory);
        splittableDoFn.setProcessElementInvoker(new OutputAndTimeBoundedSplittableProcessElementInvoker<>(doFn,
            pipelineOptions.get(), new OutputWindowedValue<OutputT>() {
                @Override
                public void outputWindowedValue(OutputT output, Instant timestamp,
                        Collection<? extends BoundedWindow> windows, PaneInfo pane) {
                    output(mainOutputTag, WindowedValue.of(output, timestamp, windows, pane));
                }

                @Override
                public <AdditionalOutputT> void outputWindowedValue(TupleTag<AdditionalOutputT> tag,
                        AdditionalOutputT output, Instant timestamp,
                        Collection<? extends BoundedWindow> windows, PaneInfo pane) {
                    output(tag, WindowedValue.of(output, timestamp, windows, pane));
                }
            }, sideInputReader,
            Executors.newSingleThreadScheduledExecutor(Executors.defaultThreadFactory()), 10000,
            Duration.standardSeconds(10)));
    }
}
From source file:org.apache.beam.runners.dataflow.BlockingDataflowRunner.java
License:Apache License
/**
 * {@inheritDoc}
 *
 * @throws DataflowJobExecutionException if there is an exception during job execution.
 * @throws DataflowServiceException if there is an exception retrieving information about the job.
 */
@Override
public DataflowPipelineJob run(Pipeline p) {
    final DataflowPipelineJob job = dataflowRunner.run(p);

    // We ignore the potential race condition here (Ctrl-C after job submission but before the
    // shutdown hook is registered). Even if we tried to do something smarter (eg., SettableFuture)
    // the run method (which produces the job) could fail or be Ctrl-C'd before it had returned a
    // job. The display of the command to cancel the job is best-effort anyways -- RPC's could fail,
    // etc. If the user wants to verify the job was cancelled they should look at the job status.
    Thread shutdownHook = new Thread() {
        @Override
        public void run() {
            LOG.warn("Job is already running in Google Cloud Platform, Ctrl-C will not cancel it.\n"
                + "To cancel the job in the cloud, run:\n> {}",
                MonitoringUtil.getGcloudCancelCommand(options, job.getJobId()));
        }
    };

    try {
        Runtime.getRuntime().addShutdownHook(shutdownHook);

        @Nullable
        State result;
        try {
            result = job.waitUntilFinish(Duration.standardSeconds(BUILTIN_JOB_TIMEOUT_SEC));
        } catch (IOException | InterruptedException ex) {
            if (ex instanceof InterruptedException) {
                Thread.currentThread().interrupt();
            }
            LOG.debug("Exception caught while retrieving status for job {}", job.getJobId(), ex);
            throw new DataflowServiceException(job,
                "Exception caught while retrieving status for job " + job.getJobId(), ex);
        }

        if (result == null) {
            throw new DataflowServiceException(job,
                "Timed out while retrieving status for job " + job.getJobId());
        }

        LOG.info("Job finished with status {}", result);
        if (!result.isTerminal()) {
            throw new IllegalStateException(
                "Expected terminal state for job " + job.getJobId() + ", got " + result);
        }

        if (result == State.DONE) {
            return job;
        } else if (result == State.UPDATED) {
            DataflowPipelineJob newJob = job.getReplacedByJob();
            LOG.info("Job {} has been updated and is running as the new job with id {}."
                + "To access the updated job on the Dataflow monitoring console, please navigate to {}",
                job.getJobId(), newJob.getJobId(),
                MonitoringUtil.getJobMonitoringPageURL(newJob.getProjectId(), newJob.getJobId()));
            throw new DataflowJobUpdatedException(job,
                String.format("Job %s updated; new job is %s.", job.getJobId(), newJob.getJobId()), newJob);
        } else if (result == State.CANCELLED) {
            String message = String.format("Job %s cancelled by user", job.getJobId());
            LOG.info(message);
            throw new DataflowJobCancelledException(job, message);
        } else {
            throw new DataflowJobExecutionException(job,
                "Job " + job.getJobId() + " failed with status " + result);
        }
    } finally {
        Runtime.getRuntime().removeShutdownHook(shutdownHook);
    }
}