List of usage examples for org.joda.time.Duration.standardSeconds
public static Duration standardSeconds(long seconds)
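Before the project-specific examples below, a minimal usage sketch may help (the class name StandardSecondsDemo and the variable names are illustrative, not taken from any source file below). standardSeconds builds a fixed-length Duration of seconds * 1000 milliseconds; in the Beam examples that follow, the result is typically passed to FixedWindows.of(...), SlidingWindows.of(...), a splittable-DoFn invoker, or PipelineResult.waitUntilFinish(...).

import org.joda.time.Duration;

public class StandardSecondsDemo {
    public static void main(String[] args) {
        // A fixed-length duration of exactly 30 seconds (30,000 milliseconds).
        Duration thirtySeconds = Duration.standardSeconds(30);
        System.out.println(thirtySeconds.getMillis()); // 30000

        // Durations are exact millisecond lengths, so arithmetic on them is exact.
        Duration oneMinute = thirtySeconds.plus(Duration.standardSeconds(30));
        System.out.println(oneMinute.getStandardSeconds()); // 60
    }
}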
From source file:org.apache.beam.runners.dataflow.worker.graph.CreateExecutableStageNodeFunction.java
License:Apache License
@Override
public Node apply(MutableNetwork<Node, Edge> input) {
    for (Node node : input.nodes()) {
        if (node instanceof RemoteGrpcPortNode
                || node instanceof ParallelInstructionNode
                || node instanceof InstructionOutputNode) {
            continue;
        }
        throw new IllegalArgumentException(
                String.format("Network contains unknown type of node: %s", input));
    }

    // Fix all non-output nodes to have named edges.
    for (Node node : input.nodes()) {
        if (node instanceof InstructionOutputNode) {
            continue;
        }
        for (Node successor : input.successors(node)) {
            for (Edge edge : input.edgesConnecting(node, successor)) {
                if (edge instanceof DefaultEdge) {
                    input.removeEdge(edge);
                    input.addEdge(node, successor,
                            MultiOutputInfoEdge.create(new MultiOutputInfo().setTag(idGenerator.getId())));
                }
            }
        }
    }

    RunnerApi.Components.Builder componentsBuilder = RunnerApi.Components.newBuilder();
    componentsBuilder.mergeFrom(this.pipeline.getComponents());

    // We start off by replacing all edges within the graph with edges that have the named
    // outputs from the predecessor step. For ParallelInstruction Source nodes and RemoteGrpcPort
    // nodes this is a generated port id. All ParDoInstructions will have already

    // For intermediate PCollections we fabricate, we make a bogus WindowingStrategy
    // TODO: create a correct windowing strategy, including coders and environment

    // An SdkFunctionSpec is invalid without a working environment reference. We can revamp that
    // when we inline SdkFunctionSpec and FunctionSpec, both slated for inlining wherever they occur

    // Default to the Java environment if the pipeline doesn't have an environment specified.
    if (pipeline.getComponents().getEnvironmentsMap().isEmpty()) {
        String envId = Environments.JAVA_SDK_HARNESS_ENVIRONMENT.getUrn() + idGenerator.getId();
        componentsBuilder.putEnvironments(envId, Environments.JAVA_SDK_HARNESS_ENVIRONMENT);
    }

    // By default, use GlobalWindow for all languages.
    // For Java, if there is an IntervalWindowCoder, then use FixedWindows instead.
    // TODO: should get real WindowingStrategy from pipeline proto.
    String globalWindowingStrategyId = "generatedGlobalWindowingStrategy" + idGenerator.getId();
    String intervalWindowEncodingWindowingStrategyId =
            "generatedIntervalWindowEncodingWindowingStrategy" + idGenerator.getId();

    SdkComponents sdkComponents = SdkComponents.create(pipeline.getComponents());
    try {
        registerWindowingStrategy(globalWindowingStrategyId, WindowingStrategy.globalDefault(),
                componentsBuilder, sdkComponents);
        registerWindowingStrategy(intervalWindowEncodingWindowingStrategyId,
                WindowingStrategy.of(FixedWindows.of(Duration.standardSeconds(1))),
                componentsBuilder, sdkComponents);
    } catch (IOException exc) {
        throw new RuntimeException("Could not convert default windowing strategy to proto", exc);
    }

    Map<Node, String> nodesToPCollections = new HashMap<>();
    ImmutableMap.Builder<String, NameContext> ptransformIdToNameContexts = ImmutableMap.builder();
    ImmutableMap.Builder<String, Iterable<SideInputInfo>> ptransformIdToSideInputInfos =
            ImmutableMap.builder();
    ImmutableMap.Builder<String, Iterable<PCollectionView<?>>> ptransformIdToPCollectionViews =
            ImmutableMap.builder();

    // A field of ExecutableStage holding the PCollections that go to the worker side.
    Set<PCollectionNode> executableStageOutputs = new HashSet<>();
    // A field of ExecutableStage holding the PCollections that go to the runner side.
    Set<PCollectionNode> executableStageInputs = new HashSet<>();

    for (InstructionOutputNode node : Iterables.filter(input.nodes(), InstructionOutputNode.class)) {
        InstructionOutput instructionOutput = node.getInstructionOutput();
        String coderId = "generatedCoder" + idGenerator.getId();
        String windowingStrategyId;
        try (ByteString.Output output = ByteString.newOutput()) {
            try {
                Coder<?> javaCoder =
                        CloudObjects.coderFromCloudObject(CloudObject.fromSpec(instructionOutput.getCodec()));
                Coder<?> elementCoder = ((WindowedValueCoder<?>) javaCoder).getValueCoder();
                sdkComponents.registerCoder(elementCoder);
                RunnerApi.Coder coderProto = CoderTranslation.toProto(elementCoder, sdkComponents);
                componentsBuilder.putCoders(coderId, coderProto);
                // For now, the Dataflow runner harness only deals with FixedWindows.
                if (javaCoder instanceof FullWindowedValueCoder) {
                    FullWindowedValueCoder<?> windowedValueCoder = (FullWindowedValueCoder<?>) javaCoder;
                    Coder<?> windowCoder = windowedValueCoder.getWindowCoder();
                    if (windowCoder instanceof IntervalWindowCoder) {
                        windowingStrategyId = intervalWindowEncodingWindowingStrategyId;
                    } else if (windowCoder instanceof GlobalWindow.Coder) {
                        windowingStrategyId = globalWindowingStrategyId;
                    } else {
                        throw new UnsupportedOperationException(String.format(
                                "Dataflow portable runner harness doesn't support windowing with %s",
                                windowCoder));
                    }
                } else {
                    throw new UnsupportedOperationException(
                            "Dataflow portable runner harness only supports FullWindowedValueCoder");
                }
            } catch (IOException e) {
                throw new IllegalArgumentException(String.format("Unable to encode coder %s for output %s",
                        instructionOutput.getCodec(), instructionOutput), e);
            } catch (Exception e) {
                // Coder probably wasn't a java coder.
                OBJECT_MAPPER.writeValue(output, instructionOutput.getCodec());
                componentsBuilder.putCoders(coderId,
                        RunnerApi.Coder.newBuilder()
                                .setSpec(RunnerApi.FunctionSpec.newBuilder().setPayload(output.toByteString()))
                                .build());
                // For a non-java coder, hope it's GlobalWindows by default.
                // TODO(BEAM-6231): Actually discover the right windowing strategy.
                windowingStrategyId = globalWindowingStrategyId;
            }
        } catch (IOException e) {
            throw new IllegalArgumentException(String.format("Unable to encode coder %s for output %s",
                    instructionOutput.getCodec(), instructionOutput), e);
        }

        // TODO(BEAM-6275): Set correct IsBounded on generated PCollections
        String pcollectionId = node.getPcollectionId();
        RunnerApi.PCollection pCollection = RunnerApi.PCollection.newBuilder()
                .setCoderId(coderId)
                .setWindowingStrategyId(windowingStrategyId)
                .setIsBounded(RunnerApi.IsBounded.Enum.BOUNDED)
                .build();
        nodesToPCollections.put(node, pcollectionId);
        componentsBuilder.putPcollections(pcollectionId, pCollection);

        // Check whether this output collection has consumers on the worker side when
        // "use_executable_stage_bundle_execution" is set.
        if (isExecutableStageOutputPCollection(input, node)) {
            executableStageOutputs.add(PipelineNode.pCollection(pcollectionId, pCollection));
        }
        if (isExecutableStageInputPCollection(input, node)) {
            executableStageInputs.add(PipelineNode.pCollection(pcollectionId, pCollection));
        }
    }

    componentsBuilder.putAllCoders(sdkComponents.toComponents().getCodersMap());

    Set<PTransformNode> executableStageTransforms = new HashSet<>();
    Set<TimerReference> executableStageTimers = new HashSet<>();
    List<UserStateId> userStateIds = new ArrayList<>();
    Set<SideInputReference> executableStageSideInputs = new HashSet<>();

    for (ParallelInstructionNode node : Iterables.filter(input.nodes(), ParallelInstructionNode.class)) {
        ImmutableMap.Builder<String, PCollectionNode> sideInputIds = ImmutableMap.builder();
        ParallelInstruction parallelInstruction = node.getParallelInstruction();
        String ptransformId = "generatedPtransform" + idGenerator.getId();
        ptransformIdToNameContexts.put(ptransformId,
                NameContext.create(null, parallelInstruction.getOriginalName(),
                        parallelInstruction.getSystemName(), parallelInstruction.getName()));

        RunnerApi.PTransform.Builder pTransform = RunnerApi.PTransform.newBuilder();
        RunnerApi.FunctionSpec.Builder transformSpec = RunnerApi.FunctionSpec.newBuilder();

        List<String> timerIds = new ArrayList<>();
        if (parallelInstruction.getParDo() != null) {
            ParDoInstruction parDoInstruction = parallelInstruction.getParDo();
            CloudObject userFnSpec = CloudObject.fromSpec(parDoInstruction.getUserFn());
            String userFnClassName = userFnSpec.getClassName();

            if (userFnClassName.equals("CombineValuesFn") || userFnClassName.equals("KeyedCombineFn")) {
                transformSpec = transformCombineValuesFnToFunctionSpec(userFnSpec);
                ptransformIdToPCollectionViews.put(ptransformId, Collections.emptyList());
            } else {
                String parDoPTransformId = getString(userFnSpec, PropertyNames.SERIALIZED_FN);
                RunnerApi.PTransform parDoPTransform =
                        pipeline.getComponents().getTransformsOrDefault(parDoPTransformId, null);

                // TODO: only the non-null branch should exist; for migration ease only
                if (parDoPTransform != null) {
                    checkArgument(
                            parDoPTransform.getSpec().getUrn()
                                    .equals(PTransformTranslation.PAR_DO_TRANSFORM_URN),
                            "Found transform \"%s\" for ParallelDo instruction, "
                                    + " but that transform had unexpected URN \"%s\" (expected \"%s\")",
                            parDoPTransformId, parDoPTransform.getSpec().getUrn(),
                            PTransformTranslation.PAR_DO_TRANSFORM_URN);

                    RunnerApi.ParDoPayload parDoPayload;
                    try {
                        parDoPayload =
                                RunnerApi.ParDoPayload.parseFrom(parDoPTransform.getSpec().getPayload());
                    } catch (InvalidProtocolBufferException exc) {
                        throw new RuntimeException("ParDo did not have a ParDoPayload", exc);
                    }

                    // Build the necessary components to inform the SDK Harness of the pipeline's
                    // user timers and user state.
                    for (Map.Entry<String, RunnerApi.TimerSpec> entry :
                            parDoPayload.getTimerSpecsMap().entrySet()) {
                        timerIds.add(entry.getKey());
                    }
                    for (Map.Entry<String, RunnerApi.StateSpec> entry :
                            parDoPayload.getStateSpecsMap().entrySet()) {
                        UserStateId.Builder builder = UserStateId.newBuilder();
                        builder.setTransformId(parDoPTransformId);
                        builder.setLocalName(entry.getKey());
                        userStateIds.add(builder.build());
                    }

                    // To facilitate the creation of Set executableStageSideInputs.
                    for (String sideInputTag : parDoPayload.getSideInputsMap().keySet()) {
                        String sideInputPCollectionId = parDoPTransform.getInputsOrThrow(sideInputTag);
                        RunnerApi.PCollection sideInputPCollection =
                                pipeline.getComponents().getPcollectionsOrThrow(sideInputPCollectionId);
                        pTransform.putInputs(sideInputTag, sideInputPCollectionId);
                        PCollectionNode pCollectionNode =
                                PipelineNode.pCollection(sideInputPCollectionId, sideInputPCollection);
                        sideInputIds.put(sideInputTag, pCollectionNode);
                    }

                    // To facilitate the creation of Map(ptransformId -> pCollectionView), which is
                    // required by constructing an ExecutableStageNode.
                    ImmutableList.Builder<PCollectionView<?>> pcollectionViews = ImmutableList.builder();
                    for (Map.Entry<String, RunnerApi.SideInput> sideInputEntry :
                            parDoPayload.getSideInputsMap().entrySet()) {
                        pcollectionViews.add(RegisterNodeFunction.transformSideInputForRunner(pipeline,
                                parDoPTransform, sideInputEntry.getKey(), sideInputEntry.getValue()));
                    }
                    ptransformIdToPCollectionViews.put(ptransformId, pcollectionViews.build());

                    transformSpec.setUrn(PTransformTranslation.PAR_DO_TRANSFORM_URN)
                            .setPayload(parDoPayload.toByteString());
                } else {
                    // legacy path - bytes are the SdkFunctionSpec's payload field, basically, and
                    // SDKs expect it in the PTransform's payload field
                    byte[] userFnBytes = getBytes(userFnSpec, PropertyNames.SERIALIZED_FN);
                    transformSpec.setUrn(ParDoTranslation.CUSTOM_JAVA_DO_FN_URN)
                            .setPayload(ByteString.copyFrom(userFnBytes));
                }

                if (parDoInstruction.getSideInputs() != null) {
                    ptransformIdToSideInputInfos.put(ptransformId,
                            forSideInputInfos(parDoInstruction.getSideInputs(), true));
                }
            }
        } else if (parallelInstruction.getRead() != null) {
            ReadInstruction readInstruction = parallelInstruction.getRead();
            CloudObject sourceSpec = CloudObject
                    .fromSpec(CloudSourceUtils.flattenBaseSpecs(readInstruction.getSource()).getSpec());
            // TODO: Need to plumb through the SDK specific function spec.
            transformSpec.setUrn(JAVA_SOURCE_URN);
            try {
                byte[] serializedSource =
                        Base64.getDecoder().decode(getString(sourceSpec, SERIALIZED_SOURCE));
                ByteString sourceByteString = ByteString.copyFrom(serializedSource);
                transformSpec.setPayload(sourceByteString);
            } catch (Exception e) {
                throw new IllegalArgumentException(
                        String.format("Unable to process Read %s", parallelInstruction), e);
            }
        } else if (parallelInstruction.getFlatten() != null) {
            transformSpec.setUrn(PTransformTranslation.FLATTEN_TRANSFORM_URN);
        } else {
            throw new IllegalArgumentException(
                    String.format("Unknown type of ParallelInstruction %s", parallelInstruction));
        }

        // Even though this is a for-loop, there is only going to be a single PCollection as the
        // predecessor in a ParDo. This PCollection is called the "main input".
        for (Node predecessorOutput : input.predecessors(node)) {
            pTransform.putInputs("generatedInput" + idGenerator.getId(),
                    nodesToPCollections.get(predecessorOutput));
        }

        for (Edge edge : input.outEdges(node)) {
            Node nodeOutput = input.incidentNodes(edge).target();
            MultiOutputInfoEdge edge2 = (MultiOutputInfoEdge) edge;
            pTransform.putOutputs(edge2.getMultiOutputInfo().getTag(),
                    nodesToPCollections.get(nodeOutput));
        }

        pTransform.setSpec(transformSpec);
        PTransformNode pTransformNode = PipelineNode.pTransform(ptransformId, pTransform.build());
        executableStageTransforms.add(pTransformNode);

        for (String timerId : timerIds) {
            executableStageTimers.add(TimerReference.of(pTransformNode, timerId));
        }

        ImmutableMap<String, PCollectionNode> sideInputIdToPCollectionNodes = sideInputIds.build();
        for (String sideInputTag : sideInputIdToPCollectionNodes.keySet()) {
            SideInputReference sideInputReference = SideInputReference.of(pTransformNode, sideInputTag,
                    sideInputIdToPCollectionNodes.get(sideInputTag));
            executableStageSideInputs.add(sideInputReference);
        }
    }

    if (executableStageInputs.size() != 1) {
        throw new UnsupportedOperationException("ExecutableStage only supports one input PCollection");
    }
    PCollectionNode executableInput = executableStageInputs.iterator().next();

    RunnerApi.Components executableStageComponents = componentsBuilder.build();

    // Get the Environment from the ptransform; otherwise, use JAVA_SDK_HARNESS_ENVIRONMENT as default.
    Environment executableStageEnv =
            getEnvironmentFromPTransform(executableStageComponents, executableStageTransforms);
    if (executableStageEnv == null) {
        executableStageEnv = Environments.JAVA_SDK_HARNESS_ENVIRONMENT;
    }

    Set<UserStateReference> executableStageUserStateReference = new HashSet<>();
    for (UserStateId userStateId : userStateIds) {
        executableStageUserStateReference
                .add(UserStateReference.fromUserStateId(userStateId, executableStageComponents));
    }

    ExecutableStage executableStage = ImmutableExecutableStage.ofFullComponents(
            executableStageComponents, executableStageEnv, executableInput, executableStageSideInputs,
            executableStageUserStateReference, executableStageTimers, executableStageTransforms,
            executableStageOutputs);
    return ExecutableStageNode.create(executableStage, ptransformIdToNameContexts.build(),
            ptransformIdToSideInputInfos.build(), ptransformIdToPCollectionViews.build());
}
From source file:org.apache.beam.runners.direct.SplittableProcessElementsEvaluatorFactory.java
License:Apache License
@SuppressWarnings({ "unchecked", "rawtypes" }) private TransformEvaluator<KeyedWorkItem<byte[], KV<InputT, RestrictionT>>> createEvaluator( AppliedPTransform<PCollection<KeyedWorkItem<byte[], KV<InputT, RestrictionT>>>, PCollectionTuple, ProcessElements<InputT, OutputT, RestrictionT, PositionT>> application, CommittedBundle<InputT> inputBundle) throws Exception { final ProcessElements<InputT, OutputT, RestrictionT, PositionT> transform = application.getTransform(); final DoFnLifecycleManagerRemovingTransformEvaluator<KeyedWorkItem<byte[], KV<InputT, RestrictionT>>> evaluator = delegateFactory .createEvaluator((AppliedPTransform) application, (PCollection<KeyedWorkItem<byte[], KV<InputT, RestrictionT>>>) inputBundle.getPCollection(), inputBundle.getKey(), application.getTransform().getSideInputs(), application.getTransform().getMainOutputTag(), application.getTransform().getAdditionalOutputTags().getAll(), DoFnSchemaInformation.create(), Collections.emptyMap()); final ParDoEvaluator<KeyedWorkItem<byte[], KV<InputT, RestrictionT>>> pde = evaluator.getParDoEvaluator(); final ProcessFn<InputT, OutputT, RestrictionT, PositionT> processFn = (ProcessFn<InputT, OutputT, RestrictionT, PositionT>) ProcessFnRunner.class .cast(pde.getFnRunner()).getFn(); final DirectExecutionContext.DirectStepContext stepContext = pde.getStepContext(); processFn.setStateInternalsFactory(key -> stepContext.stateInternals()); processFn.setTimerInternalsFactory(key -> stepContext.timerInternals()); OutputWindowedValue<OutputT> outputWindowedValue = new OutputWindowedValue<OutputT>() { private final OutputManager outputManager = pde.getOutputManager(); @Override// www .j a v a 2 s . c o m public void outputWindowedValue(OutputT output, Instant timestamp, Collection<? extends BoundedWindow> windows, PaneInfo pane) { outputManager.output(transform.getMainOutputTag(), WindowedValue.of(output, timestamp, windows, pane)); } @Override public <AdditionalOutputT> void outputWindowedValue(TupleTag<AdditionalOutputT> tag, AdditionalOutputT output, Instant timestamp, Collection<? extends BoundedWindow> windows, PaneInfo pane) { outputManager.output(tag, WindowedValue.of(output, timestamp, windows, pane)); } }; processFn.setProcessElementInvoker(new OutputAndTimeBoundedSplittableProcessElementInvoker<>( transform.getFn(), options, outputWindowedValue, evaluationContext.createSideInputReader(transform.getSideInputs()), ses, // Setting small values here to stimulate frequent checkpointing and better exercise // splittable DoFn's in that respect. 100, Duration.standardSeconds(1))); return evaluator; }
From source file:org.apache.beam.runners.flink.examples.streaming.AutoComplete.java
License:Apache License
public static void main(String[] args) throws IOException {
    Options options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class);
    options.setStreaming(true);
    options.setCheckpointingInterval(1000L);
    options.setNumberOfExecutionRetries(5);
    options.setExecutionRetryDelay(3000L);
    options.setRunner(FlinkRunner.class);

    WindowFn<Object, ?> windowFn = FixedWindows.of(Duration.standardSeconds(options.getWindowSize()));

    // Create the pipeline.
    Pipeline p = Pipeline.create(options);
    PCollection<KV<String, List<CompletionCandidate>>> toWrite = p
            .apply("WordStream", Read.from(new UnboundedSocketSource<>("localhost", 9999, '\n', 3)))
            .apply(ParDo.of(new ExtractWordsFn()))
            .apply(Window.<String>into(windowFn)
                    .triggering(AfterWatermark.pastEndOfWindow())
                    .withAllowedLateness(Duration.ZERO)
                    .discardingFiredPanes())
            .apply(ComputeTopCompletions.top(10, options.getRecursive()));

    toWrite.apply("FormatForPerTaskFile", ParDo.of(new FormatForPerTaskLocalFile()))
            .apply(TextIO.Write.to("./outputAutoComplete.txt"));

    p.run();
}
From source file:org.apache.beam.runners.flink.examples.streaming.JoinExamples.java
License:Apache License
public static void main(String[] args) throws Exception {
    Options options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class);
    options.setStreaming(true);
    options.setCheckpointingInterval(1000L);
    options.setNumberOfExecutionRetries(5);
    options.setExecutionRetryDelay(3000L);
    options.setRunner(FlinkRunner.class);

    WindowFn<Object, ?> windowFn = FixedWindows.of(Duration.standardSeconds(options.getWindowSize()));

    Pipeline p = Pipeline.create(options);

    // the following two 'applys' create multiple inputs to our pipeline, one for each
    // of our two input sources.
    PCollection<String> streamA = p
            .apply("FirstStream", Read.from(new UnboundedSocketSource<>("localhost", 9999, '\n', 3)))
            .apply(Window.<String>into(windowFn)
                    .triggering(AfterWatermark.pastEndOfWindow())
                    .withAllowedLateness(Duration.ZERO)
                    .discardingFiredPanes());
    PCollection<String> streamB = p
            .apply("SecondStream", Read.from(new UnboundedSocketSource<>("localhost", 9998, '\n', 3)))
            .apply(Window.<String>into(windowFn)
                    .triggering(AfterWatermark.pastEndOfWindow())
                    .withAllowedLateness(Duration.ZERO)
                    .discardingFiredPanes());

    PCollection<String> formattedResults = joinEvents(streamA, streamB);
    formattedResults.apply(TextIO.Write.to("./outputJoin.txt"));
    p.run();
}
From source file:org.apache.beam.runners.flink.examples.streaming.KafkaWindowedWordCountExample.java
License:Apache License
public static void main(String[] args) {
    PipelineOptionsFactory.register(KafkaStreamingWordCountOptions.class);
    KafkaStreamingWordCountOptions options =
            PipelineOptionsFactory.fromArgs(args).as(KafkaStreamingWordCountOptions.class);
    options.setJobName("KafkaExample - WindowSize: " + options.getWindowSize() + " seconds");
    options.setStreaming(true);
    options.setCheckpointingInterval(1000L);
    options.setNumberOfExecutionRetries(5);
    options.setExecutionRetryDelay(3000L);
    options.setRunner(FlinkRunner.class);

    System.out.println(options.getKafkaTopic() + " " + options.getZookeeper() + " "
            + options.getBroker() + " " + options.getGroup());
    Pipeline pipeline = Pipeline.create(options);

    Properties p = new Properties();
    p.setProperty("zookeeper.connect", options.getZookeeper());
    p.setProperty("bootstrap.servers", options.getBroker());
    p.setProperty("group.id", options.getGroup());

    // this is the Flink consumer that reads the input to
    // the program from a kafka topic.
    FlinkKafkaConsumer08<String> kafkaConsumer =
            new FlinkKafkaConsumer08<>(options.getKafkaTopic(), new SimpleStringSchema(), p);

    PCollection<String> words = pipeline
            .apply("StreamingWordCount", Read.from(UnboundedFlinkSource.of(kafkaConsumer)))
            .apply(ParDo.of(new ExtractWordsFn()))
            .apply(Window.<String>into(FixedWindows.of(Duration.standardSeconds(options.getWindowSize())))
                    .triggering(AfterWatermark.pastEndOfWindow())
                    .withAllowedLateness(Duration.ZERO)
                    .discardingFiredPanes());

    PCollection<KV<String, Long>> wordCounts = words.apply(Count.<String>perElement());

    wordCounts.apply(ParDo.of(new FormatAsStringFn())).apply(TextIO.Write.to("./outputKafka.txt"));

    pipeline.run();
}
From source file:org.apache.beam.runners.flink.examples.streaming.WindowedWordCount.java
License:Apache License
public static void main(String[] args) throws IOException {
    StreamingWordCountOptions options =
            PipelineOptionsFactory.fromArgs(args).withValidation().as(StreamingWordCountOptions.class);
    options.setStreaming(true);
    options.setWindowSize(10L);
    options.setSlide(5L);
    options.setCheckpointingInterval(1000L);
    options.setNumberOfExecutionRetries(5);
    options.setExecutionRetryDelay(3000L);
    options.setRunner(FlinkRunner.class);

    LOG.info("Windowed WordCount with Sliding Windows of " + options.getWindowSize()
            + " sec. and a slide of " + options.getSlide());

    Pipeline pipeline = Pipeline.create(options);

    PCollection<String> words = pipeline
            .apply("StreamingWordCount", Read.from(new UnboundedSocketSource<>("localhost", 9999, '\n', 3)))
            .apply(ParDo.of(new ExtractWordsFn()))
            .apply(Window.<String>into(
                            SlidingWindows.of(Duration.standardSeconds(options.getWindowSize()))
                                    .every(Duration.standardSeconds(options.getSlide())))
                    .triggering(AfterWatermark.pastEndOfWindow())
                    .withAllowedLateness(Duration.ZERO)
                    .discardingFiredPanes());

    PCollection<KV<String, Long>> wordCounts = words.apply(Count.<String>perElement());

    wordCounts.apply(ParDo.of(new FormatAsStringFn())).apply(TextIO.Write.to("./outputWordCount.txt"));

    pipeline.run();
}
From source file:org.apache.beam.runners.flink.translation.wrappers.streaming.SplittableDoFnOperator.java
License:Apache License
@Override
public void initializeState(StateInitializationContext context) throws Exception {
    super.initializeState(context);

    checkState(doFn instanceof ProcessFn);

    // this will implicitly be keyed by the key of the incoming
    // element or by the key of a firing timer
    StateInternalsFactory<byte[]> stateInternalsFactory = key -> (StateInternals) keyedStateInternals;

    // this will implicitly be keyed like the StateInternalsFactory
    TimerInternalsFactory<byte[]> timerInternalsFactory = key -> timerInternals;

    executorService = Executors.newSingleThreadScheduledExecutor(Executors.defaultThreadFactory());

    ((ProcessFn) doFn).setStateInternalsFactory(stateInternalsFactory);
    ((ProcessFn) doFn).setTimerInternalsFactory(timerInternalsFactory);
    ((ProcessFn) doFn).setProcessElementInvoker(new OutputAndTimeBoundedSplittableProcessElementInvoker<>(
            doFn,
            serializedOptions.get(),
            new OutputWindowedValue<OutputT>() {
                @Override
                public void outputWindowedValue(OutputT output, Instant timestamp,
                        Collection<? extends BoundedWindow> windows, PaneInfo pane) {
                    outputManager.output(mainOutputTag,
                            WindowedValue.of(output, timestamp, windows, pane));
                }

                @Override
                public <AdditionalOutputT> void outputWindowedValue(TupleTag<AdditionalOutputT> tag,
                        AdditionalOutputT output, Instant timestamp,
                        Collection<? extends BoundedWindow> windows, PaneInfo pane) {
                    outputManager.output(tag, WindowedValue.of(output, timestamp, windows, pane));
                }
            },
            sideInputReader,
            executorService,
            10000,
            Duration.standardSeconds(10)));
}
From source file:org.apache.beam.runners.flink.translation.wrappers.streaming.SplittableDoFnOperator.java
License:Apache License
@Override
public void close() throws Exception {
    super.close();

    executorService.shutdown();

    long shutdownTimeout = Duration.standardSeconds(10).getMillis();
    try {
        if (!executorService.awaitTermination(shutdownTimeout, TimeUnit.MILLISECONDS)) {
            LOG.debug("The scheduled executor service did not properly terminate. Shutting "
                    + "it down now.");
            executorService.shutdownNow();
        }
    } catch (InterruptedException e) {
        LOG.debug("Could not properly await the termination of the scheduled executor service.", e);
        executorService.shutdownNow();
    }
}
From source file:org.apache.beam.samples.unbounded.KafkaToKafka.java
License:Apache License
public static void main(String[] args) throws Exception {
    Options options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class);
    LOG.info(options.toString());
    System.out.println(options.toString());

    Pipeline pipeline = Pipeline.create(options);

    // now we connect to the queue and process every event
    PCollection<String> data = pipeline
            .apply("ReadFromKafka",
                    KafkaIO.<String, String>read()
                            .withBootstrapServers(options.getKafkaServer())
                            .withTopics(Collections.singletonList(options.getInputTopic()))
                            .withoutMetadata())
            .apply("ExtractPayload", Values.<String>create());

    data.apply(ParDo.of(new DoFn<String, String>() {
        @ProcessElement
        public void processElement(ProcessContext c) {
            System.out.println(String.format("** element |%s| **", c.element()));
        }
    }));

    // We filter the events for a given country (IN=India) and send them to their own topic
    final String country = "IN";
    PCollection<String> eventsInIndia = data.apply("FilterByCountry",
            ParDo.of(new DoFn<String, String>() {
                @ProcessElement
                public void processElement(ProcessContext c) {
                    // if (getCountry(c.element()).equals(country)) {
                    c.output(c.element());
                    // }
                }
            }));

    PCollection<KV<String, String>> eventsInIndiaKV = eventsInIndia.apply("ExtractPayload",
            ParDo.of(new DoFn<String, KV<String, String>>() {
                @ProcessElement
                public void processElement(ProcessContext c) throws Exception {
                    c.output(KV.of("india", c.element()));
                }
            }));

    eventsInIndiaKV.apply("WriteToKafka",
            KafkaIO.<String, String>write()
                    .withBootstrapServers(options.getKafkaServer())
                    .withTopic(options.getOutputTopic())
                    .withKeySerializer(org.apache.kafka.common.serialization.StringSerializer.class)
                    .withValueSerializer(org.apache.kafka.common.serialization.StringSerializer.class));

    PipelineResult run = pipeline.run();
    run.waitUntilFinish(Duration.standardSeconds(options.getDuration()));
}
From source file:org.apache.beam.sdk.extensions.sql.impl.rel.BeamEnumerableConverter.java
License:Apache License
private static PipelineResult limitRun(PipelineOptions options, BeamRelNode node, DoFn<Row, Void> doFn,
        Queue<Row> values, int limitCount) {
    options.as(DirectOptions.class).setBlockOnRun(false);
    Pipeline pipeline = Pipeline.create(options);
    PCollection<Row> resultCollection = BeamSqlRelUtils.toPCollection(pipeline, node);
    resultCollection.apply(ParDo.of(doFn));

    PipelineResult result = pipeline.run();

    State state;
    while (true) {
        // Check the pipeline state every second.
        state = result.waitUntilFinish(Duration.standardSeconds(1));
        if (state != null && state.isTerminal()) {
            break;
        }
        try {
            if (values.size() >= limitCount) {
                result.cancel();
                break;
            }
        } catch (IOException e) {
            LOG.warn(e.toString());
            break;
        }
    }

    return result;
}