List of usage examples for org.joda.time.Duration.standardSeconds
public static Duration standardSeconds(long seconds)
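Before the project-specific examples below, a minimal usage sketch may help (the class name StandardSecondsDemo and the variable names are illustrative, not taken from any source file below). standardSeconds builds a fixed-length Duration of seconds * 1000 milliseconds; in the Beam examples that follow, the result is typically passed to FixedWindows.of(...), SlidingWindows.of(...), a splittable-DoFn invoker, or PipelineResult.waitUntilFinish(...).

import org.joda.time.Duration;

public class StandardSecondsDemo {
    public static void main(String[] args) {
        // A fixed-length duration of exactly 30 seconds (30,000 milliseconds).
        Duration thirtySeconds = Duration.standardSeconds(30);
        System.out.println(thirtySeconds.getMillis()); // 30000

        // Durations are exact millisecond lengths, so arithmetic on them is exact.
        Duration oneMinute = thirtySeconds.plus(Duration.standardSeconds(30));
        System.out.println(oneMinute.getStandardSeconds()); // 60
    }
}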
From source file:org.apache.beam.runners.dataflow.worker.graph.CreateExecutableStageNodeFunction.java
License:Apache License
@Override
public Node apply(MutableNetwork<Node, Edge> input) {
    for (Node node : input.nodes()) {
        if (node instanceof RemoteGrpcPortNode
                || node instanceof ParallelInstructionNode
                || node instanceof InstructionOutputNode) {
            continue;
        }
        throw new IllegalArgumentException(
                String.format("Network contains unknown type of node: %s", input));
    }

    // Fix all non-output nodes to have named edges.
    for (Node node : input.nodes()) {
        if (node instanceof InstructionOutputNode) {
            continue;
        }
        for (Node successor : input.successors(node)) {
            for (Edge edge : input.edgesConnecting(node, successor)) {
                if (edge instanceof DefaultEdge) {
                    input.removeEdge(edge);
                    input.addEdge(node, successor,
                            MultiOutputInfoEdge.create(new MultiOutputInfo().setTag(idGenerator.getId())));
                }
            }
        }
    }

    RunnerApi.Components.Builder componentsBuilder = RunnerApi.Components.newBuilder();
    componentsBuilder.mergeFrom(this.pipeline.getComponents());

    // We start off by replacing all edges within the graph with edges that have the named
    // outputs from the predecessor step. For ParallelInstruction Source nodes and RemoteGrpcPort
    // nodes this is a generated port id. All ParDoInstructions will have already

    // For intermediate PCollections we fabricate, we make a bogus WindowingStrategy
    // TODO: create a correct windowing strategy, including coders and environment

    // An SdkFunctionSpec is invalid without a working environment reference. We can revamp that
    // when we inline SdkFunctionSpec and FunctionSpec, both slated for inlining wherever they occur

    // Default to the Java environment if the pipeline doesn't have an environment specified.
    if (pipeline.getComponents().getEnvironmentsMap().isEmpty()) {
        String envId = Environments.JAVA_SDK_HARNESS_ENVIRONMENT.getUrn() + idGenerator.getId();
        componentsBuilder.putEnvironments(envId, Environments.JAVA_SDK_HARNESS_ENVIRONMENT);
    }

    // By default, use GlobalWindow for all languages.
    // For Java, if there is an IntervalWindowCoder, then use FixedWindows instead.
    // TODO: should get real WindowingStrategy from pipeline proto.
    String globalWindowingStrategyId = "generatedGlobalWindowingStrategy" + idGenerator.getId();
    String intervalWindowEncodingWindowingStrategyId =
            "generatedIntervalWindowEncodingWindowingStrategy" + idGenerator.getId();

    SdkComponents sdkComponents = SdkComponents.create(pipeline.getComponents());
    try {
        registerWindowingStrategy(globalWindowingStrategyId, WindowingStrategy.globalDefault(),
                componentsBuilder, sdkComponents);
        registerWindowingStrategy(intervalWindowEncodingWindowingStrategyId,
                WindowingStrategy.of(FixedWindows.of(Duration.standardSeconds(1))),
                componentsBuilder, sdkComponents);
    } catch (IOException exc) {
        throw new RuntimeException("Could not convert default windowing strategy to proto", exc);
    }

    Map<Node, String> nodesToPCollections = new HashMap<>();
    ImmutableMap.Builder<String, NameContext> ptransformIdToNameContexts = ImmutableMap.builder();
    ImmutableMap.Builder<String, Iterable<SideInputInfo>> ptransformIdToSideInputInfos =
            ImmutableMap.builder();
    ImmutableMap.Builder<String, Iterable<PCollectionView<?>>> ptransformIdToPCollectionViews =
            ImmutableMap.builder();

    // A field of ExecutableStage holding the PCollections that go to the worker side.
    Set<PCollectionNode> executableStageOutputs = new HashSet<>();
    // A field of ExecutableStage holding the PCollections that go to the runner side.
    Set<PCollectionNode> executableStageInputs = new HashSet<>();

    for (InstructionOutputNode node : Iterables.filter(input.nodes(), InstructionOutputNode.class)) {
        InstructionOutput instructionOutput = node.getInstructionOutput();
        String coderId = "generatedCoder" + idGenerator.getId();
        String windowingStrategyId;
        try (ByteString.Output output = ByteString.newOutput()) {
            try {
                Coder<?> javaCoder =
                        CloudObjects.coderFromCloudObject(CloudObject.fromSpec(instructionOutput.getCodec()));
                Coder<?> elementCoder = ((WindowedValueCoder<?>) javaCoder).getValueCoder();
                sdkComponents.registerCoder(elementCoder);
                RunnerApi.Coder coderProto = CoderTranslation.toProto(elementCoder, sdkComponents);
                componentsBuilder.putCoders(coderId, coderProto);
                // For now, the Dataflow runner harness only deals with FixedWindows.
                if (javaCoder instanceof FullWindowedValueCoder) {
                    FullWindowedValueCoder<?> windowedValueCoder = (FullWindowedValueCoder<?>) javaCoder;
                    Coder<?> windowCoder = windowedValueCoder.getWindowCoder();
                    if (windowCoder instanceof IntervalWindowCoder) {
                        windowingStrategyId = intervalWindowEncodingWindowingStrategyId;
                    } else if (windowCoder instanceof GlobalWindow.Coder) {
                        windowingStrategyId = globalWindowingStrategyId;
                    } else {
                        throw new UnsupportedOperationException(String.format(
                                "Dataflow portable runner harness doesn't support windowing with %s",
                                windowCoder));
                    }
                } else {
                    throw new UnsupportedOperationException(
                            "Dataflow portable runner harness only supports FullWindowedValueCoder");
                }
            } catch (IOException e) {
                throw new IllegalArgumentException(String.format("Unable to encode coder %s for output %s",
                        instructionOutput.getCodec(), instructionOutput), e);
            } catch (Exception e) {
                // Coder probably wasn't a java coder.
                OBJECT_MAPPER.writeValue(output, instructionOutput.getCodec());
                componentsBuilder.putCoders(coderId,
                        RunnerApi.Coder.newBuilder()
                                .setSpec(RunnerApi.FunctionSpec.newBuilder().setPayload(output.toByteString()))
                                .build());
                // For a non-java coder, hope it's GlobalWindows by default.
                // TODO(BEAM-6231): Actually discover the right windowing strategy.
                windowingStrategyId = globalWindowingStrategyId;
            }
        } catch (IOException e) {
            throw new IllegalArgumentException(String.format("Unable to encode coder %s for output %s",
                    instructionOutput.getCodec(), instructionOutput), e);
        }

        // TODO(BEAM-6275): Set correct IsBounded on generated PCollections
        String pcollectionId = node.getPcollectionId();
        RunnerApi.PCollection pCollection = RunnerApi.PCollection.newBuilder()
                .setCoderId(coderId)
                .setWindowingStrategyId(windowingStrategyId)
                .setIsBounded(RunnerApi.IsBounded.Enum.BOUNDED)
                .build();
        nodesToPCollections.put(node, pcollectionId);
        componentsBuilder.putPcollections(pcollectionId, pCollection);

        // Check whether this output collection has consumers on the worker side when
        // "use_executable_stage_bundle_execution" is set.
        if (isExecutableStageOutputPCollection(input, node)) {
            executableStageOutputs.add(PipelineNode.pCollection(pcollectionId, pCollection));
        }
        if (isExecutableStageInputPCollection(input, node)) {
            executableStageInputs.add(PipelineNode.pCollection(pcollectionId, pCollection));
        }
    }

    componentsBuilder.putAllCoders(sdkComponents.toComponents().getCodersMap());

    Set<PTransformNode> executableStageTransforms = new HashSet<>();
    Set<TimerReference> executableStageTimers = new HashSet<>();
    List<UserStateId> userStateIds = new ArrayList<>();
    Set<SideInputReference> executableStageSideInputs = new HashSet<>();

    for (ParallelInstructionNode node : Iterables.filter(input.nodes(), ParallelInstructionNode.class)) {
        ImmutableMap.Builder<String, PCollectionNode> sideInputIds = ImmutableMap.builder();
        ParallelInstruction parallelInstruction = node.getParallelInstruction();
        String ptransformId = "generatedPtransform" + idGenerator.getId();
        ptransformIdToNameContexts.put(ptransformId,
                NameContext.create(null, parallelInstruction.getOriginalName(),
                        parallelInstruction.getSystemName(), parallelInstruction.getName()));

        RunnerApi.PTransform.Builder pTransform = RunnerApi.PTransform.newBuilder();
        RunnerApi.FunctionSpec.Builder transformSpec = RunnerApi.FunctionSpec.newBuilder();

        List<String> timerIds = new ArrayList<>();
        if (parallelInstruction.getParDo() != null) {
            ParDoInstruction parDoInstruction = parallelInstruction.getParDo();
            CloudObject userFnSpec = CloudObject.fromSpec(parDoInstruction.getUserFn());
            String userFnClassName = userFnSpec.getClassName();

            if (userFnClassName.equals("CombineValuesFn") || userFnClassName.equals("KeyedCombineFn")) {
                transformSpec = transformCombineValuesFnToFunctionSpec(userFnSpec);
                ptransformIdToPCollectionViews.put(ptransformId, Collections.emptyList());
            } else {
                String parDoPTransformId = getString(userFnSpec, PropertyNames.SERIALIZED_FN);
                RunnerApi.PTransform parDoPTransform =
                        pipeline.getComponents().getTransformsOrDefault(parDoPTransformId, null);

                // TODO: only the non-null branch should exist; for migration ease only
                if (parDoPTransform != null) {
                    checkArgument(
                            parDoPTransform.getSpec().getUrn()
                                    .equals(PTransformTranslation.PAR_DO_TRANSFORM_URN),
                            "Found transform \"%s\" for ParallelDo instruction, "
                                    + " but that transform had unexpected URN \"%s\" (expected \"%s\")",
                            parDoPTransformId, parDoPTransform.getSpec().getUrn(),
                            PTransformTranslation.PAR_DO_TRANSFORM_URN);

                    RunnerApi.ParDoPayload parDoPayload;
                    try {
                        parDoPayload =
                                RunnerApi.ParDoPayload.parseFrom(parDoPTransform.getSpec().getPayload());
                    } catch (InvalidProtocolBufferException exc) {
                        throw new RuntimeException("ParDo did not have a ParDoPayload", exc);
                    }

                    // Build the necessary components to inform the SDK Harness of the pipeline's
                    // user timers and user state.
                    for (Map.Entry<String, RunnerApi.TimerSpec> entry :
                            parDoPayload.getTimerSpecsMap().entrySet()) {
                        timerIds.add(entry.getKey());
                    }
                    for (Map.Entry<String, RunnerApi.StateSpec> entry :
                            parDoPayload.getStateSpecsMap().entrySet()) {
                        UserStateId.Builder builder = UserStateId.newBuilder();
                        builder.setTransformId(parDoPTransformId);
                        builder.setLocalName(entry.getKey());
                        userStateIds.add(builder.build());
                    }

                    // To facilitate the creation of Set executableStageSideInputs.
                    for (String sideInputTag : parDoPayload.getSideInputsMap().keySet()) {
                        String sideInputPCollectionId = parDoPTransform.getInputsOrThrow(sideInputTag);
                        RunnerApi.PCollection sideInputPCollection =
                                pipeline.getComponents().getPcollectionsOrThrow(sideInputPCollectionId);
                        pTransform.putInputs(sideInputTag, sideInputPCollectionId);
                        PCollectionNode pCollectionNode =
                                PipelineNode.pCollection(sideInputPCollectionId, sideInputPCollection);
                        sideInputIds.put(sideInputTag, pCollectionNode);
                    }

                    // To facilitate the creation of Map(ptransformId -> pCollectionView), which is
                    // required by constructing an ExecutableStageNode.
                    ImmutableList.Builder<PCollectionView<?>> pcollectionViews = ImmutableList.builder();
                    for (Map.Entry<String, RunnerApi.SideInput> sideInputEntry :
                            parDoPayload.getSideInputsMap().entrySet()) {
                        pcollectionViews.add(RegisterNodeFunction.transformSideInputForRunner(pipeline,
                                parDoPTransform, sideInputEntry.getKey(), sideInputEntry.getValue()));
                    }
                    ptransformIdToPCollectionViews.put(ptransformId, pcollectionViews.build());

                    transformSpec.setUrn(PTransformTranslation.PAR_DO_TRANSFORM_URN)
                            .setPayload(parDoPayload.toByteString());
                } else {
                    // legacy path - bytes are the SdkFunctionSpec's payload field, basically, and
                    // SDKs expect it in the PTransform's payload field
                    byte[] userFnBytes = getBytes(userFnSpec, PropertyNames.SERIALIZED_FN);
                    transformSpec.setUrn(ParDoTranslation.CUSTOM_JAVA_DO_FN_URN)
                            .setPayload(ByteString.copyFrom(userFnBytes));
                }

                if (parDoInstruction.getSideInputs() != null) {
                    ptransformIdToSideInputInfos.put(ptransformId,
                            forSideInputInfos(parDoInstruction.getSideInputs(), true));
                }
            }
        } else if (parallelInstruction.getRead() != null) {
            ReadInstruction readInstruction = parallelInstruction.getRead();
            CloudObject sourceSpec = CloudObject
                    .fromSpec(CloudSourceUtils.flattenBaseSpecs(readInstruction.getSource()).getSpec());
            // TODO: Need to plumb through the SDK specific function spec.
            transformSpec.setUrn(JAVA_SOURCE_URN);
            try {
                byte[] serializedSource =
                        Base64.getDecoder().decode(getString(sourceSpec, SERIALIZED_SOURCE));
                ByteString sourceByteString = ByteString.copyFrom(serializedSource);
                transformSpec.setPayload(sourceByteString);
            } catch (Exception e) {
                throw new IllegalArgumentException(
                        String.format("Unable to process Read %s", parallelInstruction), e);
            }
        } else if (parallelInstruction.getFlatten() != null) {
            transformSpec.setUrn(PTransformTranslation.FLATTEN_TRANSFORM_URN);
        } else {
            throw new IllegalArgumentException(
                    String.format("Unknown type of ParallelInstruction %s", parallelInstruction));
        }

        // Even though this is a for-loop, there is only going to be a single PCollection as the
        // predecessor in a ParDo. This PCollection is called the "main input".
        for (Node predecessorOutput : input.predecessors(node)) {
            pTransform.putInputs("generatedInput" + idGenerator.getId(),
                    nodesToPCollections.get(predecessorOutput));
        }

        for (Edge edge : input.outEdges(node)) {
            Node nodeOutput = input.incidentNodes(edge).target();
            MultiOutputInfoEdge edge2 = (MultiOutputInfoEdge) edge;
            pTransform.putOutputs(edge2.getMultiOutputInfo().getTag(),
                    nodesToPCollections.get(nodeOutput));
        }

        pTransform.setSpec(transformSpec);
        PTransformNode pTransformNode = PipelineNode.pTransform(ptransformId, pTransform.build());
        executableStageTransforms.add(pTransformNode);

        for (String timerId : timerIds) {
            executableStageTimers.add(TimerReference.of(pTransformNode, timerId));
        }

        ImmutableMap<String, PCollectionNode> sideInputIdToPCollectionNodes = sideInputIds.build();
        for (String sideInputTag : sideInputIdToPCollectionNodes.keySet()) {
            SideInputReference sideInputReference = SideInputReference.of(pTransformNode, sideInputTag,
                    sideInputIdToPCollectionNodes.get(sideInputTag));
            executableStageSideInputs.add(sideInputReference);
        }
    }

    if (executableStageInputs.size() != 1) {
        throw new UnsupportedOperationException("ExecutableStage only supports one input PCollection");
    }
    PCollectionNode executableInput = executableStageInputs.iterator().next();

    RunnerApi.Components executableStageComponents = componentsBuilder.build();

    // Get the Environment from the ptransform; otherwise, use JAVA_SDK_HARNESS_ENVIRONMENT as default.
    Environment executableStageEnv =
            getEnvironmentFromPTransform(executableStageComponents, executableStageTransforms);
    if (executableStageEnv == null) {
        executableStageEnv = Environments.JAVA_SDK_HARNESS_ENVIRONMENT;
    }

    Set<UserStateReference> executableStageUserStateReference = new HashSet<>();
    for (UserStateId userStateId : userStateIds) {
        executableStageUserStateReference
                .add(UserStateReference.fromUserStateId(userStateId, executableStageComponents));
    }

    ExecutableStage executableStage = ImmutableExecutableStage.ofFullComponents(
            executableStageComponents, executableStageEnv, executableInput, executableStageSideInputs,
            executableStageUserStateReference, executableStageTimers, executableStageTransforms,
            executableStageOutputs);
    return ExecutableStageNode.create(executableStage, ptransformIdToNameContexts.build(),
            ptransformIdToSideInputInfos.build(), ptransformIdToPCollectionViews.build());
}
From source file:org.apache.beam.runners.direct.SplittableProcessElementsEvaluatorFactory.java
License:Apache License
@SuppressWarnings({ "unchecked", "rawtypes" }) private TransformEvaluator<KeyedWorkItem<byte[], KV<InputT, RestrictionT>>> createEvaluator( AppliedPTransform<PCollection<KeyedWorkItem<byte[], KV<InputT, RestrictionT>>>, PCollectionTuple, ProcessElements<InputT, OutputT, RestrictionT, PositionT>> application, CommittedBundle<InputT> inputBundle) throws Exception { final ProcessElements<InputT, OutputT, RestrictionT, PositionT> transform = application.getTransform(); final DoFnLifecycleManagerRemovingTransformEvaluator<KeyedWorkItem<byte[], KV<InputT, RestrictionT>>> evaluator = delegateFactory .createEvaluator((AppliedPTransform) application, (PCollection<KeyedWorkItem<byte[], KV<InputT, RestrictionT>>>) inputBundle.getPCollection(), inputBundle.getKey(), application.getTransform().getSideInputs(), application.getTransform().getMainOutputTag(), application.getTransform().getAdditionalOutputTags().getAll(), DoFnSchemaInformation.create(), Collections.emptyMap()); final ParDoEvaluator<KeyedWorkItem<byte[], KV<InputT, RestrictionT>>> pde = evaluator.getParDoEvaluator(); final ProcessFn<InputT, OutputT, RestrictionT, PositionT> processFn = (ProcessFn<InputT, OutputT, RestrictionT, PositionT>) ProcessFnRunner.class .cast(pde.getFnRunner()).getFn(); final DirectExecutionContext.DirectStepContext stepContext = pde.getStepContext(); processFn.setStateInternalsFactory(key -> stepContext.stateInternals()); processFn.setTimerInternalsFactory(key -> stepContext.timerInternals()); OutputWindowedValue<OutputT> outputWindowedValue = new OutputWindowedValue<OutputT>() { private final OutputManager outputManager = pde.getOutputManager(); @Override// www .j a v a 2 s . c o m public void outputWindowedValue(OutputT output, Instant timestamp, Collection<? extends BoundedWindow> windows, PaneInfo pane) { outputManager.output(transform.getMainOutputTag(), WindowedValue.of(output, timestamp, windows, pane)); } @Override public <AdditionalOutputT> void outputWindowedValue(TupleTag<AdditionalOutputT> tag, AdditionalOutputT output, Instant timestamp, Collection<? extends BoundedWindow> windows, PaneInfo pane) { outputManager.output(tag, WindowedValue.of(output, timestamp, windows, pane)); } }; processFn.setProcessElementInvoker(new OutputAndTimeBoundedSplittableProcessElementInvoker<>( transform.getFn(), options, outputWindowedValue, evaluationContext.createSideInputReader(transform.getSideInputs()), ses, // Setting small values here to stimulate frequent checkpointing and better exercise // splittable DoFn's in that respect. 100, Duration.standardSeconds(1))); return evaluator; }
From source file:org.apache.beam.runners.flink.examples.streaming.AutoComplete.java
License:Apache License
public static void main(String[] args) throws IOException {
    Options options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class);
    options.setStreaming(true);
    options.setCheckpointingInterval(1000L);
    options.setNumberOfExecutionRetries(5);
    options.setExecutionRetryDelay(3000L);
    options.setRunner(FlinkRunner.class);

    WindowFn<Object, ?> windowFn = FixedWindows.of(Duration.standardSeconds(options.getWindowSize()));

    // Create the pipeline.
    Pipeline p = Pipeline.create(options);
    PCollection<KV<String, List<CompletionCandidate>>> toWrite = p
            .apply("WordStream", Read.from(new UnboundedSocketSource<>("localhost", 9999, '\n', 3)))
            .apply(ParDo.of(new ExtractWordsFn()))
            .apply(Window.<String>into(windowFn)
                    .triggering(AfterWatermark.pastEndOfWindow())
                    .withAllowedLateness(Duration.ZERO)
                    .discardingFiredPanes())
            .apply(ComputeTopCompletions.top(10, options.getRecursive()));

    toWrite.apply("FormatForPerTaskFile", ParDo.of(new FormatForPerTaskLocalFile()))
            .apply(TextIO.Write.to("./outputAutoComplete.txt"));

    p.run();
}
From source file:org.apache.beam.runners.flink.examples.streaming.JoinExamples.java
License:Apache License
public static void main(String[] args) throws Exception {
    Options options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class);
    options.setStreaming(true);
    options.setCheckpointingInterval(1000L);
    options.setNumberOfExecutionRetries(5);
    options.setExecutionRetryDelay(3000L);
    options.setRunner(FlinkRunner.class);

    WindowFn<Object, ?> windowFn = FixedWindows.of(Duration.standardSeconds(options.getWindowSize()));

    Pipeline p = Pipeline.create(options);

    // the following two 'applys' create multiple inputs to our pipeline, one for each
    // of our two input sources.
    PCollection<String> streamA = p
            .apply("FirstStream", Read.from(new UnboundedSocketSource<>("localhost", 9999, '\n', 3)))
            .apply(Window.<String>into(windowFn)
                    .triggering(AfterWatermark.pastEndOfWindow())
                    .withAllowedLateness(Duration.ZERO)
                    .discardingFiredPanes());
    PCollection<String> streamB = p
            .apply("SecondStream", Read.from(new UnboundedSocketSource<>("localhost", 9998, '\n', 3)))
            .apply(Window.<String>into(windowFn)
                    .triggering(AfterWatermark.pastEndOfWindow())
                    .withAllowedLateness(Duration.ZERO)
                    .discardingFiredPanes());

    PCollection<String> formattedResults = joinEvents(streamA, streamB);
    formattedResults.apply(TextIO.Write.to("./outputJoin.txt"));
    p.run();
}
From source file:org.apache.beam.runners.flink.examples.streaming.KafkaWindowedWordCountExample.java
License:Apache License
public static void main(String[] args) {
    PipelineOptionsFactory.register(KafkaStreamingWordCountOptions.class);
    KafkaStreamingWordCountOptions options =
            PipelineOptionsFactory.fromArgs(args).as(KafkaStreamingWordCountOptions.class);
    options.setJobName("KafkaExample - WindowSize: " + options.getWindowSize() + " seconds");
    options.setStreaming(true);
    options.setCheckpointingInterval(1000L);
    options.setNumberOfExecutionRetries(5);
    options.setExecutionRetryDelay(3000L);
    options.setRunner(FlinkRunner.class);

    System.out.println(options.getKafkaTopic() + " " + options.getZookeeper() + " "
            + options.getBroker() + " " + options.getGroup());
    Pipeline pipeline = Pipeline.create(options);

    Properties p = new Properties();
    p.setProperty("zookeeper.connect", options.getZookeeper());
    p.setProperty("bootstrap.servers", options.getBroker());
    p.setProperty("group.id", options.getGroup());

    // this is the Flink consumer that reads the input to
    // the program from a kafka topic.
    FlinkKafkaConsumer08<String> kafkaConsumer =
            new FlinkKafkaConsumer08<>(options.getKafkaTopic(), new SimpleStringSchema(), p);

    PCollection<String> words = pipeline
            .apply("StreamingWordCount", Read.from(UnboundedFlinkSource.of(kafkaConsumer)))
            .apply(ParDo.of(new ExtractWordsFn()))
            .apply(Window.<String>into(FixedWindows.of(Duration.standardSeconds(options.getWindowSize())))
                    .triggering(AfterWatermark.pastEndOfWindow())
                    .withAllowedLateness(Duration.ZERO)
                    .discardingFiredPanes());

    PCollection<KV<String, Long>> wordCounts = words.apply(Count.<String>perElement());

    wordCounts.apply(ParDo.of(new FormatAsStringFn())).apply(TextIO.Write.to("./outputKafka.txt"));

    pipeline.run();
}
From source file:org.apache.beam.runners.flink.examples.streaming.WindowedWordCount.java
License:Apache License
public static void main(String[] args) throws IOException {
    StreamingWordCountOptions options =
            PipelineOptionsFactory.fromArgs(args).withValidation().as(StreamingWordCountOptions.class);
    options.setStreaming(true);
    options.setWindowSize(10L);
    options.setSlide(5L);
    options.setCheckpointingInterval(1000L);
    options.setNumberOfExecutionRetries(5);
    options.setExecutionRetryDelay(3000L);
    options.setRunner(FlinkRunner.class);

    LOG.info("Windowed WordCount with Sliding Windows of " + options.getWindowSize()
            + " sec. and a slide of " + options.getSlide());

    Pipeline pipeline = Pipeline.create(options);

    PCollection<String> words = pipeline
            .apply("StreamingWordCount", Read.from(new UnboundedSocketSource<>("localhost", 9999, '\n', 3)))
            .apply(ParDo.of(new ExtractWordsFn()))
            .apply(Window.<String>into(
                            SlidingWindows.of(Duration.standardSeconds(options.getWindowSize()))
                                    .every(Duration.standardSeconds(options.getSlide())))
                    .triggering(AfterWatermark.pastEndOfWindow())
                    .withAllowedLateness(Duration.ZERO)
                    .discardingFiredPanes());

    PCollection<KV<String, Long>> wordCounts = words.apply(Count.<String>perElement());

    wordCounts.apply(ParDo.of(new FormatAsStringFn())).apply(TextIO.Write.to("./outputWordCount.txt"));

    pipeline.run();
}
From source file:org.apache.beam.runners.flink.translation.wrappers.streaming.SplittableDoFnOperator.java
License:Apache License
@Override
public void initializeState(StateInitializationContext context) throws Exception {
    super.initializeState(context);

    checkState(doFn instanceof ProcessFn);

    // this will implicitly be keyed by the key of the incoming
    // element or by the key of a firing timer
    StateInternalsFactory<byte[]> stateInternalsFactory = key -> (StateInternals) keyedStateInternals;

    // this will implicitly be keyed like the StateInternalsFactory
    TimerInternalsFactory<byte[]> timerInternalsFactory = key -> timerInternals;

    executorService = Executors.newSingleThreadScheduledExecutor(Executors.defaultThreadFactory());

    ((ProcessFn) doFn).setStateInternalsFactory(stateInternalsFactory);
    ((ProcessFn) doFn).setTimerInternalsFactory(timerInternalsFactory);
    ((ProcessFn) doFn).setProcessElementInvoker(new OutputAndTimeBoundedSplittableProcessElementInvoker<>(
            doFn,
            serializedOptions.get(),
            new OutputWindowedValue<OutputT>() {
                @Override
                public void outputWindowedValue(OutputT output, Instant timestamp,
                        Collection<? extends BoundedWindow> windows, PaneInfo pane) {
                    outputManager.output(mainOutputTag,
                            WindowedValue.of(output, timestamp, windows, pane));
                }

                @Override
                public <AdditionalOutputT> void outputWindowedValue(TupleTag<AdditionalOutputT> tag,
                        AdditionalOutputT output, Instant timestamp,
                        Collection<? extends BoundedWindow> windows, PaneInfo pane) {
                    outputManager.output(tag, WindowedValue.of(output, timestamp, windows, pane));
                }
            },
            sideInputReader,
            executorService,
            10000,
            Duration.standardSeconds(10)));
}
From source file:org.apache.beam.runners.flink.translation.wrappers.streaming.SplittableDoFnOperator.java
License:Apache License
@Override
public void close() throws Exception {
    super.close();

    executorService.shutdown();

    long shutdownTimeout = Duration.standardSeconds(10).getMillis();
    try {
        if (!executorService.awaitTermination(shutdownTimeout, TimeUnit.MILLISECONDS)) {
            LOG.debug("The scheduled executor service did not properly terminate. Shutting "
                    + "it down now.");
            executorService.shutdownNow();
        }
    } catch (InterruptedException e) {
        LOG.debug("Could not properly await the termination of the scheduled executor service.", e);
        executorService.shutdownNow();
    }
}
From source file:org.apache.beam.samples.unbounded.KafkaToKafka.java
License:Apache License
public static void main(String[] args) throws Exception {
    Options options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class);
    LOG.info(options.toString());
    System.out.println(options.toString());

    Pipeline pipeline = Pipeline.create(options);

    // now we connect to the queue and process every event
    PCollection<String> data = pipeline
            .apply("ReadFromKafka",
                    KafkaIO.<String, String>read()
                            .withBootstrapServers(options.getKafkaServer())
                            .withTopics(Collections.singletonList(options.getInputTopic()))
                            .withoutMetadata())
            .apply("ExtractPayload", Values.<String>create());

    data.apply(ParDo.of(new DoFn<String, String>() {
        @ProcessElement
        public void processElement(ProcessContext c) {
            System.out.println(String.format("** element |%s| **", c.element()));
        }
    }));

    // We filter the events for a given country (IN=India) and send them to their own topic
    final String country = "IN";
    PCollection<String> eventsInIndia = data.apply("FilterByCountry",
            ParDo.of(new DoFn<String, String>() {
                @ProcessElement
                public void processElement(ProcessContext c) {
                    // if (getCountry(c.element()).equals(country)) {
                    c.output(c.element());
                    // }
                }
            }));

    PCollection<KV<String, String>> eventsInIndiaKV = eventsInIndia.apply("ExtractPayload",
            ParDo.of(new DoFn<String, KV<String, String>>() {
                @ProcessElement
                public void processElement(ProcessContext c) throws Exception {
                    c.output(KV.of("india", c.element()));
                }
            }));

    eventsInIndiaKV.apply("WriteToKafka",
            KafkaIO.<String, String>write()
                    .withBootstrapServers(options.getKafkaServer())
                    .withTopic(options.getOutputTopic())
                    .withKeySerializer(org.apache.kafka.common.serialization.StringSerializer.class)
                    .withValueSerializer(org.apache.kafka.common.serialization.StringSerializer.class));

    PipelineResult run = pipeline.run();
    run.waitUntilFinish(Duration.standardSeconds(options.getDuration()));
}
From source file:org.apache.beam.sdk.extensions.sql.impl.rel.BeamEnumerableConverter.java
License:Apache License
private static PipelineResult limitRun(PipelineOptions options, BeamRelNode node, DoFn<Row, Void> doFn,
        Queue<Row> values, int limitCount) {
    options.as(DirectOptions.class).setBlockOnRun(false);
    Pipeline pipeline = Pipeline.create(options);
    PCollection<Row> resultCollection = BeamSqlRelUtils.toPCollection(pipeline, node);
    resultCollection.apply(ParDo.of(doFn));

    PipelineResult result = pipeline.run();

    State state;
    while (true) {
        // Check the pipeline state every second.
        state = result.waitUntilFinish(Duration.standardSeconds(1));
        if (state != null && state.isTerminal()) {
            break;
        }
        try {
            if (values.size() >= limitCount) {
                result.cancel();
                break;
            }
        } catch (IOException e) {
            LOG.warn(e.toString());
            break;
        }
    }

    return result;
}