Example usage for org.joda.time Duration standardSeconds

List of usage examples for org.joda.time Duration standardSeconds

Introduction

On this page you can find example usage for org.joda.time Duration standardSeconds.

Prototype

public static Duration standardSeconds(long seconds) 

Document

Create a duration with the specified number of seconds assuming that there are the standard number of milliseconds in a second.
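
For a quick feel of the method before the full project examples below, here is a minimal, self-contained sketch (the class name and printed values are illustrative assumptions, not taken from any of the projects listed under Usage):

import org.joda.time.Duration;

public class StandardSecondsExample {
    public static void main(String[] args) {
        // 90 seconds, assuming the standard 1000 milliseconds per second
        Duration d = Duration.standardSeconds(90);

        System.out.println(d.getMillis());           // 90000
        System.out.println(d.getStandardSeconds());  // 90
        System.out.println(d.getStandardMinutes());  // 1 (truncated toward zero)
    }
}

The examples below pass the resulting Duration to Beam APIs such as FixedWindows.of, SlidingWindows.of, and PipelineResult.waitUntilFinish.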

Usage

From source file:org.apache.beam.runners.dataflow.worker.graph.CreateExecutableStageNodeFunction.java

License:Apache License

@Override
public Node apply(MutableNetwork<Node, Edge> input) {
    for (Node node : input.nodes()) {
        if (node instanceof RemoteGrpcPortNode || node instanceof ParallelInstructionNode
                || node instanceof InstructionOutputNode) {
            continue;
        }
        throw new IllegalArgumentException(String.format("Network contains unknown type of node: %s", input));
    }

    // Fix all non output nodes to have named edges.
    for (Node node : input.nodes()) {
        if (node instanceof InstructionOutputNode) {
            continue;
        }
        for (Node successor : input.successors(node)) {
            for (Edge edge : input.edgesConnecting(node, successor)) {
                if (edge instanceof DefaultEdge) {
                    input.removeEdge(edge);
                    input.addEdge(node, successor,
                            MultiOutputInfoEdge.create(new MultiOutputInfo().setTag(idGenerator.getId())));
                }
            }
        }
    }

    RunnerApi.Components.Builder componentsBuilder = RunnerApi.Components.newBuilder();
    componentsBuilder.mergeFrom(this.pipeline.getComponents());

    // We start off by replacing all edges within the graph with edges that have the named
    // outputs from the predecessor step. For ParallelInstruction Source nodes and RemoteGrpcPort
    // nodes this is a generated port id. All ParDoInstructions will have already

    // For intermediate PCollections we fabricate, we make a bogus WindowingStrategy
    // TODO: create a correct windowing strategy, including coders and environment
    // An SdkFunctionSpec is invalid without a working environment reference. We can revamp that
    // when we inline SdkFunctionSpec and FunctionSpec, both slated for inlining wherever they occur

    // Default to use the Java environment if pipeline doesn't have environment specified.
    if (pipeline.getComponents().getEnvironmentsMap().isEmpty()) {
        String envId = Environments.JAVA_SDK_HARNESS_ENVIRONMENT.getUrn() + idGenerator.getId();
        componentsBuilder.putEnvironments(envId, Environments.JAVA_SDK_HARNESS_ENVIRONMENT);
    }

    // By default, use GlobalWindow for all languages.
    // For java, if there is a IntervalWindowCoder, then use FixedWindow instead.
    // TODO: should get real WindowingStrategy from pipeline proto.
    String globalWindowingStrategyId = "generatedGlobalWindowingStrategy" + idGenerator.getId();
    String intervalWindowEncodingWindowingStrategyId = "generatedIntervalWindowEncodingWindowingStrategy"
            + idGenerator.getId();

    SdkComponents sdkComponents = SdkComponents.create(pipeline.getComponents());
    try {
        registerWindowingStrategy(globalWindowingStrategyId, WindowingStrategy.globalDefault(),
                componentsBuilder, sdkComponents);
        registerWindowingStrategy(intervalWindowEncodingWindowingStrategyId,
                WindowingStrategy.of(FixedWindows.of(Duration.standardSeconds(1))), componentsBuilder,
                sdkComponents);
    } catch (IOException exc) {
        throw new RuntimeException("Could not convert default windowing strategy to proto", exc);
    }

    Map<Node, String> nodesToPCollections = new HashMap<>();
    ImmutableMap.Builder<String, NameContext> ptransformIdToNameContexts = ImmutableMap.builder();

    ImmutableMap.Builder<String, Iterable<SideInputInfo>> ptransformIdToSideInputInfos = ImmutableMap.builder();
    ImmutableMap.Builder<String, Iterable<PCollectionView<?>>> ptransformIdToPCollectionViews = ImmutableMap
            .builder();

    // A field of ExecutableStage which includes the PCollection goes to worker side.
    Set<PCollectionNode> executableStageOutputs = new HashSet<>();
    // A field of ExecutableStage which includes the PCollection goes to runner side.
    Set<PCollectionNode> executableStageInputs = new HashSet<>();

    for (InstructionOutputNode node : Iterables.filter(input.nodes(), InstructionOutputNode.class)) {
        InstructionOutput instructionOutput = node.getInstructionOutput();

        String coderId = "generatedCoder" + idGenerator.getId();
        String windowingStrategyId;
        try (ByteString.Output output = ByteString.newOutput()) {
            try {
                Coder<?> javaCoder = CloudObjects
                        .coderFromCloudObject(CloudObject.fromSpec(instructionOutput.getCodec()));
                Coder<?> elementCoder = ((WindowedValueCoder<?>) javaCoder).getValueCoder();
                sdkComponents.registerCoder(elementCoder);
                RunnerApi.Coder coderProto = CoderTranslation.toProto(elementCoder, sdkComponents);
                componentsBuilder.putCoders(coderId, coderProto);
                // For now, the Dataflow runner harness only deals with FixedWindow.
                if (javaCoder instanceof FullWindowedValueCoder) {
                    FullWindowedValueCoder<?> windowedValueCoder = (FullWindowedValueCoder<?>) javaCoder;
                    Coder<?> windowCoder = windowedValueCoder.getWindowCoder();
                    if (windowCoder instanceof IntervalWindowCoder) {
                        windowingStrategyId = intervalWindowEncodingWindowingStrategyId;
                    } else if (windowCoder instanceof GlobalWindow.Coder) {
                        windowingStrategyId = globalWindowingStrategyId;
                    } else {
                        throw new UnsupportedOperationException(String.format(
                                "Dataflow portable runner harness doesn't support windowing with %s",
                                windowCoder));
                    }
                } else {
                    throw new UnsupportedOperationException(
                            "Dataflow portable runner harness only supports FullWindowedValueCoder");
                }
            } catch (IOException e) {
                throw new IllegalArgumentException(String.format("Unable to encode coder %s for output %s",
                        instructionOutput.getCodec(), instructionOutput), e);
            } catch (Exception e) {
                // Coder probably wasn't a java coder
                OBJECT_MAPPER.writeValue(output, instructionOutput.getCodec());
                componentsBuilder.putCoders(coderId,
                        RunnerApi.Coder.newBuilder()
                                .setSpec(RunnerApi.FunctionSpec.newBuilder().setPayload(output.toByteString()))
                                .build());
                // For non-java coder, hope it's GlobalWindows by default.
                // TODO(BEAM-6231): Actually discover the right windowing strategy.
                windowingStrategyId = globalWindowingStrategyId;
            }
        } catch (IOException e) {
            throw new IllegalArgumentException(String.format("Unable to encode coder %s for output %s",
                    instructionOutput.getCodec(), instructionOutput), e);
        }

        // TODO(BEAM-6275): Set correct IsBounded on generated PCollections
        String pcollectionId = node.getPcollectionId();
        RunnerApi.PCollection pCollection = RunnerApi.PCollection.newBuilder().setCoderId(coderId)
                .setWindowingStrategyId(windowingStrategyId).setIsBounded(RunnerApi.IsBounded.Enum.BOUNDED)
                .build();
        nodesToPCollections.put(node, pcollectionId);
        componentsBuilder.putPcollections(pcollectionId, pCollection);

        // Check whether this output collection has consumers from worker side when
        // "use_executable_stage_bundle_execution"
        // is set
        if (isExecutableStageOutputPCollection(input, node)) {
            executableStageOutputs.add(PipelineNode.pCollection(pcollectionId, pCollection));
        }
        if (isExecutableStageInputPCollection(input, node)) {
            executableStageInputs.add(PipelineNode.pCollection(pcollectionId, pCollection));
        }
    }

    componentsBuilder.putAllCoders(sdkComponents.toComponents().getCodersMap());

    Set<PTransformNode> executableStageTransforms = new HashSet<>();
    Set<TimerReference> executableStageTimers = new HashSet<>();
    List<UserStateId> userStateIds = new ArrayList<>();
    Set<SideInputReference> executableStageSideInputs = new HashSet<>();

    for (ParallelInstructionNode node : Iterables.filter(input.nodes(), ParallelInstructionNode.class)) {
        ImmutableMap.Builder<String, PCollectionNode> sideInputIds = ImmutableMap.builder();
        ParallelInstruction parallelInstruction = node.getParallelInstruction();
        String ptransformId = "generatedPtransform" + idGenerator.getId();
        ptransformIdToNameContexts.put(ptransformId,
                NameContext.create(null, parallelInstruction.getOriginalName(),
                        parallelInstruction.getSystemName(), parallelInstruction.getName()));

        RunnerApi.PTransform.Builder pTransform = RunnerApi.PTransform.newBuilder();
        RunnerApi.FunctionSpec.Builder transformSpec = RunnerApi.FunctionSpec.newBuilder();

        List<String> timerIds = new ArrayList<>();
        if (parallelInstruction.getParDo() != null) {
            ParDoInstruction parDoInstruction = parallelInstruction.getParDo();
            CloudObject userFnSpec = CloudObject.fromSpec(parDoInstruction.getUserFn());
            String userFnClassName = userFnSpec.getClassName();

            if (userFnClassName.equals("CombineValuesFn") || userFnClassName.equals("KeyedCombineFn")) {
                transformSpec = transformCombineValuesFnToFunctionSpec(userFnSpec);
                ptransformIdToPCollectionViews.put(ptransformId, Collections.emptyList());
            } else {
                String parDoPTransformId = getString(userFnSpec, PropertyNames.SERIALIZED_FN);

                RunnerApi.PTransform parDoPTransform = pipeline.getComponents()
                        .getTransformsOrDefault(parDoPTransformId, null);

                // TODO: only the non-null branch should exist; for migration ease only
                if (parDoPTransform != null) {
                    checkArgument(
                            parDoPTransform.getSpec().getUrn()
                                    .equals(PTransformTranslation.PAR_DO_TRANSFORM_URN),
                            "Found transform \"%s\" for ParallelDo instruction, "
                                    + " but that transform had unexpected URN \"%s\" (expected \"%s\")",
                            parDoPTransformId, parDoPTransform.getSpec().getUrn(),
                            PTransformTranslation.PAR_DO_TRANSFORM_URN);

                    RunnerApi.ParDoPayload parDoPayload;
                    try {
                        parDoPayload = RunnerApi.ParDoPayload.parseFrom(parDoPTransform.getSpec().getPayload());
                    } catch (InvalidProtocolBufferException exc) {
                        throw new RuntimeException("ParDo did not have a ParDoPayload", exc);
                    }

                    // Build the necessary components to inform the SDK Harness of the pipeline's
                    // user timers and user state.
                    for (Map.Entry<String, RunnerApi.TimerSpec> entry : parDoPayload.getTimerSpecsMap()
                            .entrySet()) {
                        timerIds.add(entry.getKey());
                    }
                    for (Map.Entry<String, RunnerApi.StateSpec> entry : parDoPayload.getStateSpecsMap()
                            .entrySet()) {
                        UserStateId.Builder builder = UserStateId.newBuilder();
                        builder.setTransformId(parDoPTransformId);
                        builder.setLocalName(entry.getKey());
                        userStateIds.add(builder.build());
                    }

                    // To facilitate the creation of Set executableStageSideInputs.
                    for (String sideInputTag : parDoPayload.getSideInputsMap().keySet()) {
                        String sideInputPCollectionId = parDoPTransform.getInputsOrThrow(sideInputTag);
                        RunnerApi.PCollection sideInputPCollection = pipeline.getComponents()
                                .getPcollectionsOrThrow(sideInputPCollectionId);

                        pTransform.putInputs(sideInputTag, sideInputPCollectionId);

                        PCollectionNode pCollectionNode = PipelineNode.pCollection(sideInputPCollectionId,
                                sideInputPCollection);
                        sideInputIds.put(sideInputTag, pCollectionNode);
                    }

                    // To facilitate the creation of Map(ptransformId -> pCollectionView), which is
                    // required by constructing an ExecutableStageNode.
                    ImmutableList.Builder<PCollectionView<?>> pcollectionViews = ImmutableList.builder();
                    for (Map.Entry<String, RunnerApi.SideInput> sideInputEntry : parDoPayload.getSideInputsMap()
                            .entrySet()) {
                        pcollectionViews.add(RegisterNodeFunction.transformSideInputForRunner(pipeline,
                                parDoPTransform, sideInputEntry.getKey(), sideInputEntry.getValue()));
                    }
                    ptransformIdToPCollectionViews.put(ptransformId, pcollectionViews.build());

                    transformSpec.setUrn(PTransformTranslation.PAR_DO_TRANSFORM_URN)
                            .setPayload(parDoPayload.toByteString());
                } else {
                    // legacy path - bytes are the SdkFunctionSpec's payload field, basically, and
                    // SDKs expect it in the PTransform's payload field
                    byte[] userFnBytes = getBytes(userFnSpec, PropertyNames.SERIALIZED_FN);
                    transformSpec.setUrn(ParDoTranslation.CUSTOM_JAVA_DO_FN_URN)
                            .setPayload(ByteString.copyFrom(userFnBytes));
                }

                if (parDoInstruction.getSideInputs() != null) {
                    ptransformIdToSideInputInfos.put(ptransformId,
                            forSideInputInfos(parDoInstruction.getSideInputs(), true));
                }
            }
        } else if (parallelInstruction.getRead() != null) {
            ReadInstruction readInstruction = parallelInstruction.getRead();
            CloudObject sourceSpec = CloudObject
                    .fromSpec(CloudSourceUtils.flattenBaseSpecs(readInstruction.getSource()).getSpec());
            // TODO: Need to plumb through the SDK specific function spec.
            transformSpec.setUrn(JAVA_SOURCE_URN);
            try {
                byte[] serializedSource = Base64.getDecoder().decode(getString(sourceSpec, SERIALIZED_SOURCE));
                ByteString sourceByteString = ByteString.copyFrom(serializedSource);
                transformSpec.setPayload(sourceByteString);
            } catch (Exception e) {
                throw new IllegalArgumentException(
                        String.format("Unable to process Read %s", parallelInstruction), e);
            }
        } else if (parallelInstruction.getFlatten() != null) {
            transformSpec.setUrn(PTransformTranslation.FLATTEN_TRANSFORM_URN);
        } else {
            throw new IllegalArgumentException(
                    String.format("Unknown type of ParallelInstruction %s", parallelInstruction));
        }

        // Even though this is a for-loop, there is only going to be a single PCollection as the
        // predecessor in a ParDo. This PCollection is called the "main input".
        for (Node predecessorOutput : input.predecessors(node)) {
            pTransform.putInputs("generatedInput" + idGenerator.getId(),
                    nodesToPCollections.get(predecessorOutput));
        }

        for (Edge edge : input.outEdges(node)) {
            Node nodeOutput = input.incidentNodes(edge).target();
            MultiOutputInfoEdge edge2 = (MultiOutputInfoEdge) edge;
            pTransform.putOutputs(edge2.getMultiOutputInfo().getTag(), nodesToPCollections.get(nodeOutput));
        }

        pTransform.setSpec(transformSpec);
        PTransformNode pTransformNode = PipelineNode.pTransform(ptransformId, pTransform.build());
        executableStageTransforms.add(pTransformNode);

        for (String timerId : timerIds) {
            executableStageTimers.add(TimerReference.of(pTransformNode, timerId));
        }

        ImmutableMap<String, PCollectionNode> sideInputIdToPCollectionNodes = sideInputIds.build();
        for (String sideInputTag : sideInputIdToPCollectionNodes.keySet()) {
            SideInputReference sideInputReference = SideInputReference.of(pTransformNode, sideInputTag,
                    sideInputIdToPCollectionNodes.get(sideInputTag));
            executableStageSideInputs.add(sideInputReference);
        }

        executableStageTransforms.add(pTransformNode);
    }

    if (executableStageInputs.size() != 1) {
        throw new UnsupportedOperationException("ExecutableStage only supports one input PCollection");
    }

    PCollectionNode executableInput = executableStageInputs.iterator().next();
    RunnerApi.Components executableStageComponents = componentsBuilder.build();

    // Get Environment from ptransform, otherwise, use JAVA_SDK_HARNESS_ENVIRONMENT as default.
    Environment executableStageEnv = getEnvironmentFromPTransform(executableStageComponents,
            executableStageTransforms);
    if (executableStageEnv == null) {
        executableStageEnv = Environments.JAVA_SDK_HARNESS_ENVIRONMENT;
    }

    Set<UserStateReference> executableStageUserStateReference = new HashSet<>();
    for (UserStateId userStateId : userStateIds) {
        executableStageUserStateReference
                .add(UserStateReference.fromUserStateId(userStateId, executableStageComponents));
    }

    ExecutableStage executableStage = ImmutableExecutableStage.ofFullComponents(executableStageComponents,
            executableStageEnv, executableInput, executableStageSideInputs, executableStageUserStateReference,
            executableStageTimers, executableStageTransforms, executableStageOutputs);
    return ExecutableStageNode.create(executableStage, ptransformIdToNameContexts.build(),
            ptransformIdToSideInputInfos.build(), ptransformIdToPCollectionViews.build());
}

From source file:org.apache.beam.runners.direct.SplittableProcessElementsEvaluatorFactory.java

License:Apache License

@SuppressWarnings({ "unchecked", "rawtypes" })
private TransformEvaluator<KeyedWorkItem<byte[], KV<InputT, RestrictionT>>> createEvaluator(
        AppliedPTransform<PCollection<KeyedWorkItem<byte[], KV<InputT, RestrictionT>>>, PCollectionTuple, ProcessElements<InputT, OutputT, RestrictionT, PositionT>> application,
        CommittedBundle<InputT> inputBundle) throws Exception {
    final ProcessElements<InputT, OutputT, RestrictionT, PositionT> transform = application.getTransform();

    final DoFnLifecycleManagerRemovingTransformEvaluator<KeyedWorkItem<byte[], KV<InputT, RestrictionT>>> evaluator = delegateFactory
            .createEvaluator((AppliedPTransform) application,
                    (PCollection<KeyedWorkItem<byte[], KV<InputT, RestrictionT>>>) inputBundle.getPCollection(),
                    inputBundle.getKey(), application.getTransform().getSideInputs(),
                    application.getTransform().getMainOutputTag(),
                    application.getTransform().getAdditionalOutputTags().getAll(),
                    DoFnSchemaInformation.create(), Collections.emptyMap());
    final ParDoEvaluator<KeyedWorkItem<byte[], KV<InputT, RestrictionT>>> pde = evaluator.getParDoEvaluator();
    final ProcessFn<InputT, OutputT, RestrictionT, PositionT> processFn = (ProcessFn<InputT, OutputT, RestrictionT, PositionT>) ProcessFnRunner.class
            .cast(pde.getFnRunner()).getFn();

    final DirectExecutionContext.DirectStepContext stepContext = pde.getStepContext();
    processFn.setStateInternalsFactory(key -> stepContext.stateInternals());
    processFn.setTimerInternalsFactory(key -> stepContext.timerInternals());

    OutputWindowedValue<OutputT> outputWindowedValue = new OutputWindowedValue<OutputT>() {
        private final OutputManager outputManager = pde.getOutputManager();

        @Override
        public void outputWindowedValue(OutputT output, Instant timestamp,
                Collection<? extends BoundedWindow> windows, PaneInfo pane) {
            outputManager.output(transform.getMainOutputTag(),
                    WindowedValue.of(output, timestamp, windows, pane));
        }

        @Override
        public <AdditionalOutputT> void outputWindowedValue(TupleTag<AdditionalOutputT> tag,
                AdditionalOutputT output, Instant timestamp, Collection<? extends BoundedWindow> windows,
                PaneInfo pane) {
            outputManager.output(tag, WindowedValue.of(output, timestamp, windows, pane));
        }
    };
    processFn.setProcessElementInvoker(new OutputAndTimeBoundedSplittableProcessElementInvoker<>(
            transform.getFn(), options, outputWindowedValue,
            evaluationContext.createSideInputReader(transform.getSideInputs()), ses,
            // Setting small values here to stimulate frequent checkpointing and better exercise
            // splittable DoFn's in that respect.
            100, Duration.standardSeconds(1)));

    return evaluator;
}

From source file:org.apache.beam.runners.flink.examples.streaming.AutoComplete.java

License:Apache License

public static void main(String[] args) throws IOException {
    Options options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class);
    options.setStreaming(true);
    options.setCheckpointingInterval(1000L);
    options.setNumberOfExecutionRetries(5);
    options.setExecutionRetryDelay(3000L);
    options.setRunner(FlinkRunner.class);

    WindowFn<Object, ?> windowFn = FixedWindows.of(Duration.standardSeconds(options.getWindowSize()));

    // Create the pipeline.
    Pipeline p = Pipeline.create(options);
    PCollection<KV<String, List<CompletionCandidate>>> toWrite = p
            .apply("WordStream", Read.from(new UnboundedSocketSource<>("localhost", 9999, '\n', 3)))
            .apply(ParDo.of(new ExtractWordsFn()))
            .apply(Window.<String>into(windowFn).triggering(AfterWatermark.pastEndOfWindow())
                    .withAllowedLateness(Duration.ZERO).discardingFiredPanes())
            .apply(ComputeTopCompletions.top(10, options.getRecursive()));

    toWrite.apply("FormatForPerTaskFile", ParDo.of(new FormatForPerTaskLocalFile()))
            .apply(TextIO.Write.to("./outputAutoComplete.txt"));

    p.run();
}

From source file:org.apache.beam.runners.flink.examples.streaming.JoinExamples.java

License:Apache License

public static void main(String[] args) throws Exception {
    Options options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class);
    options.setStreaming(true);
    options.setCheckpointingInterval(1000L);
    options.setNumberOfExecutionRetries(5);
    options.setExecutionRetryDelay(3000L);
    options.setRunner(FlinkRunner.class);

    WindowFn<Object, ?> windowFn = FixedWindows.of(Duration.standardSeconds(options.getWindowSize()));

    Pipeline p = Pipeline.create(options);

    // the following two 'applys' create multiple inputs to our pipeline, one for each
    // of our two input sources.
    PCollection<String> streamA = p
            .apply("FirstStream", Read.from(new UnboundedSocketSource<>("localhost", 9999, '\n', 3)))
            .apply(Window.<String>into(windowFn).triggering(AfterWatermark.pastEndOfWindow())
                    .withAllowedLateness(Duration.ZERO).discardingFiredPanes());
    PCollection<String> streamB = p
            .apply("SecondStream", Read.from(new UnboundedSocketSource<>("localhost", 9998, '\n', 3)))
            .apply(Window.<String>into(windowFn).triggering(AfterWatermark.pastEndOfWindow())
                    .withAllowedLateness(Duration.ZERO).discardingFiredPanes());

    PCollection<String> formattedResults = joinEvents(streamA, streamB);
    formattedResults.apply(TextIO.Write.to("./outputJoin.txt"));
    p.run();
}

From source file:org.apache.beam.runners.flink.examples.streaming.KafkaWindowedWordCountExample.java

License:Apache License

public static void main(String[] args) {
    PipelineOptionsFactory.register(KafkaStreamingWordCountOptions.class);
    KafkaStreamingWordCountOptions options = PipelineOptionsFactory.fromArgs(args)
            .as(KafkaStreamingWordCountOptions.class);
    options.setJobName("KafkaExample - WindowSize: " + options.getWindowSize() + " seconds");
    options.setStreaming(true);
    options.setCheckpointingInterval(1000L);
    options.setNumberOfExecutionRetries(5);
    options.setExecutionRetryDelay(3000L);
    options.setRunner(FlinkRunner.class);

    System.out.println(options.getKafkaTopic() + " " + options.getZookeeper() + " " + options.getBroker() + " "
            + options.getGroup());
    Pipeline pipeline = Pipeline.create(options);

    Properties p = new Properties();
    p.setProperty("zookeeper.connect", options.getZookeeper());
    p.setProperty("bootstrap.servers", options.getBroker());
    p.setProperty("group.id", options.getGroup());

    // this is the Flink consumer that reads the input to
    // the program from a kafka topic.
    FlinkKafkaConsumer08<String> kafkaConsumer = new FlinkKafkaConsumer08<>(options.getKafkaTopic(),
            new SimpleStringSchema(), p);

    PCollection<String> words = pipeline
            .apply("StreamingWordCount", Read.from(UnboundedFlinkSource.of(kafkaConsumer)))
            .apply(ParDo.of(new ExtractWordsFn()))
            .apply(Window.<String>into(FixedWindows.of(Duration.standardSeconds(options.getWindowSize())))
                    .triggering(AfterWatermark.pastEndOfWindow()).withAllowedLateness(Duration.ZERO)
                    .discardingFiredPanes());

    PCollection<KV<String, Long>> wordCounts = words.apply(Count.<String>perElement());

    wordCounts.apply(ParDo.of(new FormatAsStringFn())).apply(TextIO.Write.to("./outputKafka.txt"));

    pipeline.run();
}

From source file:org.apache.beam.runners.flink.examples.streaming.WindowedWordCount.java

License:Apache License

public static void main(String[] args) throws IOException {
    StreamingWordCountOptions options = PipelineOptionsFactory.fromArgs(args).withValidation()
            .as(StreamingWordCountOptions.class);
    options.setStreaming(true);
    options.setWindowSize(10L);
    options.setSlide(5L);
    options.setCheckpointingInterval(1000L);
    options.setNumberOfExecutionRetries(5);
    options.setExecutionRetryDelay(3000L);
    options.setRunner(FlinkRunner.class);

    LOG.info("Windpwed WordCount with Sliding Windows of " + options.getWindowSize() + " sec. and a slide of "
            + options.getSlide());

    Pipeline pipeline = Pipeline.create(options);

    PCollection<String> words = pipeline
            .apply("StreamingWordCount", Read.from(new UnboundedSocketSource<>("localhost", 9999, '\n', 3)))
            .apply(ParDo.of(new ExtractWordsFn()))
            .apply(Window
                    .<String>into(SlidingWindows.of(Duration.standardSeconds(options.getWindowSize()))
                            .every(Duration.standardSeconds(options.getSlide())))
                    .triggering(AfterWatermark.pastEndOfWindow()).withAllowedLateness(Duration.ZERO)
                    .discardingFiredPanes());

    PCollection<KV<String, Long>> wordCounts = words.apply(Count.<String>perElement());

    wordCounts.apply(ParDo.of(new FormatAsStringFn())).apply(TextIO.Write.to("./outputWordCount.txt"));

    pipeline.run();
}

From source file:org.apache.beam.runners.flink.translation.wrappers.streaming.SplittableDoFnOperator.java

License:Apache License

@Override
public void initializeState(StateInitializationContext context) throws Exception {
    super.initializeState(context);

    checkState(doFn instanceof ProcessFn);

    // this will implicitly be keyed by the key of the incoming
    // element or by the key of a firing timer
    StateInternalsFactory<byte[]> stateInternalsFactory = key -> (StateInternals) keyedStateInternals;

    // this will implicitly be keyed like the StateInternalsFactory
    TimerInternalsFactory<byte[]> timerInternalsFactory = key -> timerInternals;

    executorService = Executors.newSingleThreadScheduledExecutor(Executors.defaultThreadFactory());

    ((ProcessFn) doFn).setStateInternalsFactory(stateInternalsFactory);
    ((ProcessFn) doFn).setTimerInternalsFactory(timerInternalsFactory);
    ((ProcessFn) doFn).setProcessElementInvoker(new OutputAndTimeBoundedSplittableProcessElementInvoker<>(doFn,
            serializedOptions.get(), new OutputWindowedValue<OutputT>() {
                @Override
                public void outputWindowedValue(OutputT output, Instant timestamp,
                        Collection<? extends BoundedWindow> windows, PaneInfo pane) {
                    outputManager.output(mainOutputTag, WindowedValue.of(output, timestamp, windows, pane));
                }

                @Override
                public <AdditionalOutputT> void outputWindowedValue(TupleTag<AdditionalOutputT> tag,
                        AdditionalOutputT output, Instant timestamp,
                        Collection<? extends BoundedWindow> windows, PaneInfo pane) {
                    outputManager.output(tag, WindowedValue.of(output, timestamp, windows, pane));
                }
            }, sideInputReader, executorService, 10000, Duration.standardSeconds(10)));
}

From source file:org.apache.beam.runners.flink.translation.wrappers.streaming.SplittableDoFnOperator.java

License:Apache License

@Override
public void close() throws Exception {
    super.close();

    executorService.shutdown();

    long shutdownTimeout = Duration.standardSeconds(10).getMillis();
    try {
        if (!executorService.awaitTermination(shutdownTimeout, TimeUnit.MILLISECONDS)) {
            LOG.debug("The scheduled executor service did not properly terminate. Shutting " + "it down now.");
            executorService.shutdownNow();
        }
    } catch (InterruptedException e) {
        LOG.debug("Could not properly await the termination of the scheduled executor service.", e);
        executorService.shutdownNow();
    }
}

From source file:org.apache.beam.samples.unbounded.KafkaToKafka.java

License:Apache License

public static void main(String[] args) throws Exception {
    Options options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class);
    LOG.info(options.toString());
    System.out.println(options.toString());
    Pipeline pipeline = Pipeline.create(options);

    // now we connect to the queue and process every event
    PCollection<String> data = pipeline
            .apply("ReadFromKafka",
                    KafkaIO.<String, String>read().withBootstrapServers(options.getKafkaServer())
                            .withTopics(Collections.singletonList(options.getInputTopic())).withoutMetadata())
            .apply("ExtractPayload", Values.<String>create());

    data.apply(ParDo.of(new DoFn<String, String>() {
        @ProcessElement
        public void processElement(ProcessContext c) {
            System.out.println(String.format("** element |%s| **", c.element()));
        }
    }));
    // We filter the events for a given country (IN=India) and send them to their own Topic
    final String country = "IN";
    PCollection<String> eventsInIndia = data.apply("FilterByCountry", ParDo.of(new DoFn<String, String>() {
        @ProcessElement
        public void processElement(ProcessContext c) {
            //                    if (getCountry(c.element()).equals(country)){
            c.output(c.element());
            //                    }

        }
    }));

    PCollection<KV<String, String>> eventsInIndiaKV = eventsInIndia.apply("ExtractPayload",
            ParDo.of(new DoFn<String, KV<String, String>>() {
                @ProcessElement
                public void processElement(ProcessContext c) throws Exception {
                    c.output(KV.of("india", c.element()));
                }
            }));

    eventsInIndiaKV.apply("WriteToKafka",
            KafkaIO.<String, String>write().withBootstrapServers(options.getKafkaServer())
                    .withTopic(options.getOutputTopic())
                    .withKeySerializer(org.apache.kafka.common.serialization.StringSerializer.class)
                    .withValueSerializer(org.apache.kafka.common.serialization.StringSerializer.class));
    PipelineResult run = pipeline.run();
    run.waitUntilFinish(Duration.standardSeconds(options.getDuration()));
}

From source file:org.apache.beam.sdk.extensions.sql.impl.rel.BeamEnumerableConverter.java

License:Apache License

private static PipelineResult limitRun(PipelineOptions options, BeamRelNode node, DoFn<Row, Void> doFn,
        Queue<Row> values, int limitCount) {
    options.as(DirectOptions.class).setBlockOnRun(false);
    Pipeline pipeline = Pipeline.create(options);
    PCollection<Row> resultCollection = BeamSqlRelUtils.toPCollection(pipeline, node);
    resultCollection.apply(ParDo.of(doFn));

    PipelineResult result = pipeline.run();

    State state;
    while (true) {
        // Check the pipeline state every second
        state = result.waitUntilFinish(Duration.standardSeconds(1));
        if (state != null && state.isTerminal()) {
            break;
        }

        try {
            if (values.size() >= limitCount) {
                result.cancel();
                break;
            }
        } catch (IOException e) {
            LOG.warn(e.toString());
            break;
        }
    }

    return result;
}