List of usage examples for org.joda.time Duration standardMinutes
public static Duration standardMinutes(long minutes)
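For orientation before the pipeline examples below, a minimal standalone sketch of what standardMinutes(long) returns: a fixed-length Duration of exactly minutes x 60,000 milliseconds, with no calendar or time-zone awareness. The class name StandardMinutesDemo is illustrative only.

import org.joda.time.Duration;

public class StandardMinutesDemo {
    public static void main(String[] args) {
        // A "standard" minute is exactly 60 seconds, so the result is a fixed
        // millisecond length, independent of time zones and daylight saving.
        Duration thirtyMinutes = Duration.standardMinutes(30);
        System.out.println(thirtyMinutes.getMillis());          // 1800000
        System.out.println(thirtyMinutes.getStandardSeconds()); // 1800
        System.out.println(thirtyMinutes.getStandardMinutes()); // 30
        // Equivalent construction from seconds:
        System.out.println(Duration.standardSeconds(30 * 60).equals(thirtyMinutes)); // true
    }
}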
From source file: com.google.cloud.dataflow.examples.AutoComplete.java
License: Apache License

public static void main(String[] args) throws IOException {
    Options options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class);
    if (options.isStreaming()) {
        // In order to cancel the pipelines automatically,
        // {@literal DataflowPipelineRunner} is forced to be used.
        options.setRunner(DataflowPipelineRunner.class);
    }
    options.setBigQuerySchema(FormatForBigquery.getSchema());
    DataflowExampleUtils dataflowUtils = new DataflowExampleUtils(options);

    // We support running the same pipeline in either
    // batch or windowed streaming mode.
    PTransform<? super PBegin, PCollection<String>> readSource;
    WindowFn<Object, ?> windowFn;
    if (options.isStreaming()) {
        Preconditions.checkArgument(!options.getOutputToDatastore(),
            "DatastoreIO is not supported in streaming.");
        dataflowUtils.setupPubsubTopic();
        readSource = PubsubIO.Read.topic(options.getPubsubTopic());
        windowFn = SlidingWindows.of(Duration.standardMinutes(30)).every(Duration.standardSeconds(5));
    } else {
        readSource = TextIO.Read.from(options.getInputFile());
        windowFn = new GlobalWindows();
    }

    // Create the pipeline.
    Pipeline p = Pipeline.create(options);
    PCollection<KV<String, List<CompletionCandidate>>> toWrite = p.apply(readSource)
        .apply(ParDo.of(new ExtractHashtags()))
        .apply(Window.<String>into(windowFn))
        .apply(ComputeTopCompletions.top(10, options.getRecursive()));

    if (options.getOutputToDatastore()) {
        toWrite.apply(ParDo.named("FormatForDatastore").of(new FormatForDatastore(options.getKind())))
            .apply(DatastoreIO.writeTo(options.getProject()));
    }
    if (options.getOutputToBigQuery()) {
        dataflowUtils.setupBigQueryTable();
        TableReference tableRef = new TableReference();
        tableRef.setProjectId(options.getProject());
        tableRef.setDatasetId(options.getBigQueryDataset());
        tableRef.setTableId(options.getBigQueryTable());
        toWrite.apply(ParDo.of(new FormatForBigquery()))
            .apply(BigQueryIO.Write.to(tableRef)
                .withSchema(FormatForBigquery.getSchema())
                .withCreateDisposition(BigQueryIO.Write.CreateDisposition.CREATE_IF_NEEDED)
                .withWriteDisposition(BigQueryIO.Write.WriteDisposition.WRITE_TRUNCATE));
    }

    // Run the pipeline.
    PipelineResult result = p.run();

    if (options.isStreaming() && !options.getInputFile().isEmpty()) {
        // Inject the data into the Pub/Sub topic with a Dataflow batch pipeline.
        dataflowUtils.runInjectorPipeline(options.getInputFile(), options.getPubsubTopic());
    }

    // dataflowUtils will try to cancel the pipeline and the injector before the program exits.
    dataflowUtils.waitToFinish(result);
}
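The windowing choice in the streaming branch above can be read in isolation: 30-minute windows that slide every 5 seconds, so each element lands in many overlapping windows. A minimal sketch, assuming the same Dataflow SDK windowing classes the example imports; the class name SlidingWindowSketch is illustrative only.

import com.google.cloud.dataflow.sdk.transforms.windowing.SlidingWindows;
import org.joda.time.Duration;

public class SlidingWindowSketch {
    public static void main(String[] args) {
        // 30-minute windows that advance every 5 seconds, as in AutoComplete's streaming branch.
        SlidingWindows windowFn = SlidingWindows.of(Duration.standardMinutes(30)) // window size
            .every(Duration.standardSeconds(5));                                  // slide period

        // Each element falls into (window size / slide period) overlapping windows.
        long windowsPerElement =
            Duration.standardMinutes(30).getMillis() / Duration.standardSeconds(5).getMillis();
        System.out.println(windowsPerElement); // 360
    }
}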
From source file: com.google.cloud.dataflow.examples.complete.AutoComplete.java
License: Apache License

public static void main(String[] args) throws IOException {
    Options options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class);
    if (options.isStreaming()) {
        // In order to cancel the pipelines automatically,
        // {@literal DataflowPipelineRunner} is forced to be used.
        options.setRunner(DataflowPipelineRunner.class);
    }
    options.setBigQuerySchema(FormatForBigquery.getSchema());
    DataflowExampleUtils dataflowUtils = new DataflowExampleUtils(options);

    // We support running the same pipeline in either
    // batch or windowed streaming mode.
    PTransform<? super PBegin, PCollection<String>> readSource;
    WindowFn<Object, ?> windowFn;
    if (options.isStreaming()) {
        Preconditions.checkArgument(!options.getOutputToDatastore(),
            "DatastoreIO is not supported in streaming.");
        dataflowUtils.setupPubsub();
        readSource = PubsubIO.Read.topic(options.getPubsubTopic());
        windowFn = SlidingWindows.of(Duration.standardMinutes(30)).every(Duration.standardSeconds(5));
    } else {
        readSource = TextIO.Read.from(options.getInputFile());
        windowFn = new GlobalWindows();
    }

    // Create the pipeline.
    Pipeline p = Pipeline.create(options);
    PCollection<KV<String, List<CompletionCandidate>>> toWrite = p.apply(readSource)
        .apply(ParDo.of(new ExtractHashtags()))
        .apply(Window.<String>into(windowFn))
        .apply(ComputeTopCompletions.top(10, options.getRecursive()));

    if (options.getOutputToDatastore()) {
        toWrite.apply(ParDo.named("FormatForDatastore")
                .of(new FormatForDatastore(options.getKind(), options.getDatastoreAncestorKey())))
            .apply(DatastoreIO.v1().write().withProjectId(
                MoreObjects.firstNonNull(options.getOutputDataset(), options.getProject())));
    }
    if (options.getOutputToBigQuery()) {
        dataflowUtils.setupBigQueryTable();
        TableReference tableRef = new TableReference();
        tableRef.setProjectId(options.getProject());
        tableRef.setDatasetId(options.getBigQueryDataset());
        tableRef.setTableId(options.getBigQueryTable());
        toWrite.apply(ParDo.of(new FormatForBigquery()))
            .apply(BigQueryIO.Write.to(tableRef)
                .withSchema(FormatForBigquery.getSchema())
                .withCreateDisposition(BigQueryIO.Write.CreateDisposition.CREATE_IF_NEEDED)
                .withWriteDisposition(options.isStreaming()
                    ? BigQueryIO.Write.WriteDisposition.WRITE_APPEND
                    : BigQueryIO.Write.WriteDisposition.WRITE_TRUNCATE));
    }

    // Run the pipeline.
    PipelineResult result = p.run();

    if (options.isStreaming() && !options.getInputFile().isEmpty()) {
        // Inject the data into the Pub/Sub topic with a Dataflow batch pipeline.
        dataflowUtils.runInjectorPipeline(options.getInputFile(), options.getPubsubTopic());
    }

    // dataflowUtils will try to cancel the pipeline and the injector before the program exits.
    dataflowUtils.waitToFinish(result);
}
From source file: com.google.cloud.dataflow.examples.complete.game.GameStats.java
License: Apache License

public static void main(String[] args) throws Exception {
    Options options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class);
    // Enforce that this pipeline is always run in streaming mode.
    options.setStreaming(true);
    // Allow the pipeline to be cancelled automatically.
    options.setRunner(DataflowPipelineRunner.class);
    DataflowExampleUtils dataflowUtils = new DataflowExampleUtils(options);
    Pipeline pipeline = Pipeline.create(options);

    // Read Events from Pub/Sub using custom timestamps
    PCollection<GameActionInfo> rawEvents = pipeline
        .apply(PubsubIO.Read.timestampLabel(TIMESTAMP_ATTRIBUTE).topic(options.getTopic()))
        .apply(ParDo.named("ParseGameEvent").of(new ParseEventFn()));

    // Extract username/score pairs from the event stream
    PCollection<KV<String, Integer>> userEvents = rawEvents.apply("ExtractUserScore",
        MapElements.via((GameActionInfo gInfo) -> KV.of(gInfo.getUser(), gInfo.getScore()))
            .withOutputType(new TypeDescriptor<KV<String, Integer>>() {}));

    // Calculate the total score per user over fixed windows, and
    // cumulative updates for late data.
    final PCollectionView<Map<String, Integer>> spammersView = userEvents
        .apply(Window.named("FixedWindowsUser").<KV<String, Integer>>into(
            FixedWindows.of(Duration.standardMinutes(options.getFixedWindowDuration()))))
        // Filter out everyone but those with (SCORE_WEIGHT * avg) clickrate.
        // These might be robots/spammers.
        .apply("CalculateSpammyUsers", new CalculateSpammyUsers())
        // Derive a view from the collection of spammer users. It will be used as a side input
        // in calculating the team score sums, below.
        .apply("CreateSpammersView", View.<String, Integer>asMap());

    // [START DocInclude_FilterAndCalc]
    // Calculate the total score per team over fixed windows,
    // and emit cumulative updates for late data. Uses the side input derived above -- the set of
    // suspected robots -- to filter out scores from those users from the sum.
    // Write the results to BigQuery.
    rawEvents
        .apply(Window.named("WindowIntoFixedWindows").<GameActionInfo>into(
            FixedWindows.of(Duration.standardMinutes(options.getFixedWindowDuration()))))
        // Filter out the detected spammer users, using the side input derived above.
        .apply(ParDo.named("FilterOutSpammers").withSideInputs(spammersView)
            .of(new DoFn<GameActionInfo, GameActionInfo>() {
                @Override
                public void processElement(ProcessContext c) {
                    // If the user is not in the spammers Map, output the data element.
                    if (c.sideInput(spammersView).get(c.element().getUser().trim()) == null) {
                        c.output(c.element());
                    }
                }
            }))
        // Extract and sum teamname/score pairs from the event data.
        .apply("ExtractTeamScore", new ExtractAndSumScore("team"))
        // [END DocInclude_FilterAndCalc]
        // Write the result to BigQuery
        .apply("WriteTeamSums", new WriteWindowedToBigQuery<KV<String, Integer>>(
            options.getTablePrefix() + "_team", configureWindowedWrite()));

    // [START DocInclude_SessionCalc]
    // Detect user sessions -- that is, a burst of activity separated by a gap from further
    // activity. Find and record the mean session lengths.
    // This information could help the game designers track the changing user engagement
    // as their set of games changes.
    userEvents
        .apply(Window.named("WindowIntoSessions")
            .<KV<String, Integer>>into(
                Sessions.withGapDuration(Duration.standardMinutes(options.getSessionGap())))
            .withOutputTimeFn(OutputTimeFns.outputAtEndOfWindow()))
        // For this use, we care only about the existence of the session, not any particular
        // information aggregated over it, so the following is an efficient way to do that.
        .apply(Combine.perKey(x -> 0))
        // Get the duration per session.
        .apply("UserSessionActivity", ParDo.of(new UserSessionInfoFn()))
        // [END DocInclude_SessionCalc]
        // [START DocInclude_Rewindow]
        // Re-window to process groups of session sums according to when the sessions complete.
        .apply(Window.named("WindowToExtractSessionMean").<Integer>into(
            FixedWindows.of(Duration.standardMinutes(options.getUserActivityWindowDuration()))))
        // Find the mean session duration in each window.
        .apply(Mean.<Integer>globally().withoutDefaults())
        // Write this info to a BigQuery table.
        .apply("WriteAvgSessionLength", new WriteWindowedToBigQuery<Double>(
            options.getTablePrefix() + "_sessions", configureSessionWindowWrite()));
    // [END DocInclude_Rewindow]

    // Run the pipeline and wait for the pipeline to finish; capture cancellation requests from the
    // command line.
    PipelineResult result = pipeline.run();
    dataflowUtils.waitToFinish(result);
}
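The session windowing above turns on the gap duration: events for the same key that arrive more than getSessionGap() standard minutes apart land in separate sessions. A minimal sketch of that window function alone, assuming the same Dataflow SDK Sessions class and a hypothetical 10-minute gap in place of the value read from options.

import com.google.cloud.dataflow.sdk.transforms.windowing.Sessions;
import org.joda.time.Duration;

public class SessionWindowSketch {
    public static void main(String[] args) {
        // Hypothetical gap; GameStats reads this from options.getSessionGap().
        Duration sessionGap = Duration.standardMinutes(10);
        // Events for the same key separated by more than the gap start a new session window.
        // In a real pipeline this WindowFn would be passed to Window.into(...).
        Sessions sessionWindows = Sessions.withGapDuration(sessionGap);
        System.out.println(sessionGap); // PT600S (Joda's ISO-8601 rendering of 10 minutes)
    }
}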
From source file: com.google.cloud.dataflow.examples.complete.game.HourlyTeamScore.java
License: Apache License

/**
 * Run a batch pipeline to do windowed analysis of the data.
 */
// [START DocInclude_HTSMain]
public static void main(String[] args) throws Exception {
    // Begin constructing a pipeline configured by commandline flags.
    Options options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class);
    Pipeline pipeline = Pipeline.create(options);

    final Instant stopMinTimestamp = new Instant(minFmt.parseMillis(options.getStopMin()));
    final Instant startMinTimestamp = new Instant(minFmt.parseMillis(options.getStartMin()));

    // Read 'gaming' events from a text file.
    pipeline.apply(TextIO.Read.from(options.getInput()))
        // Parse the incoming data.
        .apply(ParDo.named("ParseGameEvent").of(new ParseEventFn()))

        // Filter out data before and after the given times so that it is not included
        // in the calculations. As we collect data in batches (say, by day), the batch for the day
        // that we want to analyze could potentially include some late-arriving data from the
        // previous day. If so, we want to weed it out. Similarly, if we include data from the
        // following day (to scoop up late-arriving events from the day we're analyzing), we need
        // to weed out events that fall after the time period we want to analyze.
        // [START DocInclude_HTSFilters]
        .apply("FilterStartTime", Filter.byPredicate(
            (GameActionInfo gInfo) -> gInfo.getTimestamp() > startMinTimestamp.getMillis()))
        .apply("FilterEndTime", Filter.byPredicate(
            (GameActionInfo gInfo) -> gInfo.getTimestamp() < stopMinTimestamp.getMillis()))
        // [END DocInclude_HTSFilters]

        // [START DocInclude_HTSAddTsAndWindow]
        // Add an element timestamp based on the event log, and apply fixed windowing.
        .apply("AddEventTimestamps",
            WithTimestamps.of((GameActionInfo i) -> new Instant(i.getTimestamp())))
        .apply(Window.named("FixedWindowsTeam").<GameActionInfo>into(
            FixedWindows.of(Duration.standardMinutes(options.getWindowDuration()))))
        // [END DocInclude_HTSAddTsAndWindow]

        // Extract and sum teamname/score pairs from the event data.
        .apply("ExtractTeamScore", new ExtractAndSumScore("team"))
        .apply("WriteTeamScoreSums", new WriteWindowedToBigQuery<KV<String, Integer>>(
            options.getTableName(), configureWindowedTableWrite()));

    pipeline.run();
}
From source file: com.google.cloud.dataflow.examples.complete.game.LeaderBoard.java
License: Apache License

public static void main(String[] args) throws Exception {
    Options options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class);
    // Enforce that this pipeline is always run in streaming mode.
    options.setStreaming(true);
    // For example purposes, allow the pipeline to be easily cancelled instead of running
    // continuously.
    options.setRunner(DataflowPipelineRunner.class);
    DataflowExampleUtils dataflowUtils = new DataflowExampleUtils(options);
    Pipeline pipeline = Pipeline.create(options);

    // Read game events from Pub/Sub using custom timestamps, which are extracted from the pubsub
    // data elements, and parse the data.
    PCollection<GameActionInfo> gameEvents = pipeline
        .apply(PubsubIO.Read.timestampLabel(TIMESTAMP_ATTRIBUTE).topic(options.getTopic()))
        .apply(ParDo.named("ParseGameEvent").of(new ParseEventFn()));

    // [START DocInclude_WindowAndTrigger]
    // Extract team/score pairs from the event stream, using hour-long windows by default.
    gameEvents
        .apply(Window.named("LeaderboardTeamFixedWindows")
            .<GameActionInfo>into(
                FixedWindows.of(Duration.standardMinutes(options.getTeamWindowDuration())))
            // We will get early (speculative) results as well as cumulative
            // processing of late data.
            .triggering(AfterWatermark.pastEndOfWindow()
                .withEarlyFirings(
                    AfterProcessingTime.pastFirstElementInPane().plusDelayOf(FIVE_MINUTES))
                .withLateFirings(
                    AfterProcessingTime.pastFirstElementInPane().plusDelayOf(TEN_MINUTES)))
            .withAllowedLateness(Duration.standardMinutes(options.getAllowedLateness()))
            .accumulatingFiredPanes())
        // Extract and sum teamname/score pairs from the event data.
        .apply("ExtractTeamScore", new ExtractAndSumScore("team"))
        // Write the results to BigQuery.
        .apply("WriteTeamScoreSums", new WriteWindowedToBigQuery<KV<String, Integer>>(
            options.getTableName() + "_team", configureWindowedTableWrite()));
    // [END DocInclude_WindowAndTrigger]

    // [START DocInclude_ProcTimeTrigger]
    // Extract user/score pairs from the event stream using processing time, via global windowing.
    // Get periodic updates on all users' running scores.
    gameEvents
        .apply(Window.named("LeaderboardUserGlobalWindow").<GameActionInfo>into(new GlobalWindows())
            // Get periodic results every ten minutes.
            .triggering(Repeatedly.forever(
                AfterProcessingTime.pastFirstElementInPane().plusDelayOf(TEN_MINUTES)))
            .accumulatingFiredPanes()
            .withAllowedLateness(Duration.standardMinutes(options.getAllowedLateness())))
        // Extract and sum username/score pairs from the event data.
        .apply("ExtractUserScore", new ExtractAndSumScore("user"))
        // Write the results to BigQuery.
        .apply("WriteUserScoreSums", new WriteToBigQuery<KV<String, Integer>>(
            options.getTableName() + "_user", configureGlobalWindowBigQueryWrite()));
    // [END DocInclude_ProcTimeTrigger]

    // Run the pipeline and wait for the pipeline to finish; capture cancellation requests from the
    // command line.
    PipelineResult result = pipeline.run();
    dataflowUtils.waitToFinish(result);
}
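Durations built with standardMinutes appear in three places in the trigger setup above: the fixed window size, the early/late firing delays (the FIVE_MINUTES and TEN_MINUTES constants are presumably defined the same way), and the allowed lateness. A minimal sketch of just that window/trigger construction, assuming the Dataflow SDK classes LeaderBoard already imports, with hypothetical fixed values in place of the option getters and String standing in for the example's element type.

import com.google.cloud.dataflow.sdk.transforms.windowing.AfterProcessingTime;
import com.google.cloud.dataflow.sdk.transforms.windowing.AfterWatermark;
import com.google.cloud.dataflow.sdk.transforms.windowing.FixedWindows;
import com.google.cloud.dataflow.sdk.transforms.windowing.Window;
import org.joda.time.Duration;

public class LeaderboardWindowSketch {
    // Hypothetical stand-ins for the example's FIVE_MINUTES / TEN_MINUTES constants.
    private static final Duration FIVE_MINUTES = Duration.standardMinutes(5);
    private static final Duration TEN_MINUTES = Duration.standardMinutes(10);

    static Window.Bound<String> teamWindowing() {
        // Hour-long fixed windows (stand-in for options.getTeamWindowDuration()).
        return Window.<String>into(FixedWindows.of(Duration.standardMinutes(60)))
            // Early (speculative) firings five minutes after the first element in a pane,
            // late firings ten minutes after late data starts arriving.
            .triggering(AfterWatermark.pastEndOfWindow()
                .withEarlyFirings(AfterProcessingTime.pastFirstElementInPane().plusDelayOf(FIVE_MINUTES))
                .withLateFirings(AfterProcessingTime.pastFirstElementInPane().plusDelayOf(TEN_MINUTES)))
            // Keep accepting late data for two hours (stand-in for options.getAllowedLateness()).
            .withAllowedLateness(Duration.standardMinutes(120))
            .accumulatingFiredPanes();
    }

    public static void main(String[] args) {
        // Constructing the transform does not require a running pipeline.
        System.out.println(teamWindowing());
    }
}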
From source file: com.google.cloud.dataflow.examples.complete.TrafficMaxLaneFlow.java
License: Apache License

/**
 * Sets up and starts streaming pipeline.
 *
 * @throws IOException if there is a problem setting up resources
 */
public static void main(String[] args) throws IOException {
    TrafficMaxLaneFlowOptions options = PipelineOptionsFactory.fromArgs(args).withValidation()
        .as(TrafficMaxLaneFlowOptions.class);
    options.setBigQuerySchema(FormatMaxesFn.getSchema());
    // Using DataflowExampleUtils to set up required resources.
    DataflowExampleUtils dataflowUtils = new DataflowExampleUtils(options, options.isUnbounded());

    Pipeline pipeline = Pipeline.create(options);
    TableReference tableRef = new TableReference();
    tableRef.setProjectId(options.getProject());
    tableRef.setDatasetId(options.getBigQueryDataset());
    tableRef.setTableId(options.getBigQueryTable());

    PCollection<String> input;
    if (options.isUnbounded()) {
        // Read unbounded PubSubIO.
        input = pipeline.apply(PubsubIO.Read.timestampLabel(PUBSUB_TIMESTAMP_LABEL_KEY)
            .subscription(options.getPubsubSubscription()));
    } else {
        // Read bounded PubSubIO.
        input = pipeline.apply(PubsubIO.Read.timestampLabel(PUBSUB_TIMESTAMP_LABEL_KEY)
            .subscription(options.getPubsubSubscription()).maxNumRecords(VALID_INPUTS));
        // To read bounded TextIO files, use:
        // input = pipeline.apply(new ReadFileAndExtractTimestamps(options.getInputFile()));
    }

    input
        // row... => <station route, station speed> ...
        .apply(ParDo.of(new ExtractFlowInfoFn()))
        // map the incoming data stream into sliding windows. The default window duration values
        // work well if you're running the accompanying Pub/Sub generator script with the
        // --replay flag, which simulates pauses in the sensor data publication. You may want to
        // adjust them otherwise.
        .apply(Window.<KV<String, LaneInfo>>into(
            SlidingWindows.of(Duration.standardMinutes(options.getWindowDuration()))
                .every(Duration.standardMinutes(options.getWindowSlideEvery()))))
        .apply(new MaxLaneFlow())
        .apply(BigQueryIO.Write.to(tableRef).withSchema(FormatMaxesFn.getSchema()));

    // Inject the data into the Pub/Sub topic with a Dataflow batch pipeline.
    if (!Strings.isNullOrEmpty(options.getInputFile()) && !Strings.isNullOrEmpty(options.getPubsubTopic())) {
        dataflowUtils.runInjectorPipeline(new ReadFileAndExtractTimestamps(options.getInputFile()),
            options.getPubsubTopic(), PUBSUB_TIMESTAMP_LABEL_KEY);
    }

    // Run the pipeline.
    PipelineResult result = pipeline.run();
    // dataflowUtils will try to cancel the pipeline and the injector before the program exits.
    dataflowUtils.waitToFinish(result);
}
From source file: com.google.cloud.dataflow.examples.complete.TrafficRoutes.java
License: Apache License

/**
 * Sets up and starts streaming pipeline.
 *
 * @throws IOException if there is a problem setting up resources
 */
public static void main(String[] args) throws IOException {
    TrafficRoutesOptions options = PipelineOptionsFactory.fromArgs(args).withValidation()
        .as(TrafficRoutesOptions.class);
    options.setBigQuerySchema(FormatStatsFn.getSchema());
    // Using DataflowExampleUtils to set up required resources.
    DataflowExampleUtils dataflowUtils = new DataflowExampleUtils(options, options.isUnbounded());

    Pipeline pipeline = Pipeline.create(options);
    TableReference tableRef = new TableReference();
    tableRef.setProjectId(options.getProject());
    tableRef.setDatasetId(options.getBigQueryDataset());
    tableRef.setTableId(options.getBigQueryTable());

    PCollection<String> input;
    if (options.isUnbounded()) {
        // Read unbounded PubSubIO.
        input = pipeline.apply(PubsubIO.Read.timestampLabel(PUBSUB_TIMESTAMP_LABEL_KEY)
            .subscription(options.getPubsubSubscription()));
    } else {
        // Read bounded PubSubIO.
        input = pipeline.apply(PubsubIO.Read.timestampLabel(PUBSUB_TIMESTAMP_LABEL_KEY)
            .subscription(options.getPubsubSubscription()).maxNumRecords(VALID_INPUTS));
        // To read bounded TextIO files, use:
        // input = pipeline.apply(TextIO.Read.from(options.getInputFile()))
        //     .apply(ParDo.of(new ExtractTimestamps()));
    }

    input
        // row... => <station route, station speed> ...
        .apply(ParDo.of(new ExtractStationSpeedFn()))
        // map the incoming data stream into sliding windows.
        // The default window duration values work well if you're running the accompanying Pub/Sub
        // generator script without the --replay flag, so that there are no simulated pauses in
        // the sensor data publication. You may want to adjust the values otherwise.
        .apply(Window.<KV<String, StationSpeed>>into(
            SlidingWindows.of(Duration.standardMinutes(options.getWindowDuration()))
                .every(Duration.standardMinutes(options.getWindowSlideEvery()))))
        .apply(new TrackSpeed())
        .apply(BigQueryIO.Write.to(tableRef).withSchema(FormatStatsFn.getSchema()));

    // Inject the data into the Pub/Sub topic with a Dataflow batch pipeline.
    if (!Strings.isNullOrEmpty(options.getInputFile()) && !Strings.isNullOrEmpty(options.getPubsubTopic())) {
        dataflowUtils.runInjectorPipeline(new ReadFileAndExtractTimestamps(options.getInputFile()),
            options.getPubsubTopic(), PUBSUB_TIMESTAMP_LABEL_KEY);
    }

    // Run the pipeline.
    PipelineResult result = pipeline.run();
    // dataflowUtils will try to cancel the pipeline and the injector before the program exits.
    dataflowUtils.waitToFinish(result);
}
From source file: com.google.cloud.dataflow.examples.TrafficMaxLaneFlow.java
License: Apache License

/**
 * Sets up and starts streaming pipeline.
 *
 * @throws IOException if there is a problem setting up resources
 */
public static void main(String[] args) throws IOException {
    TrafficMaxLaneFlowOptions options = PipelineOptionsFactory.fromArgs(args).withValidation()
        .as(TrafficMaxLaneFlowOptions.class);
    if (options.isStreaming()) {
        // In order to cancel the pipelines automatically,
        // {@literal DataflowPipelineRunner} is forced to be used.
        options.setRunner(DataflowPipelineRunner.class);
    }
    options.setBigQuerySchema(FormatMaxesFn.getSchema());
    // Using DataflowExampleUtils to set up required resources.
    DataflowExampleUtils dataflowUtils = new DataflowExampleUtils(options);
    dataflowUtils.setup();

    Pipeline pipeline = Pipeline.create(options);
    TableReference tableRef = new TableReference();
    tableRef.setProjectId(options.getProject());
    tableRef.setDatasetId(options.getBigQueryDataset());
    tableRef.setTableId(options.getBigQueryTable());

    PCollection<KV<String, LaneInfo>> input;
    if (options.isStreaming()) {
        input = pipeline.apply(PubsubIO.Read.topic(options.getPubsubTopic()))
            // row... => <stationId, LaneInfo> ...
            .apply(ParDo.of(new ExtractFlowInfoFn(false /* outputTimestamp */)));
    } else {
        input = pipeline.apply(TextIO.Read.from(options.getInputFile()))
            // row... => <stationId, LaneInfo> ...
            .apply(ParDo.of(new ExtractFlowInfoFn(true /* outputTimestamp */)));
    }

    // map the incoming data stream into sliding windows. The default window duration values
    // work well if you're running the accompanying Pub/Sub generator script with the
    // --replay flag, which simulates pauses in the sensor data publication. You may want to
    // adjust them otherwise.
    input.apply(Window.<KV<String, LaneInfo>>into(
            SlidingWindows.of(Duration.standardMinutes(options.getWindowDuration()))
                .every(Duration.standardMinutes(options.getWindowSlideEvery()))))
        .apply(new MaxLaneFlow())
        .apply(BigQueryIO.Write.to(tableRef).withSchema(FormatMaxesFn.getSchema()));

    PipelineResult result = pipeline.run();

    if (options.isStreaming() && !options.getInputFile().isEmpty()) {
        // Inject the data into the Pub/Sub topic with a Dataflow batch pipeline.
        dataflowUtils.runInjectorPipeline(options.getInputFile(), options.getPubsubTopic());
    }

    // dataflowUtils will try to cancel the pipeline and the injector before the program exits.
    dataflowUtils.waitToFinish(result);
}
From source file: com.google.cloud.dataflow.examples.TrafficRoutes.java
License: Apache License

/**
 * Sets up and starts streaming pipeline.
 *
 * @throws IOException if there is a problem setting up resources
 */
public static void main(String[] args) throws IOException {
    TrafficRoutesOptions options = PipelineOptionsFactory.fromArgs(args).withValidation()
        .as(TrafficRoutesOptions.class);
    if (options.isStreaming()) {
        // In order to cancel the pipelines automatically,
        // {@literal DataflowPipelineRunner} is forced to be used.
        options.setRunner(DataflowPipelineRunner.class);
    }
    options.setBigQuerySchema(FormatStatsFn.getSchema());
    // Using DataflowExampleUtils to set up required resources.
    DataflowExampleUtils dataflowUtils = new DataflowExampleUtils(options);
    dataflowUtils.setup();

    Pipeline pipeline = Pipeline.create(options);
    TableReference tableRef = new TableReference();
    tableRef.setProjectId(options.getProject());
    tableRef.setDatasetId(options.getBigQueryDataset());
    tableRef.setTableId(options.getBigQueryTable());

    PCollection<KV<String, StationSpeed>> input;
    if (options.isStreaming()) {
        input = pipeline.apply(PubsubIO.Read.topic(options.getPubsubTopic()))
            // row... => <station route, station speed> ...
            .apply(ParDo.of(new ExtractStationSpeedFn(false /* outputTimestamp */)));
    } else {
        input = pipeline.apply(TextIO.Read.from(options.getInputFile()))
            .apply(ParDo.of(new ExtractStationSpeedFn(true /* outputTimestamp */)));
    }

    // map the incoming data stream into sliding windows.
    // The default window duration values work well if you're running the accompanying Pub/Sub
    // generator script without the --replay flag, so that there are no simulated pauses in
    // the sensor data publication. You may want to adjust the values otherwise.
    input.apply(Window.<KV<String, StationSpeed>>into(
            SlidingWindows.of(Duration.standardMinutes(options.getWindowDuration()))
                .every(Duration.standardMinutes(options.getWindowSlideEvery()))))
        .apply(new TrackSpeed())
        .apply(BigQueryIO.Write.to(tableRef).withSchema(FormatStatsFn.getSchema()));

    PipelineResult result = pipeline.run();

    if (options.isStreaming() && !options.getInputFile().isEmpty()) {
        // Inject the data into the Pub/Sub topic with a Dataflow batch pipeline.
        dataflowUtils.runInjectorPipeline(options.getInputFile(), options.getPubsubTopic());
    }

    // dataflowUtils will try to cancel the pipeline and the injector before the program exits.
    dataflowUtils.waitToFinish(result);
}
From source file: com.google.cloud.dataflow.examples.TrafficStreamingMaxLaneFlow.java
License: Apache License

/**
 * Sets up and starts streaming pipeline.
 */
public static void main(String[] args) {
    TrafficStreamingMaxLaneFlowOptions options = PipelineOptionsFactory.fromArgs(args).withValidation()
        .as(TrafficStreamingMaxLaneFlowOptions.class);
    DataflowPipelineOptions dataflowOptions = options.as(DataflowPipelineOptions.class);
    dataflowOptions.setStreaming(true);

    Pipeline pipeline = Pipeline.create(options);
    TableReference tableRef = new TableReference();
    tableRef.setProjectId(dataflowOptions.getProject());
    tableRef.setDatasetId(options.getDataset());
    tableRef.setTableId(options.getTable());

    pipeline.apply(PubsubIO.Read.topic(options.getInputTopic()))
        /* map the incoming data stream into sliding windows. The default window duration values
           work well if you're running the accompanying PubSub generator script with the --replay
           flag, which simulates pauses in the sensor data publication. You may want to adjust
           them otherwise. */
        .apply(Window.<String>into(
            SlidingWindows.of(Duration.standardMinutes(options.getWindowDuration()))
                .every(Duration.standardMinutes(options.getWindowSlideEvery()))))
        .apply(new MaxLaneFlow())
        .apply(BigQueryIO.Write.to(tableRef).withSchema(FormatMaxesFn.getSchema()));

    /* When you are done running the example, cancel your pipeline so that you do not continue to
       be charged for its instances. You can do this by visiting
       https://console.developers.google.com/project/your-project-name/dataflow/job-id
       in the Developers Console. You should also terminate the generator script so that you do
       not use unnecessary PubSub quota. */
    pipeline.run();
}