Example usage for org.joda.time Duration standardMinutes

Introduction

This page lists example usages of org.joda.time Duration.standardMinutes.

Prototype

public static Duration standardMinutes(long minutes) 

Document

Creates a Duration with the specified number of minutes, assuming the standard 60,000 milliseconds per minute.
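
For reference, a minimal standalone sketch (not taken from the examples below; the class name is only for illustration) showing the factory method and the resulting millisecond value:

import org.joda.time.Duration;

public class StandardMinutesDemo {
    public static void main(String[] args) {
        // 30 standard minutes == 30 * 60 * 1000 milliseconds.
        Duration thirtyMinutes = Duration.standardMinutes(30);
        System.out.println(thirtyMinutes.getMillis());           // 1800000
        System.out.println(thirtyMinutes.getStandardMinutes());  // 30
    }
}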

Usage

From source file:com.google.cloud.dataflow.examples.AutoComplete.java

License:Apache License

public static void main(String[] args) throws IOException {
    Options options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class);

    if (options.isStreaming()) {
        // In order to cancel the pipelines automatically,
        // {@literal DataflowPipelineRunner} is forced to be used.
        options.setRunner(DataflowPipelineRunner.class);
    }

    options.setBigQuerySchema(FormatForBigquery.getSchema());
    DataflowExampleUtils dataflowUtils = new DataflowExampleUtils(options);

    // We support running the same pipeline in either
    // batch or windowed streaming mode.
    PTransform<? super PBegin, PCollection<String>> readSource;
    WindowFn<Object, ?> windowFn;
    if (options.isStreaming()) {
        Preconditions.checkArgument(!options.getOutputToDatastore(),
                "DatastoreIO is not supported in streaming.");
        dataflowUtils.setupPubsubTopic();

        readSource = PubsubIO.Read.topic(options.getPubsubTopic());
        windowFn = SlidingWindows.of(Duration.standardMinutes(30)).every(Duration.standardSeconds(5));
    } else {
        readSource = TextIO.Read.from(options.getInputFile());
        windowFn = new GlobalWindows();
    }

    // Create the pipeline.
    Pipeline p = Pipeline.create(options);
    PCollection<KV<String, List<CompletionCandidate>>> toWrite = p.apply(readSource)
            .apply(ParDo.of(new ExtractHashtags())).apply(Window.<String>into(windowFn))
            .apply(ComputeTopCompletions.top(10, options.getRecursive()));

    if (options.getOutputToDatastore()) {
        toWrite.apply(ParDo.named("FormatForDatastore").of(new FormatForDatastore(options.getKind())))
                .apply(DatastoreIO.writeTo(options.getProject()));
    }
    if (options.getOutputToBigQuery()) {
        dataflowUtils.setupBigQueryTable();

        TableReference tableRef = new TableReference();
        tableRef.setProjectId(options.getProject());
        tableRef.setDatasetId(options.getBigQueryDataset());
        tableRef.setTableId(options.getBigQueryTable());

        toWrite.apply(ParDo.of(new FormatForBigquery()))
                .apply(BigQueryIO.Write.to(tableRef).withSchema(FormatForBigquery.getSchema())
                        .withCreateDisposition(BigQueryIO.Write.CreateDisposition.CREATE_IF_NEEDED)
                        .withWriteDisposition(BigQueryIO.Write.WriteDisposition.WRITE_TRUNCATE));
    }

    // Run the pipeline.
    PipelineResult result = p.run();

    if (options.isStreaming() && !options.getInputFile().isEmpty()) {
        // Inject the data into the Pub/Sub topic with a Dataflow batch pipeline.
        dataflowUtils.runInjectorPipeline(options.getInputFile(), options.getPubsubTopic());
    }

    // dataflowUtils will try to cancel the pipeline and the injector before the program exits.
    dataflowUtils.waitToFinish(result);
}

From source file:com.google.cloud.dataflow.examples.complete.AutoComplete.java

License:Apache License

public static void main(String[] args) throws IOException {
    Options options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class);

    if (options.isStreaming()) {
        // In order to cancel the pipelines automatically,
        // {@literal DataflowPipelineRunner} is forced to be used.
        options.setRunner(DataflowPipelineRunner.class);
    }

    options.setBigQuerySchema(FormatForBigquery.getSchema());
    DataflowExampleUtils dataflowUtils = new DataflowExampleUtils(options);

    // We support running the same pipeline in either
    // batch or windowed streaming mode.
    PTransform<? super PBegin, PCollection<String>> readSource;
    WindowFn<Object, ?> windowFn;
    if (options.isStreaming()) {
        Preconditions.checkArgument(!options.getOutputToDatastore(),
                "DatastoreIO is not supported in streaming.");
        dataflowUtils.setupPubsub();

        readSource = PubsubIO.Read.topic(options.getPubsubTopic());
        windowFn = SlidingWindows.of(Duration.standardMinutes(30)).every(Duration.standardSeconds(5));
    } else {
        readSource = TextIO.Read.from(options.getInputFile());
        windowFn = new GlobalWindows();
    }

    // Create the pipeline.
    Pipeline p = Pipeline.create(options);
    PCollection<KV<String, List<CompletionCandidate>>> toWrite = p.apply(readSource)
            .apply(ParDo.of(new ExtractHashtags())).apply(Window.<String>into(windowFn))
            .apply(ComputeTopCompletions.top(10, options.getRecursive()));

    if (options.getOutputToDatastore()) {
        toWrite.apply(ParDo.named("FormatForDatastore")
                .of(new FormatForDatastore(options.getKind(), options.getDatastoreAncestorKey())))
                .apply(DatastoreIO.v1().write().withProjectId(
                        MoreObjects.firstNonNull(options.getOutputDataset(), options.getProject())));
    }
    if (options.getOutputToBigQuery()) {
        dataflowUtils.setupBigQueryTable();

        TableReference tableRef = new TableReference();
        tableRef.setProjectId(options.getProject());
        tableRef.setDatasetId(options.getBigQueryDataset());
        tableRef.setTableId(options.getBigQueryTable());

        toWrite.apply(ParDo.of(new FormatForBigquery()))
                .apply(BigQueryIO.Write.to(tableRef).withSchema(FormatForBigquery.getSchema())
                        .withCreateDisposition(BigQueryIO.Write.CreateDisposition.CREATE_IF_NEEDED)
                        .withWriteDisposition(
                                options.isStreaming() ? BigQueryIO.Write.WriteDisposition.WRITE_APPEND
                                        : BigQueryIO.Write.WriteDisposition.WRITE_TRUNCATE));
    }

    // Run the pipeline.
    PipelineResult result = p.run();

    if (options.isStreaming() && !options.getInputFile().isEmpty()) {
        // Inject the data into the Pub/Sub topic with a Dataflow batch pipeline.
        dataflowUtils.runInjectorPipeline(options.getInputFile(), options.getPubsubTopic());
    }

    // dataflowUtils will try to cancel the pipeline and the injector before the program exits.
    dataflowUtils.waitToFinish(result);
}

From source file:com.google.cloud.dataflow.examples.complete.game.GameStats.java

License:Apache License

public static void main(String[] args) throws Exception {

    Options options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class);
    // Enforce that this pipeline is always run in streaming mode.
    options.setStreaming(true);
    // Allow the pipeline to be cancelled automatically.
    options.setRunner(DataflowPipelineRunner.class);
    DataflowExampleUtils dataflowUtils = new DataflowExampleUtils(options);
    Pipeline pipeline = Pipeline.create(options);

    // Read Events from Pub/Sub using custom timestamps
    PCollection<GameActionInfo> rawEvents = pipeline
            .apply(PubsubIO.Read.timestampLabel(TIMESTAMP_ATTRIBUTE).topic(options.getTopic()))
            .apply(ParDo.named("ParseGameEvent").of(new ParseEventFn()));

    // Extract username/score pairs from the event stream
    PCollection<KV<String, Integer>> userEvents = rawEvents.apply("ExtractUserScore",
            MapElements.via((GameActionInfo gInfo) -> KV.of(gInfo.getUser(), gInfo.getScore()))
                    .withOutputType(new TypeDescriptor<KV<String, Integer>>() {
                    }));

    // Calculate the total score per user over fixed windows, and
    // cumulative updates for late data.
    final PCollectionView<Map<String, Integer>> spammersView = userEvents
            .apply(Window.named("FixedWindowsUser").<KV<String, Integer>>into(
                    FixedWindows.of(Duration.standardMinutes(options.getFixedWindowDuration()))))

            // Filter out everyone but those with (SCORE_WEIGHT * avg) clickrate.
            // These might be robots/spammers.
            .apply("CalculateSpammyUsers", new CalculateSpammyUsers())
            // Derive a view from the collection of spammer users. It will be used as a side input
            // in calculating the team score sums, below.
            .apply("CreateSpammersView", View.<String, Integer>asMap());

    // [START DocInclude_FilterAndCalc]
    // Calculate the total score per team over fixed windows,
    // and emit cumulative updates for late data. Uses the side input derived above-- the set of
    // suspected robots-- to filter out scores from those users from the sum.
    // Write the results to BigQuery.
    rawEvents
            .apply(Window.named("WindowIntoFixedWindows").<GameActionInfo>into(
                    FixedWindows.of(Duration.standardMinutes(options.getFixedWindowDuration()))))
            // Filter out the detected spammer users, using the side input derived above.
            .apply(ParDo.named("FilterOutSpammers").withSideInputs(spammersView)
                    .of(new DoFn<GameActionInfo, GameActionInfo>() {
                        @Override
                        public void processElement(ProcessContext c) {
                            // If the user is not in the spammers Map, output the data element.
                            if (c.sideInput(spammersView).get(c.element().getUser().trim()) == null) {
                                c.output(c.element());
                            }
                        }
                    }))
            // Extract and sum teamname/score pairs from the event data.
            .apply("ExtractTeamScore", new ExtractAndSumScore("team"))
            // [END DocInclude_FilterAndCalc]
            // Write the result to BigQuery
            .apply("WriteTeamSums", new WriteWindowedToBigQuery<KV<String, Integer>>(
                    options.getTablePrefix() + "_team", configureWindowedWrite()));

    // [START DocInclude_SessionCalc]
    // Detect user sessions-- that is, a burst of activity separated by a gap from further
    // activity. Find and record the mean session lengths.
    // This information could help the game designers track the changing user engagement
    // as their set of games changes.
    userEvents
            .apply(Window.named("WindowIntoSessions")
                    .<KV<String, Integer>>into(
                            Sessions.withGapDuration(Duration.standardMinutes(options.getSessionGap())))
                    .withOutputTimeFn(OutputTimeFns.outputAtEndOfWindow()))
            // For this use, we care only about the existence of the session, not any particular
            // information aggregated over it, so the following is an efficient way to do that.
            .apply(Combine.perKey(x -> 0))
            // Get the duration per session.
            .apply("UserSessionActivity", ParDo.of(new UserSessionInfoFn()))
            // [END DocInclude_SessionCalc]
            // [START DocInclude_Rewindow]
            // Re-window to process groups of session sums according to when the sessions complete.
            .apply(Window.named("WindowToExtractSessionMean").<Integer>into(
                    FixedWindows.of(Duration.standardMinutes(options.getUserActivityWindowDuration()))))
            // Find the mean session duration in each window.
            .apply(Mean.<Integer>globally().withoutDefaults())
            // Write this info to a BigQuery table.
            .apply("WriteAvgSessionLength", new WriteWindowedToBigQuery<Double>(
                    options.getTablePrefix() + "_sessions", configureSessionWindowWrite()));
    // [END DocInclude_Rewindow]

    // Run the pipeline and wait for the pipeline to finish; capture cancellation requests from the
    // command line.
    PipelineResult result = pipeline.run();
    dataflowUtils.waitToFinish(result);
}

From source file:com.google.cloud.dataflow.examples.complete.game.HourlyTeamScore.java

License:Apache License

/**
 * Run a batch pipeline to do windowed analysis of the data.
 */
// [START DocInclude_HTSMain]
public static void main(String[] args) throws Exception {
    // Begin constructing a pipeline configured by commandline flags.
    Options options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class);
    Pipeline pipeline = Pipeline.create(options);

    final Instant stopMinTimestamp = new Instant(minFmt.parseMillis(options.getStopMin()));
    final Instant startMinTimestamp = new Instant(minFmt.parseMillis(options.getStartMin()));

    // Read 'gaming' events from a text file.
    pipeline.apply(TextIO.Read.from(options.getInput()))
            // Parse the incoming data.
            .apply(ParDo.named("ParseGameEvent").of(new ParseEventFn()))

            // Filter out data before and after the given times so that it is not included
            // in the calculations. As we collect data in batches (say, by day), the batch for the day
            // that we want to analyze could potentially include some late-arriving data from the previous
            // day. If so, we want to weed it out. Similarly, if we include data from the following day
            // (to scoop up late-arriving events from the day we're analyzing), we need to weed out events
            // that fall after the time period we want to analyze.
            // [START DocInclude_HTSFilters]
            .apply("FilterStartTime",
                    Filter.byPredicate(
                            (GameActionInfo gInfo) -> gInfo.getTimestamp() > startMinTimestamp.getMillis()))
            .apply("FilterEndTime",
                    Filter.byPredicate(
                            (GameActionInfo gInfo) -> gInfo.getTimestamp() < stopMinTimestamp.getMillis()))
            // [END DocInclude_HTSFilters]

            // [START DocInclude_HTSAddTsAndWindow]
            // Add an element timestamp based on the event log, and apply fixed windowing.
            .apply("AddEventTimestamps", WithTimestamps.of((GameActionInfo i) -> new Instant(i.getTimestamp())))
            .apply(Window.named("FixedWindowsTeam").<GameActionInfo>into(
                    FixedWindows.of(Duration.standardMinutes(options.getWindowDuration()))))
            // [END DocInclude_HTSAddTsAndWindow]

            // Extract and sum teamname/score pairs from the event data.
            .apply("ExtractTeamScore", new ExtractAndSumScore("team")).apply("WriteTeamScoreSums",
                    new WriteWindowedToBigQuery<KV<String, Integer>>(options.getTableName(),
                            configureWindowedTableWrite()));

    pipeline.run();
}

From source file:com.google.cloud.dataflow.examples.complete.game.LeaderBoard.java

License:Apache License

public static void main(String[] args) throws Exception {

    Options options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class);
    // Enforce that this pipeline is always run in streaming mode.
    options.setStreaming(true);
    // For example purposes, allow the pipeline to be easily cancelled instead of running
    // continuously.
    options.setRunner(DataflowPipelineRunner.class);
    DataflowExampleUtils dataflowUtils = new DataflowExampleUtils(options);
    Pipeline pipeline = Pipeline.create(options);

    // Read game events from Pub/Sub using custom timestamps, which are extracted from the pubsub
    // data elements, and parse the data.
    PCollection<GameActionInfo> gameEvents = pipeline
            .apply(PubsubIO.Read.timestampLabel(TIMESTAMP_ATTRIBUTE).topic(options.getTopic()))
            .apply(ParDo.named("ParseGameEvent").of(new ParseEventFn()));

    // [START DocInclude_WindowAndTrigger]
    // Extract team/score pairs from the event stream, using hour-long windows by default.
    gameEvents
            .apply(Window.named("LeaderboardTeamFixedWindows")
                    .<GameActionInfo>into(
                            FixedWindows.of(Duration.standardMinutes(options.getTeamWindowDuration())))
                    // We will get early (speculative) results as well as cumulative
                    // processing of late data.
                    .triggering(AfterWatermark.pastEndOfWindow()
                            .withEarlyFirings(
                                    AfterProcessingTime.pastFirstElementInPane().plusDelayOf(FIVE_MINUTES))
                            .withLateFirings(
                                    AfterProcessingTime.pastFirstElementInPane().plusDelayOf(TEN_MINUTES)))
                    .withAllowedLateness(Duration.standardMinutes(options.getAllowedLateness()))
                    .accumulatingFiredPanes())
            // Extract and sum teamname/score pairs from the event data.
            .apply("ExtractTeamScore", new ExtractAndSumScore("team"))
            // Write the results to BigQuery.
            .apply("WriteTeamScoreSums", new WriteWindowedToBigQuery<KV<String, Integer>>(
                    options.getTableName() + "_team", configureWindowedTableWrite()));
    // [END DocInclude_WindowAndTrigger]

    // [START DocInclude_ProcTimeTrigger]
    // Extract user/score pairs from the event stream using processing time, via global windowing.
    // Get periodic updates on all users' running scores.
    gameEvents.apply(Window.named("LeaderboardUserGlobalWindow").<GameActionInfo>into(new GlobalWindows())
            // Get periodic results every ten minutes.
            .triggering(
                    Repeatedly.forever(AfterProcessingTime.pastFirstElementInPane().plusDelayOf(TEN_MINUTES)))
            .accumulatingFiredPanes()
            .withAllowedLateness(Duration.standardMinutes(options.getAllowedLateness())))
            // Extract and sum username/score pairs from the event data.
            .apply("ExtractUserScore", new ExtractAndSumScore("user"))
            // Write the results to BigQuery.
            .apply("WriteUserScoreSums", new WriteToBigQuery<KV<String, Integer>>(
                    options.getTableName() + "_user", configureGlobalWindowBigQueryWrite()));
    // [END DocInclude_ProcTimeTrigger]

    // Run the pipeline and wait for the pipeline to finish; capture cancellation requests from the
    // command line.
    PipelineResult result = pipeline.run();
    dataflowUtils.waitToFinish(result);
}

From source file:com.google.cloud.dataflow.examples.complete.TrafficMaxLaneFlow.java

License:Apache License

/**
 * Sets up and starts streaming pipeline.
 *
 * @throws IOException if there is a problem setting up resources
 */
public static void main(String[] args) throws IOException {
    TrafficMaxLaneFlowOptions options = PipelineOptionsFactory.fromArgs(args).withValidation()
            .as(TrafficMaxLaneFlowOptions.class);
    options.setBigQuerySchema(FormatMaxesFn.getSchema());
    // Using DataflowExampleUtils to set up required resources.
    DataflowExampleUtils dataflowUtils = new DataflowExampleUtils(options, options.isUnbounded());

    Pipeline pipeline = Pipeline.create(options);
    TableReference tableRef = new TableReference();
    tableRef.setProjectId(options.getProject());
    tableRef.setDatasetId(options.getBigQueryDataset());
    tableRef.setTableId(options.getBigQueryTable());

    PCollection<String> input;
    if (options.isUnbounded()) {
        // Read unbounded PubSubIO.
        input = pipeline.apply(PubsubIO.Read.timestampLabel(PUBSUB_TIMESTAMP_LABEL_KEY)
                .subscription(options.getPubsubSubscription()));
    } else {
        // Read bounded PubSubIO.
        input = pipeline.apply(PubsubIO.Read.timestampLabel(PUBSUB_TIMESTAMP_LABEL_KEY)
                .subscription(options.getPubsubSubscription()).maxNumRecords(VALID_INPUTS));

        // To read bounded TextIO files, use:
        // input = pipeline.apply(new ReadFileAndExtractTimestamps(options.getInputFile()));
    }
    input
            // row... => <station route, station speed> ...
            .apply(ParDo.of(new ExtractFlowInfoFn()))
            // map the incoming data stream into sliding windows. The default window duration values
            // work well if you're running the accompanying Pub/Sub generator script with the
            // --replay flag, which simulates pauses in the sensor data publication. You may want to
            // adjust them otherwise.
            .apply(Window.<KV<String, LaneInfo>>into(
                    SlidingWindows.of(Duration.standardMinutes(options.getWindowDuration()))
                            .every(Duration.standardMinutes(options.getWindowSlideEvery()))))
            .apply(new MaxLaneFlow())
            .apply(BigQueryIO.Write.to(tableRef).withSchema(FormatMaxesFn.getSchema()));

    // Inject the data into the Pub/Sub topic with a Dataflow batch pipeline.
    if (!Strings.isNullOrEmpty(options.getInputFile()) && !Strings.isNullOrEmpty(options.getPubsubTopic())) {
        dataflowUtils.runInjectorPipeline(new ReadFileAndExtractTimestamps(options.getInputFile()),
                options.getPubsubTopic(), PUBSUB_TIMESTAMP_LABEL_KEY);
    }

    // Run the pipeline.
    PipelineResult result = pipeline.run();

    // dataflowUtils will try to cancel the pipeline and the injector before the program exits.
    dataflowUtils.waitToFinish(result);
}

From source file:com.google.cloud.dataflow.examples.complete.TrafficRoutes.java

License:Apache License

/**
 * Sets up and starts streaming pipeline.
 *
 * @throws IOException if there is a problem setting up resources
 */
public static void main(String[] args) throws IOException {
    TrafficRoutesOptions options = PipelineOptionsFactory.fromArgs(args).withValidation()
            .as(TrafficRoutesOptions.class);

    options.setBigQuerySchema(FormatStatsFn.getSchema());
    // Using DataflowExampleUtils to set up required resources.
    DataflowExampleUtils dataflowUtils = new DataflowExampleUtils(options, options.isUnbounded());

    Pipeline pipeline = Pipeline.create(options);
    TableReference tableRef = new TableReference();
    tableRef.setProjectId(options.getProject());
    tableRef.setDatasetId(options.getBigQueryDataset());
    tableRef.setTableId(options.getBigQueryTable());

    PCollection<String> input;
    if (options.isUnbounded()) {
        // Read unbounded PubSubIO.
        input = pipeline.apply(PubsubIO.Read.timestampLabel(PUBSUB_TIMESTAMP_LABEL_KEY)
                .subscription(options.getPubsubSubscription()));
    } else {
        // Read bounded PubSubIO.
        input = pipeline.apply(PubsubIO.Read.timestampLabel(PUBSUB_TIMESTAMP_LABEL_KEY)
                .subscription(options.getPubsubSubscription()).maxNumRecords(VALID_INPUTS));

        // To read bounded TextIO files, use:
        // input = pipeline.apply(TextIO.Read.from(options.getInputFile()))
        //    .apply(ParDo.of(new ExtractTimestamps()));
    }
    input
            // row... => <station route, station speed> ...
            .apply(ParDo.of(new ExtractStationSpeedFn()))
            // map the incoming data stream into sliding windows.
            // The default window duration values work well if you're running the accompanying Pub/Sub
            // generator script without the --replay flag, so that there are no simulated pauses in
            // the sensor data publication. You may want to adjust the values otherwise.
            .apply(Window.<KV<String, StationSpeed>>into(
                    SlidingWindows.of(Duration.standardMinutes(options.getWindowDuration()))
                            .every(Duration.standardMinutes(options.getWindowSlideEvery()))))
            .apply(new TrackSpeed()).apply(BigQueryIO.Write.to(tableRef).withSchema(FormatStatsFn.getSchema()));

    // Inject the data into the Pub/Sub topic with a Dataflow batch pipeline.
    if (!Strings.isNullOrEmpty(options.getInputFile()) && !Strings.isNullOrEmpty(options.getPubsubTopic())) {
        dataflowUtils.runInjectorPipeline(new ReadFileAndExtractTimestamps(options.getInputFile()),
                options.getPubsubTopic(), PUBSUB_TIMESTAMP_LABEL_KEY);
    }

    // Run the pipeline.
    PipelineResult result = pipeline.run();

    // dataflowUtils will try to cancel the pipeline and the injector before the program exits.
    dataflowUtils.waitToFinish(result);
}

From source file:com.google.cloud.dataflow.examples.TrafficMaxLaneFlow.java

License:Apache License

/**
 * Sets up and starts streaming pipeline.
 *
 * @throws IOException if there is a problem setting up resources
 */
public static void main(String[] args) throws IOException {
    TrafficMaxLaneFlowOptions options = PipelineOptionsFactory.fromArgs(args).withValidation()
            .as(TrafficMaxLaneFlowOptions.class);
    if (options.isStreaming()) {
        // In order to cancel the pipelines automatically,
        // {@literal DataflowPipelineRunner} is forced to be used.
        options.setRunner(DataflowPipelineRunner.class);
    }
    options.setBigQuerySchema(FormatMaxesFn.getSchema());
    // Using DataflowExampleUtils to set up required resources.
    DataflowExampleUtils dataflowUtils = new DataflowExampleUtils(options);
    dataflowUtils.setup();

    Pipeline pipeline = Pipeline.create(options);
    TableReference tableRef = new TableReference();
    tableRef.setProjectId(options.getProject());
    tableRef.setDatasetId(options.getBigQueryDataset());
    tableRef.setTableId(options.getBigQueryTable());

    PCollection<KV<String, LaneInfo>> input;
    if (options.isStreaming()) {
        input = pipeline.apply(PubsubIO.Read.topic(options.getPubsubTopic()))
                // row... => <stationId, LaneInfo> ...
                .apply(ParDo.of(new ExtractFlowInfoFn(false /* outputTimestamp */)));
    } else {
        input = pipeline.apply(TextIO.Read.from(options.getInputFile()))
                // row... => <stationId, LaneInfo> ...
                .apply(ParDo.of(new ExtractFlowInfoFn(true /* outputTimestamp */)));
    }
    // map the incoming data stream into sliding windows. The default window duration values
    // work well if you're running the accompanying Pub/Sub generator script with the
    // --replay flag, which simulates pauses in the sensor data publication. You may want to
    // adjust them otherwise.
    input.apply(Window
            .<KV<String, LaneInfo>>into(SlidingWindows.of(Duration.standardMinutes(options.getWindowDuration()))
                    .every(Duration.standardMinutes(options.getWindowSlideEvery()))))
            .apply(new MaxLaneFlow())
            .apply(BigQueryIO.Write.to(tableRef).withSchema(FormatMaxesFn.getSchema()));

    PipelineResult result = pipeline.run();
    if (options.isStreaming() && !options.getInputFile().isEmpty()) {
        // Inject the data into the Pub/Sub topic with a Dataflow batch pipeline.
        dataflowUtils.runInjectorPipeline(options.getInputFile(), options.getPubsubTopic());
    }

    // dataflowUtils will try to cancel the pipeline and the injector before the program exits.
    dataflowUtils.waitToFinish(result);
}

From source file:com.google.cloud.dataflow.examples.TrafficRoutes.java

License:Apache License

/**
 * Sets up and starts streaming pipeline.
 *
 * @throws IOException if there is a problem setting up resources
 */
public static void main(String[] args) throws IOException {
    TrafficRoutesOptions options = PipelineOptionsFactory.fromArgs(args).withValidation()
            .as(TrafficRoutesOptions.class);

    if (options.isStreaming()) {
        // In order to cancel the pipelines automatically,
        // {@literal DataflowPipelineRunner} is forced to be used.
        options.setRunner(DataflowPipelineRunner.class);
    }
    options.setBigQuerySchema(FormatStatsFn.getSchema());
    // Using DataflowExampleUtils to set up required resources.
    DataflowExampleUtils dataflowUtils = new DataflowExampleUtils(options);
    dataflowUtils.setup();

    Pipeline pipeline = Pipeline.create(options);
    TableReference tableRef = new TableReference();
    tableRef.setProjectId(options.getProject());
    tableRef.setDatasetId(options.getBigQueryDataset());
    tableRef.setTableId(options.getBigQueryTable());

    PCollection<KV<String, StationSpeed>> input;
    if (options.isStreaming()) {
        input = pipeline.apply(PubsubIO.Read.topic(options.getPubsubTopic()))
                // row... => <station route, station speed> ...
                .apply(ParDo.of(new ExtractStationSpeedFn(false /* outputTimestamp */)));
    } else {
        input = pipeline.apply(TextIO.Read.from(options.getInputFile()))
                .apply(ParDo.of(new ExtractStationSpeedFn(true /* outputTimestamp */)));
    }

    // map the incoming data stream into sliding windows.
    // The default window duration values work well if you're running the accompanying Pub/Sub
    // generator script without the --replay flag, so that there are no simulated pauses in
    // the sensor data publication. You may want to adjust the values otherwise.
    input.apply(Window.<KV<String, StationSpeed>>into(
            SlidingWindows.of(Duration.standardMinutes(options.getWindowDuration()))
                    .every(Duration.standardMinutes(options.getWindowSlideEvery()))))
            .apply(new TrackSpeed()).apply(BigQueryIO.Write.to(tableRef).withSchema(FormatStatsFn.getSchema()));

    PipelineResult result = pipeline.run();
    if (options.isStreaming() && !options.getInputFile().isEmpty()) {
        // Inject the data into the Pub/Sub topic with a Dataflow batch pipeline.
        dataflowUtils.runInjectorPipeline(options.getInputFile(), options.getPubsubTopic());
    }

    // dataflowUtils will try to cancel the pipeline and the injector before the program exits.
    dataflowUtils.waitToFinish(result);
}

From source file:com.google.cloud.dataflow.examples.TrafficStreamingMaxLaneFlow.java

License:Apache License

/**
 * Sets up and starts streaming pipeline.
 */
public static void main(String[] args) {
    TrafficStreamingMaxLaneFlowOptions options = PipelineOptionsFactory.fromArgs(args).withValidation()
            .as(TrafficStreamingMaxLaneFlowOptions.class);
    DataflowPipelineOptions dataflowOptions = options.as(DataflowPipelineOptions.class);
    dataflowOptions.setStreaming(true);

    Pipeline pipeline = Pipeline.create(options);
    TableReference tableRef = new TableReference();
    tableRef.setProjectId(dataflowOptions.getProject());
    tableRef.setDatasetId(options.getDataset());
    tableRef.setTableId(options.getTable());
    pipeline.apply(PubsubIO.Read.topic(options.getInputTopic()))
            /* map the incoming data stream into sliding windows. The default window duration values
               work well if you're running the accompanying PubSub generator script with the
               --replay flag, which simulates pauses in the sensor data publication. You may want to
               adjust them otherwise. */
            .apply(Window.<String>into(SlidingWindows.of(Duration.standardMinutes(options.getWindowDuration()))
                    .every(Duration.standardMinutes(options.getWindowSlideEvery()))))
            .apply(new MaxLaneFlow())
            .apply(BigQueryIO.Write.to(tableRef).withSchema(FormatMaxesFn.getSchema()));

    /* When you are done running the example, cancel your pipeline so that you do not continue to
       be charged for its instances. You can do this by visiting
       https://console.developers.google.com/project/your-project-name/dataflow/job-id
       in the Developers Console. You should also terminate the generator script so that you do not
       use unnecessary PubSub quota. */
    pipeline.run();
}