List of usage examples for org.joda.time.Duration.standardMinutes
public static Duration standardMinutes(long minutes)
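Before the full pipeline examples, a minimal standalone sketch of what the method returns (the printed values follow from Joda-Time's fixed 60,000 ms standard minute; the class name is ours):

import org.joda.time.Duration;

public class StandardMinutesDemo {
    public static void main(String[] args) {
        // standardMinutes(n) builds a Duration of exactly n * 60,000 milliseconds,
        // assuming standard-length minutes (no daylight-saving or leap adjustments).
        Duration fiveMinutes = Duration.standardMinutes(5);
        System.out.println(fiveMinutes.getMillis());             // 300000
        System.out.println(fiveMinutes.getStandardSeconds());    // 300
        // Durations compare naturally, which is how the Dataflow examples
        // below use them for window sizing:
        System.out.println(fiveMinutes.isLongerThan(Duration.standardSeconds(200))); // true
    }
}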
From source file: com.google.cloud.dataflow.examples.TrafficStreamingRoutes.java
License: Apache License

/**
 * Sets up and starts streaming pipeline.
 */
public static void main(String[] args) {
    TrafficStreamingRoutesOptions options = PipelineOptionsFactory.fromArgs(args).withValidation()
            .as(TrafficStreamingRoutesOptions.class);
    DataflowPipelineOptions dataflowOptions = options.as(DataflowPipelineOptions.class);
    dataflowOptions.setStreaming(true);
    Pipeline pipeline = Pipeline.create(options);
    TableReference tableRef = new TableReference();
    tableRef.setProjectId(dataflowOptions.getProject());
    tableRef.setDatasetId(options.getDataset());
    tableRef.setTableId(options.getTable());
    pipeline.apply(PubsubIO.Read.topic(options.getInputTopic()))
            /* Map the incoming data stream into sliding windows. The default window duration
             * values work well if you're running the accompanying PubSub generator script
             * without the --replay flag, so that there are no simulated pauses in the sensor
             * data publication. You may want to adjust the values otherwise. */
            .apply(Window.<String>into(SlidingWindows.of(Duration.standardMinutes(options.getWindowDuration()))
                    .every(Duration.standardMinutes(options.getWindowSlideEvery()))))
            .apply(new TrackSpeed())
            .apply(BigQueryIO.Write.to(tableRef).withSchema(FormatStatsFn.getSchema()));
    /* When you are done running the example, cancel your pipeline so that you do not continue
     * to be charged for its instances. You can do this by visiting
     * https://console.developers.google.com/project/your-project-name/dataflow/job-id
     * in the Developers Console. You should also terminate the generator script so that you
     * do not use unnecessary PubSub quota. */
    pipeline.run();
}
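For intuition about the window/slide pair above, a small standalone sketch (the 60-minute/5-minute values are illustrative, not the example's actual defaults):

import org.joda.time.Duration;

public class SlidingWindowArithmetic {
    public static void main(String[] args) {
        // Illustrative values: a 60-minute window that slides every 5 minutes.
        Duration windowDuration = Duration.standardMinutes(60);
        Duration slideEvery = Duration.standardMinutes(5);
        // Each element then belongs to windowDuration / slideEvery = 12
        // overlapping windows, so downstream aggregates are recomputed for
        // each of those windows.
        System.out.println(windowDuration.getMillis() / slideEvery.getMillis()); // 12
    }
}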
From source file: com.google.cloud.dataflow.examples.WindowedWordCount.java
License: Apache License

public static void main(String[] args) throws IOException {
    Options options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class);
    options.setBigQuerySchema(getSchema());
    // DataflowExampleUtils creates the necessary input sources to simplify execution of this
    // Pipeline.
    DataflowExampleUtils exampleDataflowUtils = new DataflowExampleUtils(options, options.isUnbounded());

    Pipeline pipeline = Pipeline.create(options);

    /**
     * Concept #1: the Dataflow SDK lets us run the same pipeline with either a bounded or
     * unbounded input source.
     */
    PCollection<String> input;
    if (options.isUnbounded()) {
        LOG.info("Reading from PubSub.");
        /**
         * Concept #3: Read from the PubSub topic. A topic will be created if it wasn't
         * specified as an argument. The data elements' timestamps will come from the pubsub
         * injection.
         */
        input = pipeline.apply(PubsubIO.Read.topic(options.getPubsubTopic()));
    } else {
        /** Else, this is a bounded pipeline. Read from the GCS file. */
        input = pipeline.apply(TextIO.Read.from(options.getInputFile()))
                // Concept #2: Add an element timestamp, using an artificial time just to show
                // windowing. See AddTimestampFn for more detail on this.
                .apply(ParDo.of(new AddTimestampFn()));
    }

    /**
     * Concept #4: Window into fixed windows. The fixed window size for this example defaults
     * to 1 minute (you can change this with a command-line option). See the documentation for
     * more information on how fixed windows work, and for information on the other types of
     * windowing available (e.g., sliding windows).
     */
    PCollection<String> windowedWords = input
            .apply(Window.<String>into(FixedWindows.of(Duration.standardMinutes(options.getWindowSize()))));

    /**
     * Concept #5: Re-use our existing CountWords transform that does not have knowledge of
     * windows over a PCollection containing windowed values.
     */
    PCollection<KV<String, Long>> wordCounts = windowedWords.apply(new WordCount.CountWords());

    /**
     * Concept #6: Format the results for a BigQuery table, then write to BigQuery.
     * The BigQuery output source supports both bounded and unbounded data.
     */
    wordCounts.apply(ParDo.of(new FormatAsTableRowFn()))
            .apply(BigQueryIO.Write.to(getTableReference(options)).withSchema(getSchema())
                    .withCreateDisposition(BigQueryIO.Write.CreateDisposition.CREATE_IF_NEEDED)
                    .withWriteDisposition(BigQueryIO.Write.WriteDisposition.WRITE_APPEND));

    PipelineResult result = pipeline.run();

    /**
     * To mock unbounded input from PubSub, we'll now start an auxiliary 'injector' pipeline
     * that runs for a limited time, and publishes to the input PubSub topic.
     *
     * With an unbounded input source, you will need to explicitly shut down this pipeline when
     * you are done with it, so that you do not continue to be charged for the instances. You
     * can do this via a ctrl-C from the command line, or from the developer's console UI for
     * Dataflow pipelines. The PubSub topic will also be deleted at this time.
     */
    exampleDataflowUtils.mockUnboundedSource(options.getInputFile(), result);
}
From source file: com.google.cloud.dataflow.sdk.transforms.windowing.SlidingWindows.java
License: Apache License

static Duration getDefaultPeriod(Duration size) {
    if (size.isLongerThan(Duration.standardHours(1))) {
        return Duration.standardHours(1);
    }
    if (size.isLongerThan(Duration.standardMinutes(1))) {
        return Duration.standardMinutes(1);
    }
    if (size.isLongerThan(Duration.standardSeconds(1))) {
        return Duration.standardSeconds(1);
    }
    return Duration.millis(1);
}
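A short standalone sketch of what this tiering yields for a few window sizes (defaultPeriod below re-implements the package-private method above so the example runs outside the SDK; the sample sizes are our own illustration):

import org.joda.time.Duration;

public class DefaultPeriodDemo {
    // Standalone copy of getDefaultPeriod above, since the SDK method is package-private.
    static Duration defaultPeriod(Duration size) {
        if (size.isLongerThan(Duration.standardHours(1))) {
            return Duration.standardHours(1);
        }
        if (size.isLongerThan(Duration.standardMinutes(1))) {
            return Duration.standardMinutes(1);
        }
        if (size.isLongerThan(Duration.standardSeconds(1))) {
            return Duration.standardSeconds(1);
        }
        return Duration.millis(1);
    }

    public static void main(String[] args) {
        // A 30-minute window defaults to a 1-minute period...
        System.out.println(defaultPeriod(Duration.standardMinutes(30))); // PT60S
        // ...while anything longer than an hour defaults to a 1-hour period.
        System.out.println(defaultPeriod(Duration.standardHours(24)));   // PT3600S
        // A window of exactly 1 minute is not *longer than* 1 minute, so it
        // falls through to the 1-second tier.
        System.out.println(defaultPeriod(Duration.standardMinutes(1)));  // PT1S
    }
}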
From source file: com.google.cloud.dataflow.starter.TrafficMaxLaneFlow.java
License: Apache License

/**
 * Sets up and starts streaming pipeline.
 *
 * @throws IOException if there is a problem setting up resources
 */
public static void main(String[] args) throws IOException {
    TrafficMaxLaneFlowOptions options = PipelineOptionsFactory.fromArgs(args).withValidation()
            .as(TrafficMaxLaneFlowOptions.class);
    if (options.isStreaming()) {
        // In order to cancel the pipelines automatically,
        // {@literal DataflowPipelineRunner} is forced to be used.
        options.setRunner(DataflowPipelineRunner.class);
    }
    options.setBigQuerySchema(FormatMaxesFn.getSchema());
    // Using DataflowExampleUtils to set up required resources.
    DataflowExampleUtils dataflowUtils = new DataflowExampleUtils(options);
    dataflowUtils.setup();

    Pipeline pipeline = Pipeline.create(options);
    TableReference tableRef = new TableReference();
    tableRef.setProjectId(options.getProject());
    tableRef.setDatasetId(options.getBigQueryDataset());
    tableRef.setTableId(options.getBigQueryTable());

    PCollection<KV<String, LaneInfo>> input;
    if (options.isStreaming()) {
        input = pipeline.apply(PubsubIO.Read.topic(options.getPubsubTopic()))
                // row... => <stationId, LaneInfo> ...
                .apply(ParDo.of(new ExtractFlowInfoFn(false /* outputTimestamp */)));
    } else {
        input = pipeline.apply(TextIO.Read.from(options.getInputFile()))
                // row... => <stationId, LaneInfo> ...
                .apply(ParDo.of(new ExtractFlowInfoFn(true /* outputTimestamp */)));
    }
    // Map the incoming data stream into sliding windows. The default window duration values
    // work well if you're running the accompanying Pub/Sub generator script with the
    // --replay flag, which simulates pauses in the sensor data publication. You may want to
    // adjust them otherwise.
    input.apply(Window
            .<KV<String, LaneInfo>>into(SlidingWindows.of(Duration.standardMinutes(options.getWindowDuration()))
                    .every(Duration.standardMinutes(options.getWindowSlideEvery()))))
            .apply(new MaxLaneFlow())
            .apply(BigQueryIO.Write.to(tableRef).withSchema(FormatMaxesFn.getSchema()));

    PipelineResult result = pipeline.run();
    if (options.isStreaming() && !options.getInputFile().isEmpty()) {
        // Inject the data into the Pub/Sub topic with a Dataflow batch pipeline.
        // === UNCOMMENT IF GCE DATALOADER IS NOT RUNNING ====
        dataflowUtils.runInjectorPipeline(options.getInputFile(), options.getPubsubTopic());
    }
    // dataflowUtils will try to cancel the pipeline and the injector before the program exits.
    dataflowUtils.waitToFinish(result);
}
From source file: com.google.cloud.dataflow.tutorials.game.Exercise2.java
License: Apache License

/** Run a batch pipeline. */
public static void main(String[] args) throws Exception {
    Options options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class);
    Pipeline pipeline = Pipeline.create(options);

    TableReference tableRef = new TableReference();
    tableRef.setDatasetId(options.as(Options.class).getOutputDataset());
    tableRef.setProjectId(options.as(GcpOptions.class).getProject());
    tableRef.setTableId(options.getOutputTableName());

    // Read events from a CSV file and parse them.
    pipeline.apply(TextIO.Read.from(options.getInput()))
            .apply(ParDo.named("ParseGameEvent").of(new ParseEventFn()))
            .apply("AddEventTimestamps", WithTimestamps.of((GameEvent i) -> new Instant(i.getTimestamp())))
            .apply("WindowedTeamScore", new WindowedTeamScore(Duration.standardMinutes(60)))
            // Write the results to BigQuery.
            .apply(ParDo.named("FormatTeamScoreSums").of(new FormatTeamScoreSumsFn()))
            .apply(BigQueryIO.Write.to(tableRef).withSchema(FormatTeamScoreSumsFn.getSchema())
                    .withCreateDisposition(CreateDisposition.CREATE_IF_NEEDED)
                    .withWriteDisposition(WriteDisposition.WRITE_APPEND));

    pipeline.run();
}
From source file: com.google.cloud.dataflow.tutorials.game.Exercise3.java
License: Apache License

/** Run a batch or streaming pipeline. */
public static void main(String[] args) throws Exception {
    Options options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class);
    Pipeline pipeline = Pipeline.create(options);

    TableReference tableRef = new TableReference();
    tableRef.setDatasetId(options.as(Options.class).getOutputDataset());
    tableRef.setProjectId(options.as(GcpOptions.class).getProject());
    tableRef.setTableId(options.getOutputTableName());

    // Read events from either a CSV file or PubSub stream.
    pipeline.apply(new ReadGameEvents(options))
            .apply("WindowedTeamScore", new Exercise2.WindowedTeamScore(Duration.standardMinutes(60)))
            // Write the results to BigQuery.
            .apply(ParDo.named("FormatTeamScoreSums").of(new Exercise2.FormatTeamScoreSumsFn()))
            .apply(BigQueryIO.Write.to(tableRef).withSchema(Exercise2.FormatTeamScoreSumsFn.getSchema())
                    .withCreateDisposition(CreateDisposition.CREATE_IF_NEEDED)
                    .withWriteDisposition(WriteDisposition.WRITE_APPEND));

    pipeline.run();
}
From source file: com.google.cloud.dataflow.tutorials.game.Exercise4.java
License: Apache License

public static void main(String[] args) throws Exception {
    Exercise4Options options = PipelineOptionsFactory.fromArgs(args).withValidation()
            .as(Exercise4Options.class);
    // Enforce that this pipeline is always run in streaming mode.
    options.setStreaming(true);
    // For example purposes, allow the pipeline to be easily cancelled instead of running
    // continuously.
    options.setRunner(DataflowPipelineRunner.class);
    Pipeline pipeline = Pipeline.create(options);

    TableReference teamTable = new TableReference();
    teamTable.setDatasetId(options.getOutputDataset());
    teamTable.setProjectId(options.getProject());
    teamTable.setTableId(options.getOutputTableName() + "_team");

    TableReference userTable = new TableReference();
    userTable.setDatasetId(options.getOutputDataset());
    userTable.setProjectId(options.getProject());
    userTable.setTableId(options.getOutputTableName() + "_user");

    PCollection<GameEvent> gameEvents = pipeline.apply(new Exercise3.ReadGameEvents(options));

    gameEvents
            .apply("CalculateTeamScores",
                    new CalculateTeamScores(Duration.standardMinutes(options.getTeamWindowDuration()),
                            Duration.standardMinutes(options.getAllowedLateness())))
            // Write the results to BigQuery.
            .apply(ParDo.named("FormatTeamScores").of(new FormatTeamScoreFn()))
            .apply(BigQueryIO.Write.to(teamTable).withSchema(FormatTeamScoreFn.getSchema())
                    .withCreateDisposition(CreateDisposition.CREATE_IF_NEEDED)
                    .withWriteDisposition(WriteDisposition.WRITE_APPEND));

    gameEvents
            .apply("CalculateUserScores",
                    new CalculateUserScores(Duration.standardMinutes(options.getAllowedLateness())))
            // Write the results to BigQuery.
            .apply(ParDo.named("FormatUserScores").of(new FormatUserScoreFn()))
            .apply(BigQueryIO.Write.to(userTable).withSchema(FormatUserScoreFn.getSchema())
                    .withCreateDisposition(CreateDisposition.CREATE_IF_NEEDED)
                    .withWriteDisposition(WriteDisposition.WRITE_APPEND));

    // Run the pipeline and wait for the pipeline to finish; capture cancellation requests from
    // the command line.
    PipelineResult result = pipeline.run();
}
From source file: com.google.cloud.dataflow.tutorials.game.Exercise5.java
License: Apache License

public static void main(String[] args) throws Exception {
    Exercise5Options options = PipelineOptionsFactory.fromArgs(args).withValidation()
            .as(Exercise5Options.class);
    // Enforce that this pipeline is always run in streaming mode.
    options.setStreaming(true);
    // Allow the pipeline to be cancelled automatically.
    options.setRunner(DataflowPipelineRunner.class);
    Pipeline pipeline = Pipeline.create(options);

    TableReference teamTable = new TableReference();
    teamTable.setDatasetId(options.getOutputDataset());
    teamTable.setProjectId(options.getProject());
    teamTable.setTableId(options.getOutputTableName());

    PCollection<GameEvent> rawEvents = pipeline.apply(new Exercise3.ReadGameEvents(options));

    // Extract username/score pairs from the event stream.
    PCollection<KV<String, Integer>> userEvents = rawEvents.apply("ExtractUserScore",
            MapElements.via((GameEvent gInfo) -> KV.of(gInfo.getUser(), gInfo.getScore()))
                    .withOutputType(new TypeDescriptor<KV<String, Integer>>() {}));

    // Calculate the total score per user over fixed windows, and
    // cumulative updates for late data.
    final PCollectionView<Map<String, Integer>> spammersView = userEvents
            .apply(Window.named("FixedWindowsUser").<KV<String, Integer>>into(
                    FixedWindows.of(Duration.standardMinutes(options.getFixedWindowDuration()))))
            // Filter out everyone but those with (SCORE_WEIGHT * avg) clickrate.
            // These might be robots/spammers.
            .apply("CalculateSpammyUsers", new CalculateSpammyUsers())
            // Derive a view from the collection of spammer users. It will be used as a side
            // input in calculating the team score sums, below.
            .apply("CreateSpammersView", View.<String, Integer>asMap());

    // [START EXERCISE 5 PART b]:
    // Calculate the total score per team over fixed windows,
    // and emit cumulative updates for late data. Uses the side input derived above -- the set
    // of suspected robots -- to filter out scores from those users from the sum.
    // Write the results to BigQuery.
    rawEvents
            .apply(Window.named("WindowIntoFixedWindows").<GameEvent>into(
                    FixedWindows.of(Duration.standardMinutes(options.getFixedWindowDuration()))))
            // Filter out the detected spammer users, using the side input derived above.
            // Use ParDo with spammersView side input to filter out spammers.
            .apply(/* TODO: YOUR CODE GOES HERE */ new ChangeMe<PCollection<GameEvent>, GameEvent>())
            // Extract and sum teamname/score pairs from the event data.
            .apply("ExtractTeamScore", new Exercise1.ExtractAndSumScore("team"))
            // Write the result to BigQuery.
            .apply(ParDo.named("FormatTeamWindows").of(new FormatTeamWindowFn()))
            .apply(BigQueryIO.Write.to(teamTable).withSchema(FormatTeamWindowFn.getSchema())
                    .withCreateDisposition(CreateDisposition.CREATE_IF_NEEDED)
                    .withWriteDisposition(WriteDisposition.WRITE_APPEND));
    // [END EXERCISE 5 PART b]

    // Run the pipeline and wait for the pipeline to finish; capture cancellation requests from
    // the command line.
    PipelineResult result = pipeline.run();
}
From source file: com.google.cloud.dataflow.tutorials.game.Exercise8.java
License: Apache License

public static void main(String[] args) throws Exception {
    Exercise8Options options = PipelineOptionsFactory.fromArgs(args).withValidation()
            .as(Exercise8Options.class);
    // Enforce that this pipeline is always run in streaming mode.
    options.setStreaming(true);
    // Allow the pipeline to be cancelled automatically.
    options.setRunner(DataflowPipelineRunner.class);
    Pipeline pipeline = Pipeline.create(options);

    TableReference badUserTable = new TableReference();
    badUserTable.setDatasetId(options.getOutputDataset());
    badUserTable.setProjectId(options.getProject());
    badUserTable.setTableId(options.getOutputTableName() + "_bad_users");

    // Read Events from Pub/Sub using custom timestamps and custom message id label.
    PCollection<KV<String, GameEvent>> sessionedEvents = pipeline
            .apply("ReadGameScoreEvents",
                    PubsubIO.Read.timestampLabel(TIMESTAMP_ATTRIBUTE).idLabel(MESSAGE_ID_ATTRIBUTE)
                            .topic(options.getTopic()))
            .apply("ParseGameScoreEvents", ParDo.of(new BuggyParseEventFn()))
            .apply("KeyGameScoreByEventId",
                    WithKeys.of((GameEvent event) -> event.getEventId())
                            .withKeyType(TypeDescriptor.of(String.class)))
            .apply("SessionizeGameScoreEvents",
                    Window.<KV<String, GameEvent>>into(
                            Sessions.withGapDuration(Duration.standardMinutes(options.getSessionGap())))
                            .withOutputTimeFn(OutputTimeFns.outputAtEndOfWindow()));

    // Read PlayEvents from Pub/Sub using custom timestamps and custom message id label.
    PCollection<KV<String, PlayEvent>> sessionedPlayEvents = pipeline
            .apply("ReadGamePlayEvents",
                    PubsubIO.Read.timestampLabel(TIMESTAMP_ATTRIBUTE).idLabel(MESSAGE_ID_ATTRIBUTE)
                            .topic(options.getPlayEventsTopic()))
            .apply("ParseGamePlayEvents", ParDo.of(new BuggyParsePlayEventFn()))
            .apply("KeyGamePlayByEventId",
                    WithKeys.of((PlayEvent play) -> play.getEventId())
                            .withKeyType(TypeDescriptor.of(String.class)))
            .apply("SessionizeGamePlayEvents",
                    Window.<KV<String, PlayEvent>>into(
                            Sessions.withGapDuration(Duration.standardMinutes(options.getSessionGap())))
                            .withOutputTimeFn(OutputTimeFns.outputAtEndOfWindow()));

    // Compute per-user latency.
    PCollection<KV<String, Long>> userLatency = KeyedPCollectionTuple.of(playTag, sessionedPlayEvents)
            .and(eventTag, sessionedEvents)
            .apply("JoinScorePlayEvents", CoGroupByKey.create())
            .apply("ComputeLatency", ParDo.of(new ComputeLatencyFn()));

    // Create a view onto quantiles of the global latency distribution.
    PCollectionView<List<Long>> globalQuantiles = userLatency.apply("GetLatencies", Values.create())
            // Re-window session results into a global window, and trigger periodically making
            // sure to use the full accumulated window contents.
            .apply("GlobalWindowRetrigger",
                    Window.<Long>into(new GlobalWindows())
                            .triggering(Repeatedly.forever(AfterProcessingTime.pastFirstElementInPane()
                                    .plusDelayOf(Duration.standardSeconds(GLOBAL_AGGREGATE_TRIGGER_SEC))))
                            .accumulatingFiredPanes())
            .apply(((Combine.Globally<Long, List<Long>>) ApproximateQuantiles
                    .<Long>globally(GLOBAL_LATENCY_QUANTILES)).withFanout(GLOBAL_AGGREGATE_FANOUT)
                    .asSingletonView());

    userLatency
            // Use the computed latency distribution as a side-input to filter out likely bad
            // users.
            .apply("DetectBadUsers",
                    ParDo.withSideInputs(globalQuantiles).of(new DoFn<KV<String, Long>, String>() {
                        public void processElement(ProcessContext c) {
                            String user = c.element().getKey();
                            Long latency = c.element().getValue();
                            List<Long> quantiles = c.sideInput(globalQuantiles);
                            // Users in the first quantile are considered spammers, since their
                            // score to play event latency is too low, suggesting a robot.
                            if (latency < quantiles.get(1)) {
                                c.output(user);
                            }
                        }
                    }))
            // We want to only emit a single BigQuery row for every bad user. To do this, we
            // re-key by user, then window globally and trigger on the first element for each
            // key.
            .apply("KeyByUser",
                    WithKeys.of((String user) -> user).withKeyType(TypeDescriptor.of(String.class)))
            .apply("GlobalWindowsTriggerOnFirst",
                    Window.<KV<String, String>>into(new GlobalWindows())
                            .triggering(AfterProcessingTime.pastFirstElementInPane()
                                    .plusDelayOf(Duration.standardSeconds(10)))
                            .accumulatingFiredPanes())
            .apply("GroupByUser", GroupByKey.<String, String>create())
            .apply("FormatBadUsers", ParDo.of(new FormatBadUserFn()))
            .apply("WriteBadUsers",
                    BigQueryIO.Write.to(badUserTable).withSchema(FormatBadUserFn.getSchema())
                            .withCreateDisposition(CreateDisposition.CREATE_IF_NEEDED)
                            .withWriteDisposition(WriteDisposition.WRITE_APPEND));

    userLatency
            .apply("ReKeyFn",
                    // BUG4: We have a hot key. Especially when the cost of the downstream fn is
                    // high, we must ensure we have good sharding.
                    WithKeys.of((KV<String, Long> item) -> "").withKeyType(TypeDescriptor.of(String.class)))
            .apply("WindowAndTriggerOften",
                    Window.<KV<String, KV<String, Long>>>into(new GlobalWindows())
                            .triggering(Repeatedly.forever(AfterProcessingTime.pastFirstElementInPane()
                                    .plusDelayOf(Duration.standardSeconds(10))))
                            .discardingFiredPanes())
            .apply("GroupByNewKey", GroupByKey.<String, KV<String, Long>>create())
            .apply("DoExpensiveWork", ParDo.of(new ExpensiveWorkPerElement()));

    // Run the pipeline and wait for the pipeline to finish; capture cancellation requests from
    // the command line.
    PipelineResult result = pipeline.run();
}
From source file: com.google.cloud.dataflow.tutorials.game.solutions.Exercise5.java
License: Apache License

public static void main(String[] args) throws Exception {
    Exercise5Options options = PipelineOptionsFactory.fromArgs(args).withValidation()
            .as(Exercise5Options.class);
    // Enforce that this pipeline is always run in streaming mode.
    options.setStreaming(true);
    // Allow the pipeline to be cancelled automatically.
    options.setRunner(DataflowPipelineRunner.class);
    Pipeline pipeline = Pipeline.create(options);

    TableReference teamTable = new TableReference();
    teamTable.setDatasetId(options.getOutputDataset());
    teamTable.setProjectId(options.getProject());
    teamTable.setTableId(options.getOutputTableName());

    PCollection<GameEvent> rawEvents = pipeline.apply(new Exercise3.ReadGameEvents(options));

    // Extract username/score pairs from the event stream.
    PCollection<KV<String, Integer>> userEvents = rawEvents.apply("ExtractUserScore",
            MapElements.via((GameEvent gInfo) -> KV.of(gInfo.getUser(), gInfo.getScore()))
                    .withOutputType(new TypeDescriptor<KV<String, Integer>>() {}));

    // Calculate the total score per user over fixed windows, and
    // cumulative updates for late data.
    final PCollectionView<Map<String, Integer>> spammersView = userEvents
            .apply(Window.named("FixedWindowsUser").<KV<String, Integer>>into(
                    FixedWindows.of(Duration.standardMinutes(options.getFixedWindowDuration()))))
            // Filter out everyone but those with (SCORE_WEIGHT * avg) clickrate.
            // These might be robots/spammers.
            .apply("CalculateSpammyUsers", new CalculateSpammyUsers())
            // Derive a view from the collection of spammer users. It will be used as a side
            // input in calculating the team score sums, below.
            .apply("CreateSpammersView", View.<String, Integer>asMap());

    // Calculate the total score per team over fixed windows,
    // and emit cumulative updates for late data. Uses the side input derived above -- the set
    // of suspected robots -- to filter out scores from those users from the sum.
    // Write the results to BigQuery.
    rawEvents
            .apply(Window.named("WindowIntoFixedWindows").<GameEvent>into(
                    FixedWindows.of(Duration.standardMinutes(options.getFixedWindowDuration()))))
            // Filter out the detected spammer users, using the side input derived above.
            .apply(ParDo.named("FilterOutSpammers").withSideInputs(spammersView)
                    .of(new DoFn<GameEvent, GameEvent>() {
                        @Override
                        public void processElement(ProcessContext c) {
                            // If the user is not in the spammers Map, output the data element.
                            if (c.sideInput(spammersView).get(c.element().getUser().trim()) == null) {
                                c.output(c.element());
                            }
                        }
                    }))
            // Extract and sum teamname/score pairs from the event data.
            .apply("ExtractTeamScore", new Exercise1.ExtractAndSumScore("team"))
            // Write the result to BigQuery.
            .apply(ParDo.named("FormatTeamWindows").of(new FormatTeamWindowFn()))
            .apply(BigQueryIO.Write.to(teamTable).withSchema(FormatTeamWindowFn.getSchema())
                    .withCreateDisposition(CreateDisposition.CREATE_IF_NEEDED)
                    .withWriteDisposition(WriteDisposition.WRITE_APPEND));

    // Run the pipeline and wait for the pipeline to finish; capture cancellation requests from
    // the command line.
    PipelineResult result = pipeline.run();
}