Example usage for org.joda.time Duration standardMinutes

Introduction

This page lists example usages of org.joda.time Duration standardMinutes.

Prototype

public static Duration standardMinutes(long minutes) 

Document

Creates a duration with the specified number of minutes, assuming the standard number of milliseconds in a minute (60,000).
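
Before the full pipelines below, here is a minimal, self-contained sketch of the method on its own (plain Joda-Time, no Dataflow dependencies). The class name StandardMinutesDemo is ours for illustration; the printed values assume the standard 60,000 milliseconds per minute:

    import org.joda.time.Duration;

    public class StandardMinutesDemo {
        public static void main(String[] args) {
            // Five minutes at the standard 60,000 milliseconds per minute.
            Duration fiveMinutes = Duration.standardMinutes(5);
            System.out.println(fiveMinutes.getMillis());          // 300000
            System.out.println(fiveMinutes.getStandardSeconds()); // 300

            // Equivalent to building the same duration from standard seconds.
            System.out.println(fiveMinutes.equals(Duration.standardSeconds(300))); // true
        }
    }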

Usage

From source file:com.google.cloud.dataflow.examples.TrafficStreamingRoutes.java

License:Apache License

/**
 * Sets up and starts streaming pipeline.
 */
public static void main(String[] args) {
    TrafficStreamingRoutesOptions options = PipelineOptionsFactory.fromArgs(args).withValidation()
            .as(TrafficStreamingRoutesOptions.class);
    DataflowPipelineOptions dataflowOptions = options.as(DataflowPipelineOptions.class);
    dataflowOptions.setStreaming(true);

    Pipeline pipeline = Pipeline.create(options);
    TableReference tableRef = new TableReference();
    tableRef.setProjectId(dataflowOptions.getProject());
    tableRef.setDatasetId(options.getDataset());
    tableRef.setTableId(options.getTable());
    pipeline.apply(PubsubIO.Read.topic(options.getInputTopic()))
            /* map the incoming data stream into sliding windows.
               The default window duration values work well if you're running the accompanying PubSub
               generator script without the --replay flag, so that there are no simulated pauses in
               the sensor data publication. You may want to adjust the values otherwise. */
            .apply(Window.<String>into(SlidingWindows.of(Duration.standardMinutes(options.getWindowDuration()))
                    .every(Duration.standardMinutes(options.getWindowSlideEvery()))))
            .apply(new TrackSpeed()).apply(BigQueryIO.Write.to(tableRef).withSchema(FormatStatsFn.getSchema()));

    /* When you are done running the example, cancel your pipeline so that you do not continue to
       be charged for its instances. You can do this by visiting
       https://console.developers.google.com/project/your-project-name/dataflow/job-id
       in the Developers Console. You should also terminate the generator script so that you do not
       use unnecessary PubSub quota. */
    pipeline.run();
}

From source file:com.google.cloud.dataflow.examples.WindowedWordCount.java

License:Apache License

public static void main(String[] args) throws IOException {
    Options options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class);
    options.setBigQuerySchema(getSchema());
    // DataflowExampleUtils creates the necessary input sources to simplify execution of this
    // Pipeline.
    DataflowExampleUtils exampleDataflowUtils = new DataflowExampleUtils(options, options.isUnbounded());

    Pipeline pipeline = Pipeline.create(options);

    /**
     * Concept #1: the Dataflow SDK lets us run the same pipeline with either a bounded or
     * unbounded input source.
     */
    PCollection<String> input;
    if (options.isUnbounded()) {
        LOG.info("Reading from PubSub.");
        /**
         * Concept #3: Read from the PubSub topic. A topic will be created if it wasn't
         * specified as an argument. The data elements' timestamps will come from the pubsub
         * injection.
         */
        input = pipeline.apply(PubsubIO.Read.topic(options.getPubsubTopic()));
    } else {
        /** Else, this is a bounded pipeline. Read from the GCS file. */
        input = pipeline.apply(TextIO.Read.from(options.getInputFile()))
                // Concept #2: Add an element timestamp, using an artificial time just to show windowing.
                // See AddTimestampFn for more detail on this.
                .apply(ParDo.of(new AddTimestampFn()));
    }

    /**
     * Concept #4: Window into fixed windows. The fixed window size for this example defaults to 1
     * minute (you can change this with a command-line option). See the documentation for more
     * information on how fixed windows work, and for information on the other types of windowing
     * available (e.g., sliding windows).
     */
    PCollection<String> windowedWords = input
            .apply(Window.<String>into(FixedWindows.of(Duration.standardMinutes(options.getWindowSize()))));

    /**
     * Concept #5: Re-use our existing CountWords transform that does not have knowledge of
     * windows over a PCollection containing windowed values.
     */
    PCollection<KV<String, Long>> wordCounts = windowedWords.apply(new WordCount.CountWords());

    /**
     * Concept #6: Format the results for a BigQuery table, then write to BigQuery.
     * The BigQuery output source supports both bounded and unbounded data.
     */
    wordCounts.apply(ParDo.of(new FormatAsTableRowFn()))
            .apply(BigQueryIO.Write.to(getTableReference(options)).withSchema(getSchema())
                    .withCreateDisposition(BigQueryIO.Write.CreateDisposition.CREATE_IF_NEEDED)
                    .withWriteDisposition(BigQueryIO.Write.WriteDisposition.WRITE_APPEND));

    PipelineResult result = pipeline.run();

    /**
     * To mock unbounded input from PubSub, we'll now start an auxiliary 'injector' pipeline that
     * runs for a limited time, and publishes to the input PubSub topic.
     *
     * With an unbounded input source, you will need to explicitly shut down this pipeline when you
     * are done with it, so that you do not continue to be charged for the instances. You can do
     * this via a ctrl-C from the command line, or from the developer's console UI for Dataflow
     * pipelines. The PubSub topic will also be deleted at this time.
     */
    exampleDataflowUtils.mockUnboundedSource(options.getInputFile(), result);
}

From source file:com.google.cloud.dataflow.sdk.transforms.windowing.SlidingWindows.java

License:Apache License

static Duration getDefaultPeriod(Duration size) {
    if (size.isLongerThan(Duration.standardHours(1))) {
        return Duration.standardHours(1);
    }
    if (size.isLongerThan(Duration.standardMinutes(1))) {
        return Duration.standardMinutes(1);
    }
    if (size.isLongerThan(Duration.standardSeconds(1))) {
        return Duration.standardSeconds(1);
    }
    return Duration.millis(1);
}

From source file:com.google.cloud.dataflow.starter.TrafficMaxLaneFlow.java

License:Apache License

/**
 * Sets up and starts streaming pipeline.
 *
 * @throws IOException if there is a problem setting up resources
 */
public static void main(String[] args) throws IOException {
    TrafficMaxLaneFlowOptions options = PipelineOptionsFactory.fromArgs(args).withValidation()
            .as(TrafficMaxLaneFlowOptions.class);
    if (options.isStreaming()) {
        // In order to cancel the pipelines automatically,
        // {@literal DataflowPipelineRunner} is forced to be used.
        options.setRunner(DataflowPipelineRunner.class);
    }
    options.setBigQuerySchema(FormatMaxesFn.getSchema());
    // Using DataflowExampleUtils to set up required resources.
    DataflowExampleUtils dataflowUtils = new DataflowExampleUtils(options);
    dataflowUtils.setup();

    Pipeline pipeline = Pipeline.create(options);
    TableReference tableRef = new TableReference();
    tableRef.setProjectId(options.getProject());
    tableRef.setDatasetId(options.getBigQueryDataset());
    tableRef.setTableId(options.getBigQueryTable());

    PCollection<KV<String, LaneInfo>> input;
    if (options.isStreaming()) {
        input = pipeline.apply(PubsubIO.Read.topic(options.getPubsubTopic()))
                // row... => <stationId, LaneInfo> ...
                .apply(ParDo.of(new ExtractFlowInfoFn(false /* outputTimestamp */)));
    } else {
        input = pipeline.apply(TextIO.Read.from(options.getInputFile()))
                // row... => <stationId, LaneInfo> ...
                .apply(ParDo.of(new ExtractFlowInfoFn(true /* outputTimestamp */)));
    }
    // map the incoming data stream into sliding windows. The default window duration values
    // work well if you're running the accompanying Pub/Sub generator script with the
    // --replay flag, which simulates pauses in the sensor data publication. You may want to
    // adjust them otherwise.
    input.apply(Window
            .<KV<String, LaneInfo>>into(SlidingWindows.of(Duration.standardMinutes(options.getWindowDuration()))
                    .every(Duration.standardMinutes(options.getWindowSlideEvery()))))
            .apply(new MaxLaneFlow())
            .apply(BigQueryIO.Write.to(tableRef).withSchema(FormatMaxesFn.getSchema()));

    PipelineResult result = pipeline.run();
    if (options.isStreaming() && !options.getInputFile().isEmpty()) {
        // Inject the data into the Pub/Sub topic with a Dataflow batch pipeline.
        // === UNCOMMENT IF GCE DATALOADER IS NOT RUNNING ====
        dataflowUtils.runInjectorPipeline(options.getInputFile(), options.getPubsubTopic());
    }

    // dataflowUtils will try to cancel the pipeline and the injector before the program exits.
    dataflowUtils.waitToFinish(result);
}

From source file:com.google.cloud.dataflow.tutorials.game.Exercise2.java

License:Apache License

/** Run a batch pipeline. */
public static void main(String[] args) throws Exception {
    Options options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class);
    Pipeline pipeline = Pipeline.create(options);

    TableReference tableRef = new TableReference();
    tableRef.setDatasetId(options.as(Options.class).getOutputDataset());
    tableRef.setProjectId(options.as(GcpOptions.class).getProject());
    tableRef.setTableId(options.getOutputTableName());

    // Read events from a CSV file and parse them.
    pipeline.apply(TextIO.Read.from(options.getInput()))
            .apply(ParDo.named("ParseGameEvent").of(new ParseEventFn()))
            .apply("AddEventTimestamps", WithTimestamps.of((GameEvent i) -> new Instant(i.getTimestamp())))
            .apply("WindowedTeamScore", new WindowedTeamScore(Duration.standardMinutes(60)))
            // Write the results to BigQuery.
            .apply(ParDo.named("FormatTeamScoreSums").of(new FormatTeamScoreSumsFn()))
            .apply(BigQueryIO.Write.to(tableRef).withSchema(FormatTeamScoreSumsFn.getSchema())
                    .withCreateDisposition(CreateDisposition.CREATE_IF_NEEDED)
                    .withWriteDisposition(WriteDisposition.WRITE_APPEND));

    pipeline.run();
}

From source file:com.google.cloud.dataflow.tutorials.game.Exercise3.java

License:Apache License

/** Run a batch or streaming pipeline. */
public static void main(String[] args) throws Exception {
    Options options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class);

    Pipeline pipeline = Pipeline.create(options);

    TableReference tableRef = new TableReference();
    tableRef.setDatasetId(options.as(Options.class).getOutputDataset());
    tableRef.setProjectId(options.as(GcpOptions.class).getProject());
    tableRef.setTableId(options.getOutputTableName());

    // Read events from either a CSV file or PubSub stream.
    pipeline.apply(new ReadGameEvents(options))
            .apply("WindowedTeamScore", new Exercise2.WindowedTeamScore(Duration.standardMinutes(60)))
            // Write the results to BigQuery.
            .apply(ParDo.named("FormatTeamScoreSums").of(new Exercise2.FormatTeamScoreSumsFn()))
            .apply(BigQueryIO.Write.to(tableRef).withSchema(Exercise2.FormatTeamScoreSumsFn.getSchema())
                    .withCreateDisposition(CreateDisposition.CREATE_IF_NEEDED)
                    .withWriteDisposition(WriteDisposition.WRITE_APPEND));

    pipeline.run();
}

From source file:com.google.cloud.dataflow.tutorials.game.Exercise4.java

License:Apache License

public static void main(String[] args) throws Exception {
    Exercise4Options options = PipelineOptionsFactory.fromArgs(args).withValidation()
            .as(Exercise4Options.class);
    // Enforce that this pipeline is always run in streaming mode.
    options.setStreaming(true);
    // For example purposes, allow the pipeline to be easily cancelled instead of running
    // continuously.
    options.setRunner(DataflowPipelineRunner.class);
    Pipeline pipeline = Pipeline.create(options);

    TableReference teamTable = new TableReference();
    teamTable.setDatasetId(options.getOutputDataset());
    teamTable.setProjectId(options.getProject());
    teamTable.setTableId(options.getOutputTableName() + "_team");

    TableReference userTable = new TableReference();
    userTable.setDatasetId(options.getOutputDataset());
    userTable.setProjectId(options.getProject());
    userTable.setTableId(options.getOutputTableName() + "_user");

    PCollection<GameEvent> gameEvents = pipeline.apply(new Exercise3.ReadGameEvents(options));

    gameEvents
            .apply("CalculateTeamScores",
                    new CalculateTeamScores(Duration.standardMinutes(options.getTeamWindowDuration()),
                            Duration.standardMinutes(options.getAllowedLateness())))
            // Write the results to BigQuery.
            .apply(ParDo.named("FormatTeamScores").of(new FormatTeamScoreFn()))
            .apply(BigQueryIO.Write.to(teamTable).withSchema(FormatTeamScoreFn.getSchema())
                    .withCreateDisposition(CreateDisposition.CREATE_IF_NEEDED)
                    .withWriteDisposition(WriteDisposition.WRITE_APPEND));

    gameEvents
            .apply("CalculateUserScores",
                    new CalculateUserScores(Duration.standardMinutes(options.getAllowedLateness())))
            // Write the results to BigQuery.
            .apply(ParDo.named("FormatUserScores").of(new FormatUserScoreFn()))
            .apply(BigQueryIO.Write.to(userTable).withSchema(FormatUserScoreFn.getSchema())
                    .withCreateDisposition(CreateDisposition.CREATE_IF_NEEDED)
                    .withWriteDisposition(WriteDisposition.WRITE_APPEND));

    // Run the pipeline and wait for the pipeline to finish; capture cancellation requests from the
    // command line.
    PipelineResult result = pipeline.run();
}

From source file:com.google.cloud.dataflow.tutorials.game.Exercise5.java

License:Apache License

public static void main(String[] args) throws Exception {

    Exercise5Options options = PipelineOptionsFactory.fromArgs(args).withValidation()
            .as(Exercise5Options.class);
    // Enforce that this pipeline is always run in streaming mode.
    options.setStreaming(true);
    // Allow the pipeline to be cancelled automatically.
    options.setRunner(DataflowPipelineRunner.class);
    Pipeline pipeline = Pipeline.create(options);

    TableReference teamTable = new TableReference();
    teamTable.setDatasetId(options.getOutputDataset());
    teamTable.setProjectId(options.getProject());
    teamTable.setTableId(options.getOutputTableName());

    PCollection<GameEvent> rawEvents = pipeline.apply(new Exercise3.ReadGameEvents(options));

    // Extract username/score pairs from the event stream
    PCollection<KV<String, Integer>> userEvents = rawEvents.apply("ExtractUserScore",
            MapElements.via((GameEvent gInfo) -> KV.of(gInfo.getUser(), gInfo.getScore()))
                    .withOutputType(new TypeDescriptor<KV<String, Integer>>() {
                    }));

    // Calculate the total score per user over fixed windows, and
    // cumulative updates for late data.
    final PCollectionView<Map<String, Integer>> spammersView = userEvents
            .apply(Window.named("FixedWindowsUser").<KV<String, Integer>>into(
                    FixedWindows.of(Duration.standardMinutes(options.getFixedWindowDuration()))))

            // Filter out everyone but those with (SCORE_WEIGHT * avg) clickrate.
            // These might be robots/spammers.
            .apply("CalculateSpammyUsers", new CalculateSpammyUsers())
            // Derive a view from the collection of spammer users. It will be used as a side input
            // in calculating the team score sums, below.
            .apply("CreateSpammersView", View.<String, Integer>asMap());

    // [START EXERCISE 5 PART b]:
    // Calculate the total score per team over fixed windows,
    // and emit cumulative updates for late data. Uses the side input derived above-- the set of
    // suspected robots-- to filter out scores from those users from the sum.
    // Write the results to BigQuery.
    rawEvents
            .apply(Window.named("WindowIntoFixedWindows").<GameEvent>into(
                    FixedWindows.of(Duration.standardMinutes(options.getFixedWindowDuration()))))
            // Filter out the detected spammer users, using the side input derived above.
            //  Use ParDo with spammersView side input to filter out spammers.
            .apply(/* TODO: YOUR CODE GOES HERE */ new ChangeMe<PCollection<GameEvent>, GameEvent>())
            // Extract and sum teamname/score pairs from the event data.
            .apply("ExtractTeamScore", new Exercise1.ExtractAndSumScore("team"))
            // Write the result to BigQuery
            .apply(ParDo.named("FormatTeamWindows").of(new FormatTeamWindowFn()))
            .apply(BigQueryIO.Write.to(teamTable).withSchema(FormatTeamWindowFn.getSchema())
                    .withCreateDisposition(CreateDisposition.CREATE_IF_NEEDED)
                    .withWriteDisposition(WriteDisposition.WRITE_APPEND));
    // [END EXERCISE 5 PART b]

    // Run the pipeline and wait for the pipeline to finish; capture cancellation requests from the
    // command line.
    PipelineResult result = pipeline.run();
}

From source file:com.google.cloud.dataflow.tutorials.game.Exercise8.java

License:Apache License

public static void main(String[] args) throws Exception {
    Exercise8Options options = PipelineOptionsFactory.fromArgs(args).withValidation()
            .as(Exercise8Options.class);
    // Enforce that this pipeline is always run in streaming mode.
    options.setStreaming(true);
    // Allow the pipeline to be cancelled automatically.
    options.setRunner(DataflowPipelineRunner.class);
    Pipeline pipeline = Pipeline.create(options);

    TableReference badUserTable = new TableReference();
    badUserTable.setDatasetId(options.getOutputDataset());
    badUserTable.setProjectId(options.getProject());
    badUserTable.setTableId(options.getOutputTableName() + "_bad_users");

    // Read Events from Pub/Sub using custom timestamps and custom message id label.
    PCollection<KV<String, GameEvent>> sessionedEvents = pipeline
            .apply("ReadGameScoreEvents",
                    PubsubIO.Read.timestampLabel(TIMESTAMP_ATTRIBUTE).idLabel(MESSAGE_ID_ATTRIBUTE)
                            .topic(options.getTopic()))
            .apply("ParseGameScoreEvents", ParDo.of(new BuggyParseEventFn()))
            .apply("KeyGameScoreByEventId",
                    WithKeys.of((GameEvent event) -> event.getEventId())
                            .withKeyType(TypeDescriptor.of(String.class)))
            .apply("SessionizeGameScoreEvents",
                    Window.<KV<String, GameEvent>>into(
                            Sessions.withGapDuration(Duration.standardMinutes(options.getSessionGap())))
                            .withOutputTimeFn(OutputTimeFns.outputAtEndOfWindow()));

    // Read PlayEvents from Pub/Sub using custom timestamps and custom message id label.
    PCollection<KV<String, PlayEvent>> sessionedPlayEvents = pipeline
            .apply("ReadGamePlayEvents",
                    PubsubIO.Read.timestampLabel(TIMESTAMP_ATTRIBUTE).idLabel(MESSAGE_ID_ATTRIBUTE)
                            .topic(options.getPlayEventsTopic()))
            .apply("ParseGamePlayEvents", ParDo.of(new BuggyParsePlayEventFn()))
            .apply("KeyGamePlayByEventId",
                    WithKeys.of((PlayEvent play) -> play.getEventId())
                            .withKeyType(TypeDescriptor.of(String.class)))
            .apply("SessionizeGamePlayEvents",
                    Window.<KV<String, PlayEvent>>into(
                            Sessions.withGapDuration(Duration.standardMinutes(options.getSessionGap())))
                            .withOutputTimeFn(OutputTimeFns.outputAtEndOfWindow()));

    // Compute per-user latency.
    PCollection<KV<String, Long>> userLatency = KeyedPCollectionTuple.of(playTag, sessionedPlayEvents)
            .and(eventTag, sessionedEvents).apply("JoinScorePlayEvents", CoGroupByKey.create())
            .apply("ComputeLatency", ParDo.of(new ComputeLatencyFn()));

    // Create a view onto quantiles of the global latency distribution.
    PCollectionView<List<Long>> globalQuantiles = userLatency.apply("GetLatencies", Values.create())
            // Re-window session results into a global window, and trigger periodically making sure
            // to use the full accumulated window contents.
            .apply("GlobalWindowRetrigger",
                    Window.<Long>into(new GlobalWindows())
                            .triggering(Repeatedly.forever(AfterProcessingTime.pastFirstElementInPane()
                                    .plusDelayOf(Duration.standardSeconds(GLOBAL_AGGREGATE_TRIGGER_SEC))))
                            .accumulatingFiredPanes())
            .apply(((Combine.Globally<Long, List<Long>>) ApproximateQuantiles
                    .<Long>globally(GLOBAL_LATENCY_QUANTILES)).withFanout(GLOBAL_AGGREGATE_FANOUT)
                            .asSingletonView());

    userLatency
            // Use the computed latency distribution as a side-input to filter out likely bad users.
            .apply("DetectBadUsers",
                    ParDo.withSideInputs(globalQuantiles).of(new DoFn<KV<String, Long>, String>() {
                        public void processElement(ProcessContext c) {
                            String user = c.element().getKey();
                            Long latency = c.element().getValue();
                            List<Long> quantiles = c.sideInput(globalQuantiles);
                            // Users in the first quantile are considered spammers, since their
                            // score to play event latency is too low, suggesting a robot.
                            if (latency < quantiles.get(1)) {
                                c.output(user);
                            }
                        }
                    }))
            // We want to emit only a single BigQuery row for every bad user. To do this, we
            // re-key by user, then window globally and trigger on the first element for each key.
            .apply("KeyByUser", WithKeys.of((String user) -> user).withKeyType(TypeDescriptor.of(String.class)))
            .apply("GlobalWindowsTriggerOnFirst",
                    Window.<KV<String, String>>into(new GlobalWindows())
                            .triggering(AfterProcessingTime.pastFirstElementInPane()
                                    .plusDelayOf(Duration.standardSeconds(10)))
                            .accumulatingFiredPanes())
            .apply("GroupByUser", GroupByKey.<String, String>create())
            .apply("FormatBadUsers", ParDo.of(new FormatBadUserFn())).apply("WriteBadUsers",
                    BigQueryIO.Write.to(badUserTable).withSchema(FormatBadUserFn.getSchema())
                            .withCreateDisposition(CreateDisposition.CREATE_IF_NEEDED)
                            .withWriteDisposition(WriteDisposition.WRITE_APPEND));

    userLatency.apply("ReKeyFn",
            // BUG4: We have a hot key. Especially when the cost of downstream fn is high, must
            // ensure we have good sharding.
            WithKeys.of((KV<String, Long> item) -> "").withKeyType(TypeDescriptor.of(String.class)))
            .apply("WindowAndTriggerOften",
                    Window.<KV<String, KV<String, Long>>>into(new GlobalWindows())
                            .triggering(Repeatedly.forever(AfterProcessingTime.pastFirstElementInPane()
                                    .plusDelayOf(Duration.standardSeconds(10))))
                            .discardingFiredPanes())
            .apply("GroupByNewKey", GroupByKey.<String, KV<String, Long>>create())
            .apply("DoExpensiveWork", ParDo.of(new ExpensiveWorkPerElement()));

    // Run the pipeline and wait for the pipeline to finish; capture cancellation requests from the
    // command line.
    PipelineResult result = pipeline.run();
}

From source file:com.google.cloud.dataflow.tutorials.game.solutions.Exercise5.java

License:Apache License

public static void main(String[] args) throws Exception {

    Exercise5Options options = PipelineOptionsFactory.fromArgs(args).withValidation()
            .as(Exercise5Options.class);
    // Enforce that this pipeline is always run in streaming mode.
    options.setStreaming(true);
    // Allow the pipeline to be cancelled automatically.
    options.setRunner(DataflowPipelineRunner.class);
    Pipeline pipeline = Pipeline.create(options);

    TableReference teamTable = new TableReference();
    teamTable.setDatasetId(options.getOutputDataset());
    teamTable.setProjectId(options.getProject());
    teamTable.setTableId(options.getOutputTableName());

    PCollection<GameEvent> rawEvents = pipeline.apply(new Exercise3.ReadGameEvents(options));

    // Extract username/score pairs from the event stream
    PCollection<KV<String, Integer>> userEvents = rawEvents.apply("ExtractUserScore",
            MapElements.via((GameEvent gInfo) -> KV.of(gInfo.getUser(), gInfo.getScore()))
                    .withOutputType(new TypeDescriptor<KV<String, Integer>>() {
                    }));

    // Calculate the total score per user over fixed windows, and
    // cumulative updates for late data.
    final PCollectionView<Map<String, Integer>> spammersView = userEvents
            .apply(Window.named("FixedWindowsUser").<KV<String, Integer>>into(
                    FixedWindows.of(Duration.standardMinutes(options.getFixedWindowDuration()))))

            // Filter out everyone but those with (SCORE_WEIGHT * avg) clickrate.
            // These might be robots/spammers.
            .apply("CalculateSpammyUsers", new CalculateSpammyUsers())
            // Derive a view from the collection of spammer users. It will be used as a side input
            // in calculating the team score sums, below.
            .apply("CreateSpammersView", View.<String, Integer>asMap());

    // Calculate the total score per team over fixed windows,
    // and emit cumulative updates for late data. Uses the side input derived above-- the set of
    // suspected robots-- to filter out scores from those users from the sum.
    // Write the results to BigQuery.
    rawEvents
            .apply(Window.named("WindowIntoFixedWindows").<GameEvent>into(
                    FixedWindows.of(Duration.standardMinutes(options.getFixedWindowDuration()))))
            // Filter out the detected spammer users, using the side input derived above.
            .apply(ParDo.named("FilterOutSpammers").withSideInputs(spammersView)
                    .of(new DoFn<GameEvent, GameEvent>() {
                        @Override
                        public void processElement(ProcessContext c) {
                            // If the user is not in the spammers Map, output the data element.
                            if (c.sideInput(spammersView).get(c.element().getUser().trim()) == null) {
                                c.output(c.element());
                            }
                        }
                    }))
            // Extract and sum teamname/score pairs from the event data.
            .apply("ExtractTeamScore", new Exercise1.ExtractAndSumScore("team"))
            // Write the result to BigQuery
            .apply(ParDo.named("FormatTeamWindows").of(new FormatTeamWindowFn()))
            .apply(BigQueryIO.Write.to(teamTable).withSchema(FormatTeamWindowFn.getSchema())
                    .withCreateDisposition(CreateDisposition.CREATE_IF_NEEDED)
                    .withWriteDisposition(WriteDisposition.WRITE_APPEND));

    // Run the pipeline and wait for the pipeline to finish; capture cancellation requests from the
    // command line.
    PipelineResult result = pipeline.run();
}