List of usage examples for org.joda.time Duration standardMinutes
public static Duration standardMinutes(long minutes)
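Creates a Duration spanning the given number of standard minutes, where each minute is exactly 60,000 milliseconds, with no adjustment for daylight-saving or other calendar anomalies. A minimal standalone sketch of the conversion (the class name StandardMinutesDemo is illustrative only, not taken from any example below):

import org.joda.time.Duration;

public class StandardMinutesDemo {
    public static void main(String[] args) {
        // A standard minute is exactly 60,000 milliseconds.
        Duration fiveMinutes = Duration.standardMinutes(5);
        System.out.println(fiveMinutes.getMillis());          // 300000
        System.out.println(fiveMinutes.getStandardSeconds()); // 300
    }
}

In the Apex and Beam examples that follow, this is the idiomatic way to express window sizes, slide intervals, session gaps, and allowed lateness.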
From source file: org.apache.apex.malhar.stream.sample.WindowedWordCount.java
License: Apache License

/**
 * Populate dag with High-Level API.
 * @param dag
 * @param conf
 */
@Override
public void populateDAG(DAG dag, Configuration conf) {
    TextInput input = new TextInput();
    Collector collector = new Collector();

    // Create stream from the TextInput operator.
    ApexStream<Tuple.TimestampedTuple<String>> stream = StreamFactory
            .fromInput(input, input.output, name("input"))

            // Extract all the words from the input line of text.
            .flatMap(new Function.FlatMapFunction<String, String>() {
                @Override
                public Iterable<String> f(String input) {
                    return Arrays.asList(input.split("[\\p{Punct}\\s]+"));
                }
            }, name("ExtractWords"))

            // Wrap the word with a randomly generated timestamp.
            .map(new AddTimestampFn(), name("AddTimestampFn"));

    // Apply window and trigger option.
    // TODO: change trigger option to atWaterMark when available.
    WindowedStream<Tuple.TimestampedTuple<String>> windowedWords = stream.window(
            new WindowOption.TimeWindows(Duration.standardMinutes(WINDOW_SIZE)),
            new TriggerOption().accumulatingFiredPanes().withEarlyFiringsAtEvery(1));

    WindowedStream<PojoEvent> wordCounts =
            // Perform a countByKey transformation to count the appearance of each word in every time window.
            windowedWords.countByKey(new Function.ToKeyValue<Tuple.TimestampedTuple<String>, String, Long>() {
                @Override
                public Tuple<KeyValPair<String, Long>> f(Tuple.TimestampedTuple<String> input) {
                    return new Tuple.TimestampedTuple<KeyValPair<String, Long>>(input.getTimestamp(),
                            new KeyValPair<String, Long>(input.getValue(), 1L));
                }
            }, name("count words"))

            // Format the output and print out the result.
            .map(new FormatAsTableRowFn(), name("FormatAsTableRowFn")).print(name("console"));

    wordCounts.endWith(collector, collector.input, name("Collector")).populateDag(dag);
}
From source file: org.apache.beam.examples.complete.AutoComplete.java
License: Apache License

public static void runAutocompletePipeline(Options options) throws IOException {
    options.setBigQuerySchema(FormatForBigquery.getSchema());
    ExampleUtils exampleUtils = new ExampleUtils(options);

    // We support running the same pipeline in either
    // batch or windowed streaming mode.
    WindowFn<Object, ?> windowFn;
    if (options.isStreaming()) {
        checkArgument(!options.getOutputToDatastore(), "DatastoreIO is not supported in streaming.");
        windowFn = SlidingWindows.of(Duration.standardMinutes(30)).every(Duration.standardSeconds(5));
    } else {
        windowFn = new GlobalWindows();
    }

    // Create the pipeline.
    Pipeline p = Pipeline.create(options);
    PCollection<KV<String, List<CompletionCandidate>>> toWrite = p
            .apply(TextIO.read().from(options.getInputFile()))
            .apply(ParDo.of(new ExtractHashtags()))
            .apply(Window.into(windowFn))
            .apply(ComputeTopCompletions.top(10, options.getRecursive()));

    if (options.getOutputToDatastore()) {
        toWrite.apply("FormatForDatastore",
                ParDo.of(new FormatForDatastore(options.getKind(), options.getDatastoreAncestorKey())))
                .apply(DatastoreIO.v1().write().withProjectId(
                        MoreObjects.firstNonNull(options.getOutputProject(), options.getProject())));
    }

    if (options.getOutputToBigQuery()) {
        exampleUtils.setupBigQueryTable();

        TableReference tableRef = new TableReference();
        tableRef.setProjectId(options.getProject());
        tableRef.setDatasetId(options.getBigQueryDataset());
        tableRef.setTableId(options.getBigQueryTable());

        toWrite.apply(ParDo.of(new FormatForBigquery()))
                .apply(BigQueryIO.writeTableRows().to(tableRef)
                        .withSchema(FormatForBigquery.getSchema())
                        .withCreateDisposition(BigQueryIO.Write.CreateDisposition.CREATE_IF_NEEDED)
                        .withWriteDisposition(options.isStreaming()
                                ? BigQueryIO.Write.WriteDisposition.WRITE_APPEND
                                : BigQueryIO.Write.WriteDisposition.WRITE_TRUNCATE));
    }

    if (options.getOutputToChecksum()) {
        PCollection<Long> checksum = toWrite
                .apply(ParDo.of(new DoFn<KV<String, List<CompletionCandidate>>, Long>() {
                    @ProcessElement
                    public void process(ProcessContext c) {
                        KV<String, List<CompletionCandidate>> elm = c.element();
                        Long listHash = c.element().getValue().stream().mapToLong(cc -> cc.hashCode()).sum();
                        c.output(Long.valueOf(elm.getKey().hashCode()) + listHash);
                    }
                }))
                .apply(Sum.longsGlobally());

        PAssert.that(checksum).containsInAnyOrder(options.getExpectedChecksum());
    }

    // Run the pipeline.
    PipelineResult result = p.run();

    // ExampleUtils will try to cancel the pipeline and the injector before the program exits.
    exampleUtils.waitToFinish(result);
}
From source file: org.apache.beam.examples.complete.game.GameStats.java
License: Apache License

public static void main(String[] args) throws Exception {
    Options options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class);
    // Enforce that this pipeline is always run in streaming mode.
    options.setStreaming(true);
    ExampleUtils exampleUtils = new ExampleUtils(options);
    Pipeline pipeline = Pipeline.create(options);

    // Read Events from Pub/Sub using custom timestamps
    PCollection<GameActionInfo> rawEvents = pipeline
            .apply(PubsubIO.readStrings()
                    .withTimestampAttribute(GameConstants.TIMESTAMP_ATTRIBUTE)
                    .fromTopic(options.getTopic()))
            .apply("ParseGameEvent", ParDo.of(new ParseEventFn()));

    // Extract username/score pairs from the event stream
    PCollection<KV<String, Integer>> userEvents = rawEvents.apply("ExtractUserScore",
            MapElements.into(TypeDescriptors.kvs(TypeDescriptors.strings(), TypeDescriptors.integers()))
                    .via((GameActionInfo gInfo) -> KV.of(gInfo.getUser(), gInfo.getScore())));

    // Calculate the total score per user over fixed windows, and
    // cumulative updates for late data.
    final PCollectionView<Map<String, Integer>> spammersView = userEvents
            .apply("FixedWindowsUser",
                    Window.into(FixedWindows.of(Duration.standardMinutes(options.getFixedWindowDuration()))))
            // Filter out everyone but those with (SCORE_WEIGHT * avg) clickrate.
            // These might be robots/spammers.
            .apply("CalculateSpammyUsers", new CalculateSpammyUsers())
            // Derive a view from the collection of spammer users. It will be used as a side input
            // in calculating the team score sums, below.
            .apply("CreateSpammersView", View.asMap());

    // [START DocInclude_FilterAndCalc]
    // Calculate the total score per team over fixed windows,
    // and emit cumulative updates for late data. Uses the side input derived above-- the set of
    // suspected robots-- to filter out scores from those users from the sum.
    // Write the results to BigQuery.
    rawEvents
            .apply("WindowIntoFixedWindows",
                    Window.into(FixedWindows.of(Duration.standardMinutes(options.getFixedWindowDuration()))))
            // Filter out the detected spammer users, using the side input derived above.
            .apply("FilterOutSpammers", ParDo.of(new DoFn<GameActionInfo, GameActionInfo>() {
                @ProcessElement
                public void processElement(ProcessContext c) {
                    // If the user is not in the spammers Map, output the data element.
                    if (c.sideInput(spammersView).get(c.element().getUser().trim()) == null) {
                        c.output(c.element());
                    }
                }
            }).withSideInputs(spammersView))
            // Extract and sum teamname/score pairs from the event data.
            .apply("ExtractTeamScore", new ExtractAndSumScore("team"))
            // [END DocInclude_FilterAndCalc]
            // Write the result to BigQuery
            .apply("WriteTeamSums",
                    new WriteWindowedToBigQuery<>(options.as(GcpOptions.class).getProject(),
                            options.getDataset(), options.getGameStatsTablePrefix() + "_team",
                            configureWindowedWrite()));

    // [START DocInclude_SessionCalc]
    // Detect user sessions-- that is, a burst of activity separated by a gap from further
    // activity. Find and record the mean session lengths.
    // This information could help the game designers track the changing user engagement
    // as their set of games changes.
    userEvents
            .apply("WindowIntoSessions",
                    Window.<KV<String, Integer>>into(
                            Sessions.withGapDuration(Duration.standardMinutes(options.getSessionGap())))
                            .withTimestampCombiner(TimestampCombiner.END_OF_WINDOW))
            // For this use, we care only about the existence of the session, not any particular
            // information aggregated over it, so the following is an efficient way to do that.
            .apply(Combine.perKey(x -> 0))
            // Get the duration per session.
            .apply("UserSessionActivity", ParDo.of(new UserSessionInfoFn()))
            // [END DocInclude_SessionCalc]
            // [START DocInclude_Rewindow]
            // Re-window to process groups of session sums according to when the sessions complete.
            .apply("WindowToExtractSessionMean",
                    Window.into(FixedWindows.of(Duration.standardMinutes(options.getUserActivityWindowDuration()))))
            // Find the mean session duration in each window.
            .apply(Mean.<Integer>globally().withoutDefaults())
            // Write this info to a BigQuery table.
            .apply("WriteAvgSessionLength",
                    new WriteWindowedToBigQuery<>(options.as(GcpOptions.class).getProject(),
                            options.getDataset(), options.getGameStatsTablePrefix() + "_sessions",
                            configureSessionWindowWrite()));
    // [END DocInclude_Rewindow]

    // Run the pipeline and wait for the pipeline to finish; capture cancellation requests from the
    // command line.
    PipelineResult result = pipeline.run();
    exampleUtils.waitToFinish(result);
}
From source file: org.apache.beam.examples.complete.game.HourlyTeamScore.java
License: Apache License

/** Run a batch pipeline to do windowed analysis of the data. */
// [START DocInclude_HTSMain]
public static void main(String[] args) throws Exception {
    // Begin constructing a pipeline configured by commandline flags.
    Options options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class);
    Pipeline pipeline = Pipeline.create(options);

    final Instant stopMinTimestamp = new Instant(minFmt.parseMillis(options.getStopMin()));
    final Instant startMinTimestamp = new Instant(minFmt.parseMillis(options.getStartMin()));

    // Read 'gaming' events from a text file.
    pipeline.apply(TextIO.read().from(options.getInput()))
            // Parse the incoming data.
            .apply("ParseGameEvent", ParDo.of(new ParseEventFn()))

            // Filter out data before and after the given times so that it is not included
            // in the calculations. As we collect data in batches (say, by day), the batch for the day
            // that we want to analyze could potentially include some late-arriving data from the
            // previous day.
            // If so, we want to weed it out. Similarly, if we include data from the following day
            // (to scoop up late-arriving events from the day we're analyzing), we need to weed out
            // events that fall after the time period we want to analyze.
            // [START DocInclude_HTSFilters]
            .apply("FilterStartTime",
                    Filter.by((GameActionInfo gInfo) -> gInfo.getTimestamp() > startMinTimestamp.getMillis()))
            .apply("FilterEndTime",
                    Filter.by((GameActionInfo gInfo) -> gInfo.getTimestamp() < stopMinTimestamp.getMillis()))
            // [END DocInclude_HTSFilters]

            // [START DocInclude_HTSAddTsAndWindow]
            // Add an element timestamp based on the event log, and apply fixed windowing.
            .apply("AddEventTimestamps", WithTimestamps.of((GameActionInfo i) -> new Instant(i.getTimestamp())))
            .apply("FixedWindowsTeam",
                    Window.into(FixedWindows.of(Duration.standardMinutes(options.getWindowDuration()))))
            // [END DocInclude_HTSAddTsAndWindow]

            // Extract and sum teamname/score pairs from the event data.
            .apply("ExtractTeamScore", new ExtractAndSumScore("team"))
            .apply("WriteTeamScoreSums", new WriteToText<>(options.getOutput(), configureOutput(), true));

    pipeline.run().waitUntilFinish();
}
From source file: org.apache.beam.examples.complete.game.LeaderBoard.java
License: Apache License

public static void main(String[] args) throws Exception {
    Options options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class);
    // Enforce that this pipeline is always run in streaming mode.
    options.setStreaming(true);
    ExampleUtils exampleUtils = new ExampleUtils(options);
    Pipeline pipeline = Pipeline.create(options);

    // Read game events from Pub/Sub using custom timestamps, which are extracted from the pubsub
    // data elements, and parse the data.
    PCollection<GameActionInfo> gameEvents = pipeline
            .apply(PubsubIO.readStrings()
                    .withTimestampAttribute(GameConstants.TIMESTAMP_ATTRIBUTE)
                    .fromTopic(options.getTopic()))
            .apply("ParseGameEvent", ParDo.of(new ParseEventFn()));

    gameEvents
            .apply("CalculateTeamScores",
                    new CalculateTeamScores(Duration.standardMinutes(options.getTeamWindowDuration()),
                            Duration.standardMinutes(options.getAllowedLateness())))
            // Write the results to BigQuery.
            .apply("WriteTeamScoreSums",
                    new WriteWindowedToBigQuery<>(options.as(GcpOptions.class).getProject(),
                            options.getDataset(), options.getLeaderBoardTableName() + "_team",
                            configureWindowedTableWrite()));

    gameEvents
            .apply("CalculateUserScores",
                    new CalculateUserScores(Duration.standardMinutes(options.getAllowedLateness())))
            // Write the results to BigQuery.
            .apply("WriteUserScoreSums",
                    new WriteToBigQuery<>(options.as(GcpOptions.class).getProject(), options.getDataset(),
                            options.getLeaderBoardTableName() + "_user", configureGlobalWindowBigQueryWrite()));

    // Run the pipeline and wait for the pipeline to finish; capture cancellation requests from the
    // command line.
    PipelineResult result = pipeline.run();
    exampleUtils.waitToFinish(result);
}
From source file: org.apache.beam.examples.complete.TrafficMaxLaneFlow.java
License: Apache License

public static void runTrafficMaxLaneFlow(TrafficMaxLaneFlowOptions options) throws IOException {
    // Using ExampleUtils to set up required resources.
    ExampleUtils exampleUtils = new ExampleUtils(options);
    exampleUtils.setup();

    Pipeline pipeline = Pipeline.create(options);
    TableReference tableRef = new TableReference();
    tableRef.setProjectId(options.getProject());
    tableRef.setDatasetId(options.getBigQueryDataset());
    tableRef.setTableId(options.getBigQueryTable());

    pipeline.apply("ReadLines", new ReadFileAndExtractTimestamps(options.getInputFile()))
            // row... => <station route, station speed> ...
            .apply(ParDo.of(new ExtractFlowInfoFn()))
            // Map the incoming data stream into sliding windows.
            .apply(Window.into(SlidingWindows.of(Duration.standardMinutes(options.getWindowDuration()))
                    .every(Duration.standardMinutes(options.getWindowSlideEvery()))))
            .apply(new MaxLaneFlow())
            .apply(BigQueryIO.writeTableRows().to(tableRef).withSchema(FormatMaxesFn.getSchema()));

    // Run the pipeline.
    PipelineResult result = pipeline.run();

    // ExampleUtils will try to cancel the pipeline and the injector before the program exits.
    exampleUtils.waitToFinish(result);
}
From source file: org.apache.beam.examples.complete.TrafficRoutes.java
License: Apache License

public static void runTrafficRoutes(TrafficRoutesOptions options) throws IOException {
    // Using ExampleUtils to set up required resources.
    ExampleUtils exampleUtils = new ExampleUtils(options);
    exampleUtils.setup();

    Pipeline pipeline = Pipeline.create(options);
    TableReference tableRef = new TableReference();
    tableRef.setProjectId(options.getProject());
    tableRef.setDatasetId(options.getBigQueryDataset());
    tableRef.setTableId(options.getBigQueryTable());

    pipeline.apply("ReadLines", new ReadFileAndExtractTimestamps(options.getInputFile()))
            // row... => <station route, station speed> ...
            .apply(ParDo.of(new ExtractStationSpeedFn()))
            // Map the incoming data stream into sliding windows.
            .apply(Window.into(SlidingWindows.of(Duration.standardMinutes(options.getWindowDuration()))
                    .every(Duration.standardMinutes(options.getWindowSlideEvery()))))
            .apply(new TrackSpeed())
            .apply(BigQueryIO.writeTableRows().to(tableRef).withSchema(FormatStatsFn.getSchema()));

    // Run the pipeline.
    PipelineResult result = pipeline.run();

    // ExampleUtils will try to cancel the pipeline and the injector before the program exits.
    exampleUtils.waitToFinish(result);
}
From source file: org.apache.beam.examples.snippets.Snippets.java
License: Apache License

public static void fileProcessPattern() throws Exception {
    Pipeline p = Pipeline.create();

    // [START FileProcessPatternProcessNewFilesSnip1]
    // This produces PCollection<MatchResult.Metadata>
    p.apply(FileIO.match().filepattern("...").continuously(Duration.standardSeconds(30),
            Watch.Growth.afterTimeSinceNewOutput(Duration.standardHours(1))));
    // [END FileProcessPatternProcessNewFilesSnip1]

    // [START FileProcessPatternProcessNewFilesSnip2]
    // This produces PCollection<String>
    p.apply(TextIO.read().from("<path-to-files>/*").watchForNewFiles(
            // Check for new files every minute.
            Duration.standardMinutes(1),
            // Stop watching the file pattern if no new files appear for an hour.
            Watch.Growth.afterTimeSinceNewOutput(Duration.standardHours(1))));
    // [END FileProcessPatternProcessNewFilesSnip2]

    // [START FileProcessPatternAccessMetadataSnip1]
    p.apply(FileIO.match().filepattern("hdfs://path/to/*.gz"))
            // The withCompression method is optional. By default, the Beam SDK detects compression from
            // the filename.
            .apply(FileIO.readMatches().withCompression(Compression.GZIP))
            .apply(ParDo.of(new DoFn<FileIO.ReadableFile, String>() {
                @ProcessElement
                public void process(@Element FileIO.ReadableFile file) {
                    // We can now access the file and its metadata.
                    LOG.info("File Metadata resourceId is {} ", file.getMetadata().resourceId());
                }
            }));
    // [END FileProcessPatternAccessMetadataSnip1]
}
From source file: org.apache.beam.examples.tutorial.game.Exercise3.java
License: Apache License

/**
 * Run a batch pipeline to do windowed analysis of the data.
 */
public static void main(String[] args) throws Exception {
    // Begin constructing a pipeline configured by commandline flags.
    ExerciseOptions options = PipelineOptionsFactory.fromArgs(args).withValidation().as(ExerciseOptions.class);
    Pipeline pipeline = Pipeline.create(options);

    pipeline
            // Read a bounded set of generated data
            .apply(new Input.BoundedGenerator())
            // Extract and sum the windowed teamname/scores
            .apply(new WindowedTeamScore(Duration.standardMinutes(1)))
            // Write the hourly team scores to the "hourly_team_score" table
            .apply(new Output.WriteHourlyTeamScore());

    pipeline.run();
}
From source file: org.apache.beam.examples.WindowedWordCount.java
License: Apache License

static void runWindowedWordCount(Options options) throws IOException {
    final String output = options.getOutput();
    final Instant minTimestamp = new Instant(options.getMinTimestampMillis());
    final Instant maxTimestamp = new Instant(options.getMaxTimestampMillis());

    Pipeline pipeline = Pipeline.create(options);

    /*
     * Concept #1: the Beam SDK lets us run the same pipeline with either a bounded or
     * unbounded input source.
     */
    PCollection<String> input = pipeline
            /* Read from the GCS file. */
            .apply(TextIO.read().from(options.getInputFile()))
            // Concept #2: Add an element timestamp, using an artificial time just to show
            // windowing.
            // See AddTimestampFn for more detail on this.
            .apply(ParDo.of(new AddTimestampFn(minTimestamp, maxTimestamp)));

    /*
     * Concept #3: Window into fixed windows. The fixed window size for this example defaults to 1
     * minute (you can change this with a command-line option). See the documentation for more
     * information on how fixed windows work, and for information on the other types of windowing
     * available (e.g., sliding windows).
     */
    PCollection<String> windowedWords = input
            .apply(Window.into(FixedWindows.of(Duration.standardMinutes(options.getWindowSize()))));

    /*
     * Concept #4: Re-use our existing CountWords transform that does not have knowledge of
     * windows over a PCollection containing windowed values.
     */
    PCollection<KV<String, Long>> wordCounts = windowedWords.apply(new WordCount.CountWords());

    /*
     * Concept #5: Format the results and write to a sharded file partitioned by window, using a
     * simple ParDo operation. Because there may be failures followed by retries, the
     * writes must be idempotent, but the details of writing to files are elided here.
     */
    wordCounts.apply(MapElements.via(new WordCount.FormatAsTextFn()))
            .apply(new WriteOneFilePerWindow(output, options.getNumShards()));

    PipelineResult result = pipeline.run();
    try {
        result.waitUntilFinish();
    } catch (Exception exc) {
        result.cancel();
    }
}