Example usage for org.joda.time Duration standardMinutes

Introduction

On this page you can find example usage for org.joda.time Duration standardMinutes.

Prototype

public static Duration standardMinutes(long minutes) 

Document

Create a duration with the specified number of minutes assuming that there are the standard number of milliseconds in a minute.
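
Because a standard minute is defined as exactly 60,000 milliseconds, the result is a fixed-length duration with no calendar arithmetic involved. A minimal sketch of the method in isolation (the class name below is illustrative, not taken from the examples that follow):

import org.joda.time.Duration;

public class StandardMinutesDemo {
    public static void main(String[] args) {
        // Five standard minutes: 5 * 60,000 = 300,000 milliseconds.
        Duration fiveMinutes = Duration.standardMinutes(5);
        System.out.println(fiveMinutes.getMillis());          // 300000
        System.out.println(fiveMinutes.getStandardSeconds()); // 300
    }
}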

Usage

From source file: org.apache.apex.malhar.stream.sample.WindowedWordCount.java

License: Apache License

/**
 * Populate dag with High-Level API.
 * @param dag
 * @param conf
 */
@Override
public void populateDAG(DAG dag, Configuration conf) {
    TextInput input = new TextInput();
    Collector collector = new Collector();

    // Create stream from the TextInput operator.
    ApexStream<Tuple.TimestampedTuple<String>> stream = StreamFactory
            .fromInput(input, input.output, name("input"))

            // Extract all the words from the input line of text.
            .flatMap(new Function.FlatMapFunction<String, String>() {
                @Override
                public Iterable<String> f(String input) {
                    return Arrays.asList(input.split("[\\p{Punct}\\s]+"));
                }
            }, name("ExtractWords"))

            // Wrap the word with a randomly generated timestamp.
            .map(new AddTimestampFn(), name("AddTimestampFn"));

    // apply window and trigger option.
    // TODO: change trigger option to atWaterMark when available.
    WindowedStream<Tuple.TimestampedTuple<String>> windowedWords = stream.window(
            new WindowOption.TimeWindows(Duration.standardMinutes(WINDOW_SIZE)),
            new TriggerOption().accumulatingFiredPanes().withEarlyFiringsAtEvery(1));

    WindowedStream<PojoEvent> wordCounts =
            // Perform a countByKey transformation to count the appearance of each word in every time window.
            windowedWords.countByKey(new Function.ToKeyValue<Tuple.TimestampedTuple<String>, String, Long>() {
                @Override
                public Tuple<KeyValPair<String, Long>> f(Tuple.TimestampedTuple<String> input) {
                    return new Tuple.TimestampedTuple<KeyValPair<String, Long>>(input.getTimestamp(),
                            new KeyValPair<String, Long>(input.getValue(), 1L));
                }
            }, name("count words"))

                    // Format the output and print out the result.
                    .map(new FormatAsTableRowFn(), name("FormatAsTableRowFn")).print(name("console"));

    wordCounts.endWith(collector, collector.input, name("Collector")).populateDag(dag);
}

From source file: org.apache.beam.examples.complete.AutoComplete.java

License: Apache License

public static void runAutocompletePipeline(Options options) throws IOException {

    options.setBigQuerySchema(FormatForBigquery.getSchema());
    ExampleUtils exampleUtils = new ExampleUtils(options);

    // We support running the same pipeline in either
    // batch or windowed streaming mode.
    WindowFn<Object, ?> windowFn;
    if (options.isStreaming()) {
        checkArgument(!options.getOutputToDatastore(), "DatastoreIO is not supported in streaming.");
        windowFn = SlidingWindows.of(Duration.standardMinutes(30)).every(Duration.standardSeconds(5));
    } else {
        windowFn = new GlobalWindows();
    }

    // Create the pipeline.
    Pipeline p = Pipeline.create(options);
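    // Read the input, extract hashtags, apply the chosen windowing, and compute
    // the top 10 completion candidates per prefix.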
    PCollection<KV<String, List<CompletionCandidate>>> toWrite = p
            .apply(TextIO.read().from(options.getInputFile())).apply(ParDo.of(new ExtractHashtags()))
            .apply(Window.into(windowFn)).apply(ComputeTopCompletions.top(10, options.getRecursive()));

    if (options.getOutputToDatastore()) {
        toWrite.apply("FormatForDatastore",
                ParDo.of(new FormatForDatastore(options.getKind(), options.getDatastoreAncestorKey())))
                .apply(DatastoreIO.v1().write().withProjectId(
                        MoreObjects.firstNonNull(options.getOutputProject(), options.getProject())));
    }
    if (options.getOutputToBigQuery()) {
        exampleUtils.setupBigQueryTable();

        TableReference tableRef = new TableReference();
        tableRef.setProjectId(options.getProject());
        tableRef.setDatasetId(options.getBigQueryDataset());
        tableRef.setTableId(options.getBigQueryTable());

        toWrite.apply(ParDo.of(new FormatForBigquery()))
                .apply(BigQueryIO.writeTableRows().to(tableRef).withSchema(FormatForBigquery.getSchema())
                        .withCreateDisposition(BigQueryIO.Write.CreateDisposition.CREATE_IF_NEEDED)
                        .withWriteDisposition(
                                options.isStreaming() ? BigQueryIO.Write.WriteDisposition.WRITE_APPEND
                                        : BigQueryIO.Write.WriteDisposition.WRITE_TRUNCATE));
    }

    if (options.getOutputToChecksum()) {
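        // Collapse the output to a single checksum: hash each key together with its
        // candidate list, then sum the hashes globally for the assertion below.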
        PCollection<Long> checksum = toWrite
                .apply(ParDo.of(new DoFn<KV<String, List<CompletionCandidate>>, Long>() {
                    @ProcessElement
                    public void process(ProcessContext c) {
                        KV<String, List<CompletionCandidate>> elm = c.element();
                        Long listHash = c.element().getValue().stream().mapToLong(cc -> cc.hashCode()).sum();
                        c.output(Long.valueOf(elm.getKey().hashCode()) + listHash);
                    }
                })).apply(Sum.longsGlobally());

        PAssert.that(checksum).containsInAnyOrder(options.getExpectedChecksum());
    }

    // Run the pipeline.
    PipelineResult result = p.run();

    // ExampleUtils will try to cancel the pipeline and the injector before the program exits.
    exampleUtils.waitToFinish(result);
}

From source file: org.apache.beam.examples.complete.game.GameStats.java

License: Apache License

public static void main(String[] args) throws Exception {

    Options options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class);
    // Enforce that this pipeline is always run in streaming mode.
    options.setStreaming(true);
    ExampleUtils exampleUtils = new ExampleUtils(options);
    Pipeline pipeline = Pipeline.create(options);

    // Read Events from Pub/Sub using custom timestamps
    PCollection<GameActionInfo> rawEvents = pipeline.apply(PubsubIO.readStrings()
            .withTimestampAttribute(GameConstants.TIMESTAMP_ATTRIBUTE).fromTopic(options.getTopic()))
            .apply("ParseGameEvent", ParDo.of(new ParseEventFn()));

    // Extract username/score pairs from the event stream
    PCollection<KV<String, Integer>> userEvents = rawEvents.apply("ExtractUserScore",
            MapElements.into(TypeDescriptors.kvs(TypeDescriptors.strings(), TypeDescriptors.integers()))
                    .via((GameActionInfo gInfo) -> KV.of(gInfo.getUser(), gInfo.getScore())));

    // Calculate the total score per user over fixed windows, and
    // cumulative updates for late data.
    final PCollectionView<Map<String, Integer>> spammersView = userEvents
            .apply("FixedWindowsUser",
                    Window.into(FixedWindows.of(Duration.standardMinutes(options.getFixedWindowDuration()))))

            // Retain only the users whose click rate exceeds (SCORE_WEIGHT * avg).
            // These might be robots/spammers.
            .apply("CalculateSpammyUsers", new CalculateSpammyUsers())
            // Derive a view from the collection of spammer users. It will be used as a side input
            // in calculating the team score sums, below.
            .apply("CreateSpammersView", View.asMap());

    // [START DocInclude_FilterAndCalc]
    // Calculate the total score per team over fixed windows,
    // and emit cumulative updates for late data. Uses the side input derived above (the set of
    // suspected robots) to filter those users' scores out of the sum.
    // Write the results to BigQuery.
    rawEvents
            .apply("WindowIntoFixedWindows",
                    Window.into(FixedWindows.of(Duration.standardMinutes(options.getFixedWindowDuration()))))
            // Filter out the detected spammer users, using the side input derived above.
            .apply("FilterOutSpammers", ParDo.of(new DoFn<GameActionInfo, GameActionInfo>() {
                @ProcessElement
                public void processElement(ProcessContext c) {
                    // If the user is not in the spammers Map, output the data element.
                    if (c.sideInput(spammersView).get(c.element().getUser().trim()) == null) {
                        c.output(c.element());
                    }
                }
            }).withSideInputs(spammersView))
            // Extract and sum teamname/score pairs from the event data.
            .apply("ExtractTeamScore", new ExtractAndSumScore("team"))
            // [END DocInclude_FilterAndCalc]
            // Write the result to BigQuery
            .apply("WriteTeamSums",
                    new WriteWindowedToBigQuery<>(options.as(GcpOptions.class).getProject(),
                            options.getDataset(), options.getGameStatsTablePrefix() + "_team",
                            configureWindowedWrite()));

    // [START DocInclude_SessionCalc]
    // Detect user sessions: bursts of activity separated by a gap from further
    // activity. Find and record the mean session lengths.
    // This information could help the game designers track the changing user engagement
    // as their set of games changes.
    userEvents
            .apply("WindowIntoSessions",
                    Window.<KV<String, Integer>>into(
                            Sessions.withGapDuration(Duration.standardMinutes(options.getSessionGap())))
                            .withTimestampCombiner(TimestampCombiner.END_OF_WINDOW))
            // For this use, we care only about the existence of the session, not any particular
            // information aggregated over it, so the following is an efficient way to do that.
            .apply(Combine.perKey(x -> 0))
            // Get the duration per session.
            .apply("UserSessionActivity", ParDo.of(new UserSessionInfoFn()))
            // [END DocInclude_SessionCalc]
            // [START DocInclude_Rewindow]
            // Re-window to process groups of session sums according to when the sessions complete.
            .apply("WindowToExtractSessionMean",
                    Window.into(
                            FixedWindows.of(Duration.standardMinutes(options.getUserActivityWindowDuration()))))
            // Find the mean session duration in each window.
            .apply(Mean.<Integer>globally().withoutDefaults())
            // Write this info to a BigQuery table.
            .apply("WriteAvgSessionLength",
                    new WriteWindowedToBigQuery<>(options.as(GcpOptions.class).getProject(),
                            options.getDataset(), options.getGameStatsTablePrefix() + "_sessions",
                            configureSessionWindowWrite()));
    // [END DocInclude_Rewindow]

    // Run the pipeline and wait for the pipeline to finish; capture cancellation requests from the
    // command line.
    PipelineResult result = pipeline.run();
    exampleUtils.waitToFinish(result);
}

From source file: org.apache.beam.examples.complete.game.HourlyTeamScore.java

License: Apache License

/** Run a batch pipeline to do windowed analysis of the data. */
// [START DocInclude_HTSMain]
public static void main(String[] args) throws Exception {
    // Begin constructing a pipeline configured by commandline flags.
    Options options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class);
    Pipeline pipeline = Pipeline.create(options);

    final Instant stopMinTimestamp = new Instant(minFmt.parseMillis(options.getStopMin()));
    final Instant startMinTimestamp = new Instant(minFmt.parseMillis(options.getStartMin()));

    // Read 'gaming' events from a text file.
    pipeline.apply(TextIO.read().from(options.getInput()))
            // Parse the incoming data.
            .apply("ParseGameEvent", ParDo.of(new ParseEventFn()))

            // Filter out data before and after the given times so that it is not included
            // in the calculations. As we collect data in batches (say, by day), the batch for the day
            // that we want to analyze could potentially include some late-arriving data from the
            // previous day.
            // If so, we want to weed it out. Similarly, if we include data from the following day
            // (to scoop up late-arriving events from the day we're analyzing), we need to weed out
            // events that fall after the time period we want to analyze.
            // [START DocInclude_HTSFilters]
            .apply("FilterStartTime",
                    Filter.by((GameActionInfo gInfo) -> gInfo.getTimestamp() > startMinTimestamp.getMillis()))
            .apply("FilterEndTime",
                    Filter.by((GameActionInfo gInfo) -> gInfo.getTimestamp() < stopMinTimestamp.getMillis()))
            // [END DocInclude_HTSFilters]

            // [START DocInclude_HTSAddTsAndWindow]
            // Add an element timestamp based on the event log, and apply fixed windowing.
            .apply("AddEventTimestamps", WithTimestamps.of((GameActionInfo i) -> new Instant(i.getTimestamp())))
            .apply("FixedWindowsTeam",
                    Window.into(FixedWindows.of(Duration.standardMinutes(options.getWindowDuration()))))
            // [END DocInclude_HTSAddTsAndWindow]

            // Extract and sum teamname/score pairs from the event data.
            .apply("ExtractTeamScore", new ExtractAndSumScore("team"))
            .apply("WriteTeamScoreSums", new WriteToText<>(options.getOutput(), configureOutput(), true));

    pipeline.run().waitUntilFinish();
}

From source file: org.apache.beam.examples.complete.game.LeaderBoard.java

License: Apache License

public static void main(String[] args) throws Exception {

    Options options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class);
    // Enforce that this pipeline is always run in streaming mode.
    options.setStreaming(true);
    ExampleUtils exampleUtils = new ExampleUtils(options);
    Pipeline pipeline = Pipeline.create(options);

    // Read game events from Pub/Sub using custom timestamps, which are extracted from the pubsub
    // data elements, and parse the data.
    PCollection<GameActionInfo> gameEvents = pipeline.apply(PubsubIO.readStrings()
            .withTimestampAttribute(GameConstants.TIMESTAMP_ATTRIBUTE).fromTopic(options.getTopic()))
            .apply("ParseGameEvent", ParDo.of(new ParseEventFn()));

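    // Calculate team scores over fixed windows, allowing for late-arriving data
    // up to the configured allowed lateness.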
    gameEvents
            .apply("CalculateTeamScores",
                    new CalculateTeamScores(Duration.standardMinutes(options.getTeamWindowDuration()),
                            Duration.standardMinutes(options.getAllowedLateness())))
            // Write the results to BigQuery.
            .apply("WriteTeamScoreSums",
                    new WriteWindowedToBigQuery<>(options.as(GcpOptions.class).getProject(),
                            options.getDataset(), options.getLeaderBoardTableName() + "_team",
                            configureWindowedTableWrite()));
    gameEvents
            .apply("CalculateUserScores",
                    new CalculateUserScores(Duration.standardMinutes(options.getAllowedLateness())))
            // Write the results to BigQuery.
            .apply("WriteUserScoreSums",
                    new WriteToBigQuery<>(options.as(GcpOptions.class).getProject(), options.getDataset(),
                            options.getLeaderBoardTableName() + "_user", configureGlobalWindowBigQueryWrite()));

    // Run the pipeline and wait for the pipeline to finish; capture cancellation requests from the
    // command line.
    PipelineResult result = pipeline.run();
    exampleUtils.waitToFinish(result);
}

From source file: org.apache.beam.examples.complete.TrafficMaxLaneFlow.java

License: Apache License

public static void runTrafficMaxLaneFlow(TrafficMaxLaneFlowOptions options) throws IOException {
    // Using ExampleUtils to set up required resources.
    ExampleUtils exampleUtils = new ExampleUtils(options);
    exampleUtils.setup();

    Pipeline pipeline = Pipeline.create(options);
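    // Build the reference to the BigQuery table that will receive the results.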
    TableReference tableRef = new TableReference();
    tableRef.setProjectId(options.getProject());
    tableRef.setDatasetId(options.getBigQueryDataset());
    tableRef.setTableId(options.getBigQueryTable());

    pipeline.apply("ReadLines", new ReadFileAndExtractTimestamps(options.getInputFile()))
            // row... => <station route, station speed> ...
            .apply(ParDo.of(new ExtractFlowInfoFn()))
            // map the incoming data stream into sliding windows.
            .apply(Window.into(SlidingWindows.of(Duration.standardMinutes(options.getWindowDuration()))
                    .every(Duration.standardMinutes(options.getWindowSlideEvery()))))
            .apply(new MaxLaneFlow())
            .apply(BigQueryIO.writeTableRows().to(tableRef).withSchema(FormatMaxesFn.getSchema()));

    // Run the pipeline.
    PipelineResult result = pipeline.run();

    // ExampleUtils will try to cancel the pipeline and the injector before the program exits.
    exampleUtils.waitToFinish(result);
}

From source file: org.apache.beam.examples.complete.TrafficRoutes.java

License: Apache License

public static void runTrafficRoutes(TrafficRoutesOptions options) throws IOException {
    // Using ExampleUtils to set up required resources.
    ExampleUtils exampleUtils = new ExampleUtils(options);
    exampleUtils.setup();

    Pipeline pipeline = Pipeline.create(options);
    TableReference tableRef = new TableReference();
    tableRef.setProjectId(options.getProject());
    tableRef.setDatasetId(options.getBigQueryDataset());
    tableRef.setTableId(options.getBigQueryTable());

    pipeline.apply("ReadLines", new ReadFileAndExtractTimestamps(options.getInputFile()))
            // row... => <station route, station speed> ...
            .apply(ParDo.of(new ExtractStationSpeedFn()))
            // map the incoming data stream into sliding windows.
            .apply(Window.into(SlidingWindows.of(Duration.standardMinutes(options.getWindowDuration()))
                    .every(Duration.standardMinutes(options.getWindowSlideEvery()))))
            .apply(new TrackSpeed())
            .apply(BigQueryIO.writeTableRows().to(tableRef).withSchema(FormatStatsFn.getSchema()));

    // Run the pipeline.
    PipelineResult result = pipeline.run();

    // ExampleUtils will try to cancel the pipeline and the injector before the program exits.
    exampleUtils.waitToFinish(result);
}

From source file: org.apache.beam.examples.snippets.Snippets.java

License: Apache License

public static void fileProcessPattern() throws Exception {
    Pipeline p = Pipeline.create();

    // [START FileProcessPatternProcessNewFilesSnip1]
    // This produces PCollection<MatchResult.Metadata>
    p.apply(FileIO.match().filepattern("...").continuously(Duration.standardSeconds(30),
            Watch.Growth.afterTimeSinceNewOutput(Duration.standardHours(1))));
    // [END FileProcessPatternProcessNewFilesSnip1]

    // [START FileProcessPatternProcessNewFilesSnip2]
    // This produces PCollection<String>
    p.apply(TextIO.read().from("<path-to-files>/*").watchForNewFiles(
            // Check for new files every minute.
            Duration.standardMinutes(1),
            // Stop watching the file pattern if no new files appear for an hour.
            Watch.Growth.afterTimeSinceNewOutput(Duration.standardHours(1))));
    // [END FileProcessPatternProcessNewFilesSnip2]

    // [START FileProcessPatternAccessMetadataSnip1]
    p.apply(FileIO.match().filepattern("hdfs://path/to/*.gz"))
            // The withCompression method is optional. By default, the Beam SDK detects compression from
            // the filename.
            .apply(FileIO.readMatches().withCompression(Compression.GZIP))
            .apply(ParDo.of(new DoFn<FileIO.ReadableFile, String>() {
                @ProcessElement
                public void process(@Element FileIO.ReadableFile file) {
                    // We can now access the file and its metadata.
                    LOG.info("File Metadata resourceId is {} ", file.getMetadata().resourceId());
                }
            }));
    // [END FileProcessPatternAccessMetadataSnip1]

}

From source file: org.apache.beam.examples.tutorial.game.Exercise3.java

License: Apache License

/**
 * Run a batch pipeline to do windowed analysis of the data.
 */
public static void main(String[] args) throws Exception {
    // Begin constructing a pipeline configured by commandline flags.
    ExerciseOptions options = PipelineOptionsFactory.fromArgs(args).withValidation().as(ExerciseOptions.class);
    Pipeline pipeline = Pipeline.create(options);

    pipeline
            // Read a bounded set of generated data
            .apply(new Input.BoundedGenerator())
            // Extract and sum the windowed teamname/scores
            .apply(new WindowedTeamScore(Duration.standardMinutes(1)))
            // Write the hourly team scores to the "hourly_team_score" table
            .apply(new Output.WriteHourlyTeamScore());

    pipeline.run();
}

From source file: org.apache.beam.examples.WindowedWordCount.java

License: Apache License

static void runWindowedWordCount(Options options) throws IOException {
    final String output = options.getOutput();
    final Instant minTimestamp = new Instant(options.getMinTimestampMillis());
    final Instant maxTimestamp = new Instant(options.getMaxTimestampMillis());

    Pipeline pipeline = Pipeline.create(options);

    /*
     * Concept #1: the Beam SDK lets us run the same pipeline with either a bounded or
     * unbounded input source.
     */
    PCollection<String> input = pipeline
            /* Read from the GCS file. */
            .apply(TextIO.read().from(options.getInputFile()))
            // Concept #2: Add an element timestamp, using an artificial time just to show
            // windowing.
            // See AddTimestampFn for more detail on this.
            .apply(ParDo.of(new AddTimestampFn(minTimestamp, maxTimestamp)));

    /*
     * Concept #3: Window into fixed windows. The fixed window size for this example defaults to 1
     * minute (you can change this with a command-line option). See the documentation for more
     * information on how fixed windows work, and for information on the other types of windowing
     * available (e.g., sliding windows).
     */
    PCollection<String> windowedWords = input
            .apply(Window.into(FixedWindows.of(Duration.standardMinutes(options.getWindowSize()))));

    /*
     * Concept #4: Re-use our existing CountWords transform that does not have knowledge of
     * windows over a PCollection containing windowed values.
     */
    PCollection<KV<String, Long>> wordCounts = windowedWords.apply(new WordCount.CountWords());

    /*
     * Concept #5: Format the results and write to a sharded file partitioned by window, using a
     * simple ParDo operation. Because there may be failures followed by retries, the
     * writes must be idempotent, but the details of writing to files are elided here.
     */
    wordCounts.apply(MapElements.via(new WordCount.FormatAsTextFn()))
            .apply(new WriteOneFilePerWindow(output, options.getNumShards()));

    PipelineResult result = pipeline.run();
    try {
        result.waitUntilFinish();
    } catch (Exception exc) {
        result.cancel();
    }
}