List of usage examples for org.joda.time Duration standardMinutes
public static Duration standardMinutes(long minutes)
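The method returns a fixed-length Duration of minutes * 60,000 milliseconds; a "standard" minute is always 60 seconds, with no calendar or DST adjustment. Before the full pipelines below, here is a minimal, self-contained sketch of the call in isolation (the class name and variables are illustrative, not taken from the sources below):

import org.joda.time.Duration;
import org.joda.time.Instant;

public class StandardMinutesDemo {
    public static void main(String[] args) {
        // A fixed-length duration of exactly 5 minutes (5 * 60,000 ms).
        Duration gap = Duration.standardMinutes(5);
        System.out.println(gap.getMillis());           // 300000
        System.out.println(gap.getStandardMinutes());  // 5

        // Durations combine with instants for simple time arithmetic.
        Instant now = Instant.now();
        Instant later = now.plus(gap);
        System.out.println(later.isAfter(now));        // true
    }
}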
From source file:com.google.cloud.dataflow.tutorials.game.solutions.Exercise6.java
License:Apache License
public static void main(String[] args) throws Exception {
    Exercise6Options options = PipelineOptionsFactory.fromArgs(args).withValidation()
            .as(Exercise6Options.class);
    // Enforce that this pipeline is always run in streaming mode.
    options.setStreaming(true);
    // Allow the pipeline to be cancelled automatically.
    options.setRunner(DataflowPipelineRunner.class);
    Pipeline pipeline = Pipeline.create(options);

    TableReference sessionsTable = new TableReference();
    sessionsTable.setDatasetId(options.getOutputDataset());
    sessionsTable.setProjectId(options.getProject());
    sessionsTable.setTableId(options.getOutputTableName());

    PCollection<GameEvent> rawEvents = pipeline.apply(new Exercise3.ReadGameEvents(options));

    // Extract username/score pairs from the event stream
    PCollection<KV<String, Integer>> userEvents = rawEvents.apply("ExtractUserScore",
            MapElements.via((GameEvent gInfo) -> KV.of(gInfo.getUser(), gInfo.getScore()))
                    .withOutputType(new TypeDescriptor<KV<String, Integer>>() {
                    }));

    // Detect user sessions -- that is, a burst of activity separated by a gap from further
    // activity. Find and record the mean session lengths.
    // This information could help the game designers track the changing user engagement
    // as their set of games changes.
    userEvents
            .apply(Window.named("WindowIntoSessions")
                    .<KV<String, Integer>>into(
                            Sessions.withGapDuration(Duration.standardMinutes(options.getSessionGap())))
                    .withOutputTimeFn(OutputTimeFns.outputAtEndOfWindow()))
            // For this use, we care only about the existence of the session, not any particular
            // information aggregated over it, so the following is an efficient way to do that.
            .apply(Combine.perKey(x -> 0))
            // Get the duration per session.
            .apply("UserSessionActivity", ParDo.of(new UserSessionInfoFn()))
            // Re-window to process groups of session sums according to when the sessions complete.
            .apply(Window.named("WindowToExtractSessionMean").<Integer>into(
                    FixedWindows.of(Duration.standardMinutes(options.getUserActivityWindowDuration()))))
            // Find the mean session duration in each window.
            .apply(Mean.<Integer>globally().withoutDefaults())
            // Write this info to a BigQuery table.
            .apply(ParDo.named("FormatSessions").of(new FormatSessionWindowFn()))
            .apply(BigQueryIO.Write.to(sessionsTable).withSchema(FormatSessionWindowFn.getSchema())
                    .withCreateDisposition(CreateDisposition.CREATE_IF_NEEDED)
                    .withWriteDisposition(WriteDisposition.WRITE_APPEND));

    // Run the pipeline and wait for the pipeline to finish; capture cancellation requests from the
    // command line.
    PipelineResult result = pipeline.run();
}
From source file:com.google.cloud.dataflow.tutorials.game.solutions.Exercise7.java
License:Apache License
public static void main(String[] args) throws Exception {
    Exercise7Options options = PipelineOptionsFactory.fromArgs(args).withValidation()
            .as(Exercise7Options.class);
    // Enforce that this pipeline is always run in streaming mode.
    options.setStreaming(true);
    // Allow the pipeline to be cancelled automatically.
    options.setRunner(DataflowPipelineRunner.class);
    Pipeline pipeline = Pipeline.create(options);

    TableReference badUserTable = new TableReference();
    badUserTable.setDatasetId(options.getOutputDataset());
    badUserTable.setProjectId(options.getProject());
    badUserTable.setTableId(options.getOutputTableName() + "_bad_users");

    // Read Events from Pub/Sub using custom timestamps and custom message id label.
    PCollection<KV<String, GameEvent>> sessionedEvents = pipeline
            .apply("ReadGameScoreEvents",
                    PubsubIO.Read.timestampLabel(TIMESTAMP_ATTRIBUTE).idLabel(MESSAGE_ID_ATTRIBUTE)
                            .topic(options.getTopic()))
            .apply("ParseGameScoreEvents", ParDo.of(new ParseEventFn()))
            .apply("KeyGameScoreByEventId",
                    WithKeys.of((GameEvent event) -> event.getEventId())
                            .withKeyType(TypeDescriptor.of(String.class)))
            .apply("SessionizeGameScoreEvents",
                    Window.<KV<String, GameEvent>>into(
                            Sessions.withGapDuration(Duration.standardMinutes(SESSION_GAP_MINUTES)))
                            .withOutputTimeFn(OutputTimeFns.outputAtEndOfWindow()));

    // Read PlayEvents from Pub/Sub using custom timestamps and custom message id label.
    PCollection<KV<String, PlayEvent>> sessionedPlayEvents = pipeline
            .apply("ReadGamePlayEvents",
                    PubsubIO.Read.timestampLabel(TIMESTAMP_ATTRIBUTE).idLabel(MESSAGE_ID_ATTRIBUTE)
                            .topic(options.getPlayEventsTopic()))
            .apply("ParseGamePlayEvents", ParDo.of(new ParsePlayEventFn()))
            .apply("KeyGamePlayByEventId",
                    WithKeys.of((PlayEvent play) -> play.getEventId())
                            .withKeyType(TypeDescriptor.of(String.class)))
            .apply("SessionizeGamePlayEvents",
                    Window.<KV<String, PlayEvent>>into(
                            Sessions.withGapDuration(Duration.standardMinutes(SESSION_GAP_MINUTES)))
                            .withOutputTimeFn(OutputTimeFns.outputAtEndOfWindow()));

    // Compute per-user latency.
    PCollection<KV<String, Long>> userLatency = KeyedPCollectionTuple.of(playTag, sessionedPlayEvents)
            .and(eventTag, sessionedEvents)
            .apply("JoinScorePlayEvents", CoGroupByKey.create())
            .apply("ComputeLatency", ParDo.of(new ComputeLatencyFn()));

    // Create a view onto quantiles of the global latency distribution.
    PCollectionView<List<Long>> globalQuantiles = userLatency.apply("GetLatencies", Values.create())
            // Re-window session results into a global window, and trigger periodically making sure
            // to use the full accumulated window contents.
            .apply("GlobalWindowRetrigger",
                    Window.<Long>into(new GlobalWindows())
                            .triggering(Repeatedly.forever(AfterPane.elementCountAtLeast(100)))
                            .accumulatingFiredPanes())
            .apply(((Combine.Globally<Long, List<Long>>) ApproximateQuantiles
                    .<Long>globally(GLOBAL_LATENCY_QUANTILES)).withFanout(GLOBAL_AGGREGATE_FANOUT)
                            .asSingletonView());

    userLatency
            // Use the computed latency distribution as a side-input to filter out likely bad users.
            .apply("DetectBadUsers",
                    ParDo.withSideInputs(globalQuantiles).of(new DoFn<KV<String, Long>, String>() {
                        public void processElement(ProcessContext c) {
                            String user = c.element().getKey();
                            Long latency = c.element().getValue();
                            List<Long> quantiles = c.sideInput(globalQuantiles);
                            // Users in the first quantile are considered spammers, since their
                            // score to play event latency is too low, suggesting a robot.
                            if (latency < quantiles.get(1)) {
                                c.output(user);
                            }
                        }
                    }))
            // We want to emit only a single BigQuery row for every bad user. To do this, we
            // re-key by user, then window globally and trigger on the first element for each key.
            .apply("KeyByUser",
                    WithKeys.of((String user) -> user).withKeyType(TypeDescriptor.of(String.class)))
            .apply("GlobalWindowsTriggerOnFirst",
                    Window.<KV<String, String>>into(new GlobalWindows())
                            .triggering(AfterProcessingTime.pastFirstElementInPane()
                                    .plusDelayOf(Duration.standardSeconds(10)))
                            .accumulatingFiredPanes())
            .apply("GroupByUser", GroupByKey.<String, String>create())
            .apply("FormatBadUsers", ParDo.of(new FormatBadUserFn()))
            .apply("WriteBadUsers",
                    BigQueryIO.Write.to(badUserTable).withSchema(FormatBadUserFn.getSchema())
                            .withCreateDisposition(CreateDisposition.CREATE_IF_NEEDED)
                            .withWriteDisposition(WriteDisposition.WRITE_APPEND));

    // Run the pipeline and wait for the pipeline to finish; capture cancellation requests from the
    // command line.
    PipelineResult result = pipeline.run();
}
From source file:com.google.cloud.sparkdemo.HourlyTeamScore.java
License:Apache License
/**
 * Run a batch pipeline.
 **/
public static void main(String[] args) throws Exception {
    HourlyTeamScoreOptions options = new HourlyTeamScoreOptions();
    options.parse(args);

    SparkConf sc = new SparkConf().setAppName("HourlyTeamScore");
    JavaSparkContext jsc = new JavaSparkContext(sc);
    Configuration hadoopConf = jsc.hadoopConfiguration();
    configureBigQueryOutput(hadoopConf, options.getProject(), options.getDataset(), options.getTableName(),
            options.getTableSchema());

    final Long startMinTimestamp = timestampParser.parseMillis(options.getStartMin());
    final Long stopMinTimestamp = timestampParser.parseMillis(options.getStopMin());
    final Long windowDuration = Duration.standardMinutes(options.getWindowDuration()).getMillis();

    // Run a pipeline to analyze all the data in batch.
    // First, read events from a text file and parse them.
    JavaRDD<GameActionInfo> gameEvents = jsc.textFile(options.getInput()).flatMap(new ParseEventFn())
            // Filter out data before and after the given times so that it is not included
            // in the calculations. As we collect data in batches (say, by day), the batch for
            // the day that we want to analyze could potentially include some late-arriving
            // data from the previous day. If so, we want to weed it out. Similarly, if we include
            // data from the following day (to scoop up late-arriving events from the day we're
            // analyzing), we need to weed out events that fall after the time period we want to
            // analyze.
            .filter((GameActionInfo gInfo) -> gInfo.getTimestamp() > startMinTimestamp)
            .filter((GameActionInfo gInfo) -> gInfo.getTimestamp() < stopMinTimestamp);

    JavaPairRDD<WithTimestamp<String>, Integer> hourlyTeamScores = gameEvents
            .mapToPair(event -> new Tuple2<>(
                    // Extract the composite key as <team, window_start_time>
                    WithTimestamp.create(event.getTeam(),
                            // Apply Fixed Window by rounding the timestamp down to the nearest
                            // multiple of the window size
                            (event.getTimestamp() / windowDuration) * windowDuration),
                    // Extract the scores as values
                    event.getScore()))
            // Compute the sum of the scores per team per window
            .reduceByKey(new SumScore());

    // Write to a BigQuery table
    JavaPairRDD<String, JsonObject> jsonPairs = hourlyTeamScores.mapToPair(convertToJson);
    jsonPairs.saveAsNewAPIHadoopDataset(hadoopConf);
}
From source file:com.google.cloud.training.dataanalyst.flights.PredictRealtime.java
License:Apache License
@SuppressWarnings("serial") public static void main(String[] args) { // create pipeline from options MyOptions options = PipelineOptionsFactory.fromArgs(args).withValidation().as(MyOptions.class); boolean streaming = options.getInput().contains("/topics/"); if (streaming) { LOG.info("Creating real-time pipeline that reads from Pub/Sub I/O"); options.setStreaming(true);/*from ww w.ja v a 2 s . c o m*/ options.setRunner(DataflowPipelineRunner.class); } Pipeline p = Pipeline.create(options); // read delays-*.csv into memory for use as a side-input PCollectionView<Map<String, Double>> delays = getAverageDelays(p, options.getDelayPath()); // read flights, either batch or in 1-hr windows every minute PCollection<String> lines; if (streaming) { // real-time for pub-sub lines = p.apply("ReadLines", PubsubIO.Read.topic(options.getInput())) // .apply("window", Window.into(SlidingWindows// .of(Duration.standardMinutes(60))// .every(Duration.standardMinutes(1)))); } else { // batch, from text lines = p.apply("ReadLines", TextIO.Read.from(options.getInput())); } PCollection<Flight> flights = lines.apply("ParseFlights", ParDo.withSideInputs(delays).of(new ParseFlights(delays, streaming))) // ; PCollectionView<Map<String, Double>> arrDelay = flights .apply("airport:hour", ParDo.of(new DoFn<Flight, KV<String, Double>>() { @Override public void processElement(ProcessContext c) throws Exception { Flight f = c.element(); if (f.arrHour != ParseFlights.INVALID_HOUR) { String key = "arr_" + f.toAirport + ":" + f.date + ":" + f.arrHour; double value = f.arrivalDelay; c.output(KV.of(key, value)); } } })) // .apply(Mean.perKey()) // .apply(View.asMap()); PCollection<String> pred = flights.apply("Predict", ParDo.withSideInputs(arrDelay).of(new DoFn<Flight, String>() { // FIXME: distribute predictions to different machines transient TensorflowModel tfModel = new TensorflowModel(options.getModelfile(), options.getGraphfile()); @Override public void processElement(ProcessContext c) throws Exception { Flight f = c.element(); if (f.arrHour == ParseFlights.INVALID_HOUR) { // don't know when this flight is arriving, so predict ... f = f.newCopy(); // get average arrival delay String key = "arr_" + f.toAirport + ":" + f.date + ":" + (f.depHour - 1); Double delay = c.sideInput(arrDelay).get(key); f.averageArrivalDelay = (delay == null) ? 0 : delay; // predict boolean ontime = tfModel.predict(f.getInputFeatures()) > 0.5; // output c.output(f.line + "," + ontime); } } })); if (streaming) { pred.apply("WriteFlights", PubsubIO.Write.topic(options.getOutput())); } else { pred.apply("WriteFlights", TextIO.Write.to(options.getOutput() + "flights").withSuffix(".csv")); } p.run(); }
From source file:com.google.cloud.training.dataanalyst.flights.PredictRealtime.java
License:Apache License
public static Instant toInstant(String date, String hourmin) {
    // e.g.: 2015-01-01 and 0837
    int hrmin = Integer.parseInt(hourmin);
    int hr = hrmin / 100;
    int min = hrmin % 100;
    return Instant.parse(date)
            .plus(Duration.standardHours(hr))
            .plus(Duration.standardMinutes(min));
}
From source file:com.google.cloud.training.dataanalyst.javahelp.f1_11_streaming_pipeline.java
License:Apache License
@SuppressWarnings("serial") public static void main(String[] args) { MyOptions options = PipelineOptionsFactory.fromArgs(args).withValidation().as(MyOptions.class); options.setStreaming(true);//from www . j av a 2 s.c o m Pipeline p = Pipeline.create(options); String topic = options.getInput(); String output = options.getOutput(); // Build the table schema for the output table. List<TableFieldSchema> fields = new ArrayList<>(); fields.add(new TableFieldSchema().setName("timestamp").setType("TIMESTAMP")); fields.add(new TableFieldSchema().setName("num_words").setType("INTEGER")); TableSchema schema = new TableSchema().setFields(fields); p // .apply("GetMessages", PubsubIO.Read.topic(topic)) // .apply("window", Window.into(SlidingWindows// .of(Duration.standardMinutes(2))// .every(Duration.standardSeconds(30)))) // .apply("WordsPerLine", ParDo.of(new DoFn<String, Integer>() { @Override public void processElement(ProcessContext c) throws Exception { String line = c.element(); c.output(line.split(" ").length); } }))// .apply("WordsInTimeWindow", Sum.integersGlobally().withoutDefaults()) // .apply("ToBQRow", ParDo.of(new DoFn<Integer, TableRow>() { @Override public void processElement(ProcessContext c) throws Exception { TableRow row = new TableRow(); row.set("timestamp", new Date().getTime()); row.set("num_words", c.element()); c.output(row); } })) // .apply(BigQueryIO.Write.to(output)// .withSchema(schema)// .withCreateDisposition(BigQueryIO.Write.CreateDisposition.CREATE_IF_NEEDED)); p.run(); }
From source file:com.google.cloud.training.dataanalyst.javahelp.StreamDemoConsumer.java
License:Apache License
@SuppressWarnings("serial") public static void main(String[] args) { MyOptions options = PipelineOptionsFactory.fromArgs(args).withValidation().as(MyOptions.class); options.setRunner(DataflowPipelineRunner.class); options.setStreaming(true);/*from w w w.j ava 2 s . c o m*/ Pipeline p = Pipeline.create(options); String topic = options.getInput(); String output = options.getOutput(); p // .apply("GetMessages", PubsubIO.Read.topic(topic)) // .apply("window", Window.into(SlidingWindows// .of(Duration.standardMinutes(2))// .every(Duration.standardSeconds(30)))) // .apply("LineLength", ParDo.of(new DoFn<String, Integer>() { @Override public void processElement(ProcessContext c) throws Exception { String line = c.element(); c.output(line.length()); } }))// .apply(Sum.integersGlobally().withoutDefaults()) // .apply("ToString", ParDo.of(new DoFn<Integer, String>() { @Override public void processElement(ProcessContext c) throws Exception { c.output(c.element().toString()); } })) // .apply(PubsubIO.Write.topic(output)); p.run(); }
From source file:com.google.codelabs.dataflow.ExactDollarRides.java
License:Apache License
public static void main(String[] args) {
    CustomPipelineOptions options = PipelineOptionsFactory.fromArgs(args).withValidation()
            .as(CustomPipelineOptions.class);
    Pipeline p = Pipeline.create(options);

    p.apply(PubsubIO.Read.named("read from PubSub")
            .topic(String.format("projects/%s/topics/%s", options.getSourceProject(), options.getSourceTopic()))
            .timestampLabel("ts").withCoder(TableRowJsonCoder.of()))

            .apply("extract dollars",
                    MapElements.via((TableRow x) -> Double.parseDouble(x.get("meter_increment").toString()))
                            .withOutputType(TypeDescriptor.of(Double.class)))

            .apply("fixed window", Window.into(FixedWindows.of(Duration.standardMinutes(1))))
            .apply("trigger",
                    Window.<Double>triggering(AfterWatermark.pastEndOfWindow()
                            .withEarlyFirings(AfterProcessingTime.pastFirstElementInPane()
                                    .plusDelayOf(Duration.standardSeconds(1)))
                            .withLateFirings(AfterPane.elementCountAtLeast(1)))
                            .accumulatingFiredPanes().withAllowedLateness(Duration.standardMinutes(5)))

            .apply("sum whole window", Sum.doublesGlobally().withoutDefaults())
            .apply("format rides", ParDo.of(new TransformRides()))

            .apply(PubsubIO.Write.named("WriteToPubsub")
                    .topic(String.format("projects/%s/topics/%s", options.getSinkProject(), options.getSinkTopic()))
                    .withCoder(TableRowJsonCoder.of()));
    p.run();
}
From source file:com.google.codelabs.dataflow.LatestRides.java
License:Apache License
public static void main(String[] args) {
    CustomPipelineOptions options = PipelineOptionsFactory.fromArgs(args).withValidation()
            .as(CustomPipelineOptions.class);
    Pipeline p = Pipeline.create(options);

    p.apply(PubsubIO.Read.named("read from PubSub")
            .topic(String.format("projects/%s/topics/%s", options.getSourceProject(), options.getSourceTopic()))
            .timestampLabel("ts").withCoder(TableRowJsonCoder.of()))

            .apply("key rides by rideid",
                    MapElements.via((TableRow ride) -> KV.of(ride.get("ride_id").toString(), ride))
                            .withOutputType(new TypeDescriptor<KV<String, TableRow>>() {
                            }))

            .apply("session windows on rides with early firings",
                    Window.<KV<String, TableRow>>into(Sessions.withGapDuration(Duration.standardMinutes(60)))
                            .triggering(AfterWatermark.pastEndOfWindow().withEarlyFirings(
                                    AfterProcessingTime.pastFirstElementInPane().plusDelayOf(Duration.millis(2000))))
                            .accumulatingFiredPanes().withAllowedLateness(Duration.ZERO))

            .apply("group ride points on same ride", Combine.perKey(new LatestPointCombine()))
            .apply("discard key",
                    MapElements.via((KV<String, TableRow> a) -> a.getValue())
                            .withOutputType(TypeDescriptor.of(TableRow.class)))

            .apply(PubsubIO.Write.named("WriteToPubsub")
                    .topic(String.format("projects/%s/topics/%s", options.getSinkProject(), options.getSinkTopic()))
                    .withCoder(TableRowJsonCoder.of()));
    p.run();
}
From source file:com.google.codelabs.dataflow.PickupRides.java
License:Apache License
public static void main(String[] args) {
    CustomPipelineOptions options = PipelineOptionsFactory.fromArgs(args).withValidation()
            .as(CustomPipelineOptions.class);
    Pipeline p = Pipeline.create(options);

    p.apply(PubsubIO.Read.named("read from PubSub")
            .topic(String.format("projects/%s/topics/%s", options.getSourceProject(), options.getSourceTopic()))
            .timestampLabel("ts").withCoder(TableRowJsonCoder.of()))

            .apply("key rides by rideid",
                    MapElements.via((TableRow ride) -> KV.of(ride.get("ride_id").toString(), ride))
                            .withOutputType(new TypeDescriptor<KV<String, TableRow>>() {
                            }))

            .apply("session windows on rides with early firings",
                    Window.<KV<String, TableRow>>into(Sessions.withGapDuration(Duration.standardMinutes(1)))
                            .triggering(AfterWatermark.pastEndOfWindow().withEarlyFirings(
                                    AfterProcessingTime.pastFirstElementInPane().plusDelayOf(Duration.millis(1000))))
                            .accumulatingFiredPanes().withAllowedLateness(Duration.ZERO))

            .apply("group ride points on same ride", Combine.perKey(new PickupPointCombine()))
            .apply("discard key",
                    MapElements.via((KV<String, TableRow> a) -> a.getValue())
                            .withOutputType(TypeDescriptor.of(TableRow.class)))

            .apply("filter if no pickup",
                    Filter.byPredicate((TableRow a) -> a.get("ride_status").equals("pickup")))

            .apply(PubsubIO.Write.named("WriteToPubsub")
                    .topic(String.format("projects/%s/topics/%s", options.getSinkProject(), options.getSinkTopic()))
                    .withCoder(TableRowJsonCoder.of()));
    p.run();
}