Example usage for org.joda.time Duration standardMinutes

Introduction

On this page you can find usage examples for org.joda.time Duration standardMinutes.

Prototype

public static Duration standardMinutes(long minutes) 

Document

Creates a duration with the specified number of minutes, assuming the standard number of milliseconds in a minute (60,000).
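
A minimal standalone sketch of the method (class and variable names here are illustrative, not taken from the examples below). Because the conversion uses a fixed 60,000 milliseconds per minute, the result ignores daylight-saving transitions:

import org.joda.time.Duration;

public class StandardMinutesDemo {
    public static void main(String[] args) {
        // 90 minutes at the standard 60,000 milliseconds per minute.
        Duration d = Duration.standardMinutes(90);
        System.out.println(d.getMillis());          // 5400000
        System.out.println(d.getStandardMinutes()); // 90
        System.out.println(d.getStandardHours());   // 1
    }
}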

Usage

From source file: com.google.cloud.dataflow.tutorials.game.solutions.Exercise6.java

License: Apache License

public static void main(String[] args) throws Exception {

    Exercise6Options options = PipelineOptionsFactory.fromArgs(args).withValidation()
            .as(Exercise6Options.class);
    // Enforce that this pipeline is always run in streaming mode.
    options.setStreaming(true);
    // Allow the pipeline to be cancelled automatically.
    options.setRunner(DataflowPipelineRunner.class);
    Pipeline pipeline = Pipeline.create(options);

    TableReference sessionsTable = new TableReference();
    sessionsTable.setDatasetId(options.getOutputDataset());
    sessionsTable.setProjectId(options.getProject());
    sessionsTable.setTableId(options.getOutputTableName());

    PCollection<GameEvent> rawEvents = pipeline.apply(new Exercise3.ReadGameEvents(options));

    // Extract username/score pairs from the event stream
    PCollection<KV<String, Integer>> userEvents = rawEvents.apply("ExtractUserScore",
            MapElements.via((GameEvent gInfo) -> KV.of(gInfo.getUser(), gInfo.getScore()))
                    .withOutputType(new TypeDescriptor<KV<String, Integer>>() {
                    }));

    // Detect user sessions-- that is, a burst of activity separated by a gap from further
    // activity. Find and record the mean session lengths.
    // This information could help the game designers track the changing user engagement
    // as their set of games changes.
    userEvents
            .apply(Window.named("WindowIntoSessions")
                    .<KV<String, Integer>>into(
                            Sessions.withGapDuration(Duration.standardMinutes(options.getSessionGap())))
                    .withOutputTimeFn(OutputTimeFns.outputAtEndOfWindow()))
            // For this use, we care only about the existence of the session, not any particular
            // information aggregated over it, so the following is an efficient way to do that.
            .apply(Combine.perKey(x -> 0))
            // Get the duration per session.
            .apply("UserSessionActivity", ParDo.of(new UserSessionInfoFn()))
            // Re-window to process groups of session sums according to when the sessions complete.
            .apply(Window.named("WindowToExtractSessionMean").<Integer>into(
                    FixedWindows.of(Duration.standardMinutes(options.getUserActivityWindowDuration()))))
            // Find the mean session duration in each window.
            .apply(Mean.<Integer>globally().withoutDefaults())
            // Write this info to a BigQuery table.
            .apply(ParDo.named("FormatSessions").of(new FormatSessionWindowFn()))
            .apply(BigQueryIO.Write.to(sessionsTable).withSchema(FormatSessionWindowFn.getSchema())
                    .withCreateDisposition(CreateDisposition.CREATE_IF_NEEDED)
                    .withWriteDisposition(WriteDisposition.WRITE_APPEND));

    // Run the pipeline and wait for the pipeline to finish; capture cancellation requests from the
    // command line.
    PipelineResult result = pipeline.run();
}

From source file: com.google.cloud.dataflow.tutorials.game.solutions.Exercise7.java

License: Apache License

public static void main(String[] args) throws Exception {
    Exercise7Options options = PipelineOptionsFactory.fromArgs(args).withValidation()
            .as(Exercise7Options.class);
    // Enforce that this pipeline is always run in streaming mode.
    options.setStreaming(true);
    // Allow the pipeline to be cancelled automatically.
    options.setRunner(DataflowPipelineRunner.class);
    Pipeline pipeline = Pipeline.create(options);

    TableReference badUserTable = new TableReference();
    badUserTable.setDatasetId(options.getOutputDataset());
    badUserTable.setProjectId(options.getProject());
    badUserTable.setTableId(options.getOutputTableName() + "_bad_users");

    // Read Events from Pub/Sub using custom timestamps and custom message id label.
    PCollection<KV<String, GameEvent>> sessionedEvents = pipeline
            .apply("ReadGameScoreEvents",
                    PubsubIO.Read.timestampLabel(TIMESTAMP_ATTRIBUTE).idLabel(MESSAGE_ID_ATTRIBUTE)
                            .topic(options.getTopic()))
            .apply("ParseGameScoreEvents", ParDo.of(new ParseEventFn()))
            .apply("KeyGameScoreByEventId",
                    WithKeys.of((GameEvent event) -> event.getEventId())
                            .withKeyType(TypeDescriptor.of(String.class)))
            .apply("SessionizeGameScoreEvents",
                    Window.<KV<String, GameEvent>>into(
                            Sessions.withGapDuration(Duration.standardMinutes(SESSION_GAP_MINUTES)))
                            .withOutputTimeFn(OutputTimeFns.outputAtEndOfWindow()));

    // Read PlayEvents from Pub/Sub using custom timestamps and custom message id label.
    PCollection<KV<String, PlayEvent>> sessionedPlayEvents = pipeline
            .apply("ReadGamePlayEvents",
                    PubsubIO.Read.timestampLabel(TIMESTAMP_ATTRIBUTE).idLabel(MESSAGE_ID_ATTRIBUTE)
                            .topic(options.getPlayEventsTopic()))
            .apply("ParseGamePlayEvents", ParDo.of(new ParsePlayEventFn()))
            .apply("KeyGamePlayByEventId",
                    WithKeys.of((PlayEvent play) -> play.getEventId())
                            .withKeyType(TypeDescriptor.of(String.class)))
            .apply("SessionizeGamePlayEvents",
                    Window.<KV<String, PlayEvent>>into(
                            Sessions.withGapDuration(Duration.standardMinutes(SESSION_GAP_MINUTES)))
                            .withOutputTimeFn(OutputTimeFns.outputAtEndOfWindow()));

    // Compute per-user latency.
    PCollection<KV<String, Long>> userLatency = KeyedPCollectionTuple.of(playTag, sessionedPlayEvents)
            .and(eventTag, sessionedEvents).apply("JoinScorePlayEvents", CoGroupByKey.create())
            .apply("ComputeLatency", ParDo.of(new ComputeLatencyFn()));

    // Create a view onto quantiles of the global latency distribution.
    PCollectionView<List<Long>> globalQuantiles = userLatency.apply("GetLatencies", Values.create())
            // Re-window session results into a global window, and trigger periodically making sure
            // to use the full accumulated window contents.
            .apply("GlobalWindowRetrigger",
                    Window.<Long>into(new GlobalWindows())
                            .triggering(Repeatedly.forever(AfterPane.elementCountAtLeast(100)))
                            .accumulatingFiredPanes())
            .apply(((Combine.Globally<Long, List<Long>>) ApproximateQuantiles
                    .<Long>globally(GLOBAL_LATENCY_QUANTILES)).withFanout(GLOBAL_AGGREGATE_FANOUT)
                            .asSingletonView());

    userLatency
            // Use the computed latency distribution as a side-input to filter out likely bad users.
            .apply("DetectBadUsers",
                    ParDo.withSideInputs(globalQuantiles).of(new DoFn<KV<String, Long>, String>() {
                        public void processElement(ProcessContext c) {
                            String user = c.element().getKey();
                            Long latency = c.element().getValue();
                            List<Long> quantiles = c.sideInput(globalQuantiles);
                            // Users in the first quantile are considered spammers, since their
                            // score-to-play-event latency is too low, suggesting a robot.
                            if (latency < quantiles.get(1)) {
                                c.output(user);
                            }
                        }
                    }))
            // We want to emit only a single BigQuery row for every bad user. To do this, we
            // re-key by user, then window globally and trigger on the first element for each key.
            .apply("KeyByUser", WithKeys.of((String user) -> user).withKeyType(TypeDescriptor.of(String.class)))
            .apply("GlobalWindowsTriggerOnFirst",
                    Window.<KV<String, String>>into(new GlobalWindows())
                            .triggering(AfterProcessingTime.pastFirstElementInPane()
                                    .plusDelayOf(Duration.standardSeconds(10)))
                            .accumulatingFiredPanes())
            .apply("GroupByUser", GroupByKey.<String, String>create())
            .apply("FormatBadUsers", ParDo.of(new FormatBadUserFn())).apply("WriteBadUsers",
                    BigQueryIO.Write.to(badUserTable).withSchema(FormatBadUserFn.getSchema())
                            .withCreateDisposition(CreateDisposition.CREATE_IF_NEEDED)
                            .withWriteDisposition(WriteDisposition.WRITE_APPEND));

    // Run the pipeline and wait for the pipeline to finish; capture cancellation requests from the
    // command line.
    PipelineResult result = pipeline.run();
}

From source file: com.google.cloud.sparkdemo.HourlyTeamScore.java

License: Apache License

/**
 * Run a batch pipeline.
 **/
public static void main(String[] args) throws Exception {
    HourlyTeamScoreOptions options = new HourlyTeamScoreOptions();
    options.parse(args);

    SparkConf sc = new SparkConf().setAppName("HourlyTeamScore");
    JavaSparkContext jsc = new JavaSparkContext(sc);

    Configuration hadoopConf = jsc.hadoopConfiguration();
    configureBigQueryOutput(hadoopConf, options.getProject(), options.getDataset(), options.getTableName(),
            options.getTableSchema());

    final Long startMinTimestamp = timestampParser.parseMillis(options.getStartMin());
    final Long stopMinTimestamp = timestampParser.parseMillis(options.getStopMin());
    final Long windowDuration = Duration.standardMinutes(options.getWindowDuration()).getMillis();

    // Run a pipeline to analyze all the data in batch.
    // First, read events from a text file and parse them.
    JavaRDD<GameActionInfo> gameEvents = jsc.textFile(options.getInput()).flatMap(new ParseEventFn())
            // Filter out data before and after the given times so that it is not included
            // in the calculations. As we collect data in batches (say, by day), the batch for
            // the day that we want to analyze could potentially include some late-arriving
            // data from the previous day. If so, we want to weed it out. Similarly, if we include
            // data from the following day (to scoop up late-arriving events from the day we're
            // analyzing), we need to weed out events that fall after the time period we want to
            // analyze.
            .filter((GameActionInfo gInfo) -> gInfo.getTimestamp() > startMinTimestamp)
            .filter((GameActionInfo gInfo) -> gInfo.getTimestamp() < stopMinTimestamp);

    JavaPairRDD<WithTimestamp<String>, Integer> hourlyTeamScores = gameEvents.mapToPair(event -> new Tuple2<>(
            // Extract the composite key as <team, window_start_time>
            WithTimestamp.create(event.getTeam(),
                    // Apply Fixed Window by rounding the timestamp down to the nearest
                    // multiple of the window size
                    (event.getTimestamp() / windowDuration) * windowDuration),
            // Extract the scores as values
            event.getScore()))
            // Compute the sum of the scores per team per window
            .reduceByKey(new SumScore());

    // Write to a BigQuery table
    JavaPairRDD<String, JsonObject> jsonPairs = hourlyTeamScores.mapToPair(convertToJson);
    jsonPairs.saveAsNewAPIHadoopDataset(hadoopConf);
}

From source file: com.google.cloud.training.dataanalyst.flights.PredictRealtime.java

License: Apache License

@SuppressWarnings("serial")
public static void main(String[] args) {
    // create pipeline from options
    MyOptions options = PipelineOptionsFactory.fromArgs(args).withValidation().as(MyOptions.class);
    boolean streaming = options.getInput().contains("/topics/");
    if (streaming) {
        LOG.info("Creating real-time pipeline that reads from Pub/Sub I/O");
        options.setStreaming(true);
        options.setRunner(DataflowPipelineRunner.class);
    }
    Pipeline p = Pipeline.create(options);

    // read delays-*.csv into memory for use as a side-input
    PCollectionView<Map<String, Double>> delays = getAverageDelays(p, options.getDelayPath());

    // read flights, either batch or in 1-hr windows every minute
    PCollection<String> lines;
    if (streaming) {
        // real-time for pub-sub
        lines = p.apply("ReadLines", PubsubIO.Read.topic(options.getInput())) //
                .apply("window", Window.into(SlidingWindows//
                        .of(Duration.standardMinutes(60))//
                        .every(Duration.standardMinutes(1))));
    } else {
        // batch, from text
        lines = p.apply("ReadLines", TextIO.Read.from(options.getInput()));
    }

    PCollection<Flight> flights = lines.apply("ParseFlights",
            ParDo.withSideInputs(delays).of(new ParseFlights(delays, streaming))) //
    ;

    PCollectionView<Map<String, Double>> arrDelay = flights
            .apply("airport:hour", ParDo.of(new DoFn<Flight, KV<String, Double>>() {

                @Override
                public void processElement(ProcessContext c) throws Exception {
                    Flight f = c.element();
                    if (f.arrHour != ParseFlights.INVALID_HOUR) {
                        String key = "arr_" + f.toAirport + ":" + f.date + ":" + f.arrHour;
                        double value = f.arrivalDelay;
                        c.output(KV.of(key, value));
                    }
                }

            })) //
            .apply(Mean.perKey()) //
            .apply(View.asMap());

    PCollection<String> pred = flights.apply("Predict",
            ParDo.withSideInputs(arrDelay).of(new DoFn<Flight, String>() {

                // FIXME: distribute predictions to different machines
                transient TensorflowModel tfModel = new TensorflowModel(options.getModelfile(),
                        options.getGraphfile());

                @Override
                public void processElement(ProcessContext c) throws Exception {
                    Flight f = c.element();
                    if (f.arrHour == ParseFlights.INVALID_HOUR) {
                        // don't know when this flight is arriving, so predict ...
                        f = f.newCopy();
                        // get average arrival delay
                        String key = "arr_" + f.toAirport + ":" + f.date + ":" + (f.depHour - 1);
                        Double delay = c.sideInput(arrDelay).get(key);
                        f.averageArrivalDelay = (delay == null) ? 0 : delay;

                        // predict
                        boolean ontime = tfModel.predict(f.getInputFeatures()) > 0.5;

                        // output
                        c.output(f.line + "," + ontime);
                    }
                }
            }));

    if (streaming) {
        pred.apply("WriteFlights", PubsubIO.Write.topic(options.getOutput()));
    } else {
        pred.apply("WriteFlights", TextIO.Write.to(options.getOutput() + "flights").withSuffix(".csv"));
    }

    p.run();
}

From source file: com.google.cloud.training.dataanalyst.flights.PredictRealtime.java

License: Apache License

public static Instant toInstant(String date, String hourmin) {
    // e.g.: 2015-01-01 and 0837
    int hrmin = Integer.parseInt(hourmin);
    int hr = hrmin / 100;
    int min = hrmin % 100;
    return Instant.parse(date) //
            .plus(Duration.standardHours(hr)) //
            .plus(Duration.standardMinutes(min));
}
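
A quick trace of the helper above, using the sample values from its comment: "0837" splits into hr = 837 / 100 = 8 and min = 837 % 100 = 37, so the parsed date is advanced by 8 standard hours and 37 standard minutes:

// Hypothetical call, using the sample inputs from the comment above:
Instant arrival = toInstant("2015-01-01", "0837");
// => 08:37 on 2015-01-01 (the date parses to midnight in the
//    parser's default time zone before the offsets are added)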

From source file: com.google.cloud.training.dataanalyst.javahelp.f1_11_streaming_pipeline.java

License: Apache License

@SuppressWarnings("serial")
public static void main(String[] args) {
    MyOptions options = PipelineOptionsFactory.fromArgs(args).withValidation().as(MyOptions.class);
    options.setStreaming(true);
    Pipeline p = Pipeline.create(options);

    String topic = options.getInput();
    String output = options.getOutput();

    // Build the table schema for the output table.
    List<TableFieldSchema> fields = new ArrayList<>();
    fields.add(new TableFieldSchema().setName("timestamp").setType("TIMESTAMP"));
    fields.add(new TableFieldSchema().setName("num_words").setType("INTEGER"));
    TableSchema schema = new TableSchema().setFields(fields);

    p //
            .apply("GetMessages", PubsubIO.Read.topic(topic)) //
            .apply("window", Window.into(SlidingWindows//
                    .of(Duration.standardMinutes(2))//
                    .every(Duration.standardSeconds(30)))) //
            .apply("WordsPerLine", ParDo.of(new DoFn<String, Integer>() {
                @Override
                public void processElement(ProcessContext c) throws Exception {
                    String line = c.element();
                    c.output(line.split(" ").length);
                }
            }))//
            .apply("WordsInTimeWindow", Sum.integersGlobally().withoutDefaults()) //
            .apply("ToBQRow", ParDo.of(new DoFn<Integer, TableRow>() {
                @Override
                public void processElement(ProcessContext c) throws Exception {
                    TableRow row = new TableRow();
                    row.set("timestamp", new Date().getTime());
                    row.set("num_words", c.element());
                    c.output(row);
                }
            })) //
            .apply(BigQueryIO.Write.to(output)//
                    .withSchema(schema)//
                    .withCreateDisposition(BigQueryIO.Write.CreateDisposition.CREATE_IF_NEEDED));

    p.run();
}

From source file: com.google.cloud.training.dataanalyst.javahelp.StreamDemoConsumer.java

License: Apache License

@SuppressWarnings("serial")
public static void main(String[] args) {
    MyOptions options = PipelineOptionsFactory.fromArgs(args).withValidation().as(MyOptions.class);
    options.setRunner(DataflowPipelineRunner.class);
    options.setStreaming(true);

    Pipeline p = Pipeline.create(options);

    String topic = options.getInput();
    String output = options.getOutput();

    p //
            .apply("GetMessages", PubsubIO.Read.topic(topic)) //
            .apply("window", Window.into(SlidingWindows//
                    .of(Duration.standardMinutes(2))//
                    .every(Duration.standardSeconds(30)))) //
            .apply("LineLength", ParDo.of(new DoFn<String, Integer>() {
                @Override
                public void processElement(ProcessContext c) throws Exception {
                    String line = c.element();
                    c.output(line.length());
                }
            }))//
            .apply(Sum.integersGlobally().withoutDefaults()) //
            .apply("ToString", ParDo.of(new DoFn<Integer, String>() {

                @Override
                public void processElement(ProcessContext c) throws Exception {
                    c.output(c.element().toString());
                }

            })) //
            .apply(PubsubIO.Write.topic(output));

    p.run();
}

From source file: com.google.codelabs.dataflow.ExactDollarRides.java

License: Apache License

public static void main(String[] args) {
    CustomPipelineOptions options = PipelineOptionsFactory.fromArgs(args).withValidation()
            .as(CustomPipelineOptions.class);
    Pipeline p = Pipeline.create(options);

    p.apply(PubsubIO.Read.named("read from PubSub")
            .topic(String.format("projects/%s/topics/%s", options.getSourceProject(), options.getSourceTopic()))
            .timestampLabel("ts").withCoder(TableRowJsonCoder.of()))

            .apply("extract dollars",
                    MapElements.via((TableRow x) -> Double.parseDouble(x.get("meter_increment").toString()))
                            .withOutputType(TypeDescriptor.of(Double.class)))

            .apply("fixed window", Window.into(FixedWindows.of(Duration.standardMinutes(1))))
            .apply("trigger", Window
                    .<Double>triggering(AfterWatermark.pastEndOfWindow()
                            .withEarlyFirings(AfterProcessingTime.pastFirstElementInPane()
                                    .plusDelayOf(Duration.standardSeconds(1)))
                            .withLateFirings(AfterPane.elementCountAtLeast(1)))
                    .accumulatingFiredPanes().withAllowedLateness(Duration.standardMinutes(5)))

            .apply("sum whole window", Sum.doublesGlobally().withoutDefaults())
            .apply("format rides", ParDo.of(new TransformRides()))

            .apply(PubsubIO.Write
                    .named("WriteToPubsub").topic(String.format("projects/%s/topics/%s",
                            options.getSinkProject(), options.getSinkTopic()))
                    .withCoder(TableRowJsonCoder.of()));
    p.run();
}

From source file: com.google.codelabs.dataflow.LatestRides.java

License: Apache License

public static void main(String[] args) {
    CustomPipelineOptions options = PipelineOptionsFactory.fromArgs(args).withValidation()
            .as(CustomPipelineOptions.class);
    Pipeline p = Pipeline.create(options);

    p.apply(PubsubIO.Read.named("read from PubSub")
            .topic(String.format("projects/%s/topics/%s", options.getSourceProject(), options.getSourceTopic()))
            .timestampLabel("ts").withCoder(TableRowJsonCoder.of()))

            .apply("key rides by rideid",
                    MapElements.via((TableRow ride) -> KV.of(ride.get("ride_id").toString(), ride))
                            .withOutputType(new TypeDescriptor<KV<String, TableRow>>() {
                            }))

            .apply("session windows on rides with early firings", Window
                    .<KV<String, TableRow>>into(Sessions.withGapDuration(Duration.standardMinutes(60)))
                    .triggering(AfterWatermark.pastEndOfWindow().withEarlyFirings(
                            AfterProcessingTime.pastFirstElementInPane().plusDelayOf(Duration.millis(2000))))
                    .accumulatingFiredPanes().withAllowedLateness(Duration.ZERO))

            .apply("group ride points on same ride", Combine.perKey(new LatestPointCombine()))

            .apply("discard key",
                    MapElements.via((KV<String, TableRow> a) -> a.getValue())
                            .withOutputType(TypeDescriptor.of(TableRow.class)))

            .apply(PubsubIO.Write
                    .named("WriteToPubsub").topic(String.format("projects/%s/topics/%s",
                            options.getSinkProject(), options.getSinkTopic()))
                    .withCoder(TableRowJsonCoder.of()));
    p.run();
}

From source file: com.google.codelabs.dataflow.PickupRides.java

License: Apache License

public static void main(String[] args) {
    CustomPipelineOptions options = PipelineOptionsFactory.fromArgs(args).withValidation()
            .as(CustomPipelineOptions.class);
    Pipeline p = Pipeline.create(options);

    p.apply(PubsubIO.Read.named("read from PubSub")
            .topic(String.format("projects/%s/topics/%s", options.getSourceProject(), options.getSourceTopic()))
            .timestampLabel("ts").withCoder(TableRowJsonCoder.of()))

            .apply("key rides by rideid",
                    MapElements.via((TableRow ride) -> KV.of(ride.get("ride_id").toString(), ride))
                            .withOutputType(new TypeDescriptor<KV<String, TableRow>>() {
                            }))

            .apply("session windows on rides with early firings", Window
                    .<KV<String, TableRow>>into(Sessions.withGapDuration(Duration.standardMinutes(1)))
                    .triggering(AfterWatermark.pastEndOfWindow().withEarlyFirings(
                            AfterProcessingTime.pastFirstElementInPane().plusDelayOf(Duration.millis(1000))))
                    .accumulatingFiredPanes().withAllowedLateness(Duration.ZERO))

            .apply("group ride points on same ride", Combine.perKey(new PickupPointCombine()))

            .apply("discard key",
                    MapElements.via((KV<String, TableRow> a) -> a.getValue())
                            .withOutputType(TypeDescriptor.of(TableRow.class)))

            .apply("filter if no pickup",
                    Filter.byPredicate((TableRow a) -> a.get("ride_status").equals("pickup")))

            .apply(PubsubIO.Write
                    .named("WriteToPubsub").topic(String.format("projects/%s/topics/%s",
                            options.getSinkProject(), options.getSinkTopic()))
                    .withCoder(TableRowJsonCoder.of()));
    p.run();
}