List of usage examples for org.joda.time Duration standardSeconds
public static Duration standardSeconds(long seconds)
From source file:com.google.cloud.dataflow.tutorials.game.solutions.Exercise7.java
License:Apache License
public static void main(String[] args) throws Exception { Exercise7Options options = PipelineOptionsFactory.fromArgs(args).withValidation() .as(Exercise7Options.class); // Enforce that this pipeline is always run in streaming mode. options.setStreaming(true);//from w w w .j a v a 2 s . c o m // Allow the pipeline to be cancelled automatically. options.setRunner(DataflowPipelineRunner.class); Pipeline pipeline = Pipeline.create(options); TableReference badUserTable = new TableReference(); badUserTable.setDatasetId(options.getOutputDataset()); badUserTable.setProjectId(options.getProject()); badUserTable.setTableId(options.getOutputTableName() + "_bad_users"); // Read Events from Pub/Sub using custom timestamps and custom message id label. PCollection<KV<String, GameEvent>> sessionedEvents = pipeline .apply("ReadGameScoreEvents", PubsubIO.Read.timestampLabel(TIMESTAMP_ATTRIBUTE).idLabel(MESSAGE_ID_ATTRIBUTE) .topic(options.getTopic())) .apply("ParseGameScoreEvents", ParDo.of(new ParseEventFn())) .apply("KeyGameScoreByEventId", WithKeys.of((GameEvent event) -> event.getEventId()) .withKeyType(TypeDescriptor.of(String.class))) .apply("SessionizeGameScoreEvents", Window.<KV<String, GameEvent>>into( Sessions.withGapDuration(Duration.standardMinutes(SESSION_GAP_MINUTES))) .withOutputTimeFn(OutputTimeFns.outputAtEndOfWindow())); // Read PlayEvents from Pub/Sub using custom timestamps and custom message id label. 
PCollection<KV<String, PlayEvent>> sessionedPlayEvents = pipeline .apply("ReadGamePlayEvents", PubsubIO.Read.timestampLabel(TIMESTAMP_ATTRIBUTE).idLabel(MESSAGE_ID_ATTRIBUTE) .topic(options.getPlayEventsTopic())) .apply("ParseGamePlayEvents", ParDo.of(new ParsePlayEventFn())) .apply("KeyGamePlayByEventId", WithKeys.of((PlayEvent play) -> play.getEventId()) .withKeyType(TypeDescriptor.of(String.class))) .apply("SessionizeGamePlayEvents", Window.<KV<String, PlayEvent>>into( Sessions.withGapDuration(Duration.standardMinutes(SESSION_GAP_MINUTES))) .withOutputTimeFn(OutputTimeFns.outputAtEndOfWindow())); // Compute per-user latency. PCollection<KV<String, Long>> userLatency = KeyedPCollectionTuple.of(playTag, sessionedPlayEvents) .and(eventTag, sessionedEvents).apply("JoinScorePlayEvents", CoGroupByKey.create()) .apply("ComputeLatency", ParDo.of(new ComputeLatencyFn())); // Create a view onto quantiles of the global latency distribution. PCollectionView<List<Long>> globalQuantiles = userLatency.apply("GetLatencies", Values.create()) // Re-window session results into a global window, and trigger periodically making sure // to use the full accumulated window contents. .apply("GlobalWindowRetrigger", Window.<Long>into(new GlobalWindows()) .triggering(Repeatedly.forever(AfterPane.elementCountAtLeast(100))) .accumulatingFiredPanes()) .apply(((Combine.Globally<Long, List<Long>>) ApproximateQuantiles .<Long>globally(GLOBAL_LATENCY_QUANTILES)).withFanout(GLOBAL_AGGREGATE_FANOUT) .asSingletonView()); userLatency // Use the computed latency distribution as a side-input to filter out likely bad users. 
.apply("DetectBadUsers", ParDo.withSideInputs(globalQuantiles).of(new DoFn<KV<String, Long>, String>() { public void processElement(ProcessContext c) { String user = c.element().getKey(); Long latency = c.element().getValue(); List<Long> quantiles = c.sideInput(globalQuantiles); // Users in the first quantile are considered spammers, since their // score to play event latency is too low, suggesting a robot. if (latency < quantiles.get(1)) { c.output(user); } } })) // We want to only emilt a single BigQuery row for every bad user. To do this, we // re-key by user, then window globally and trigger on the first element for each key. .apply("KeyByUser", WithKeys.of((String user) -> user).withKeyType(TypeDescriptor.of(String.class))) .apply("GlobalWindowsTriggerOnFirst", Window.<KV<String, String>>into(new GlobalWindows()) .triggering(AfterProcessingTime.pastFirstElementInPane() .plusDelayOf(Duration.standardSeconds(10))) .accumulatingFiredPanes()) .apply("GroupByUser", GroupByKey.<String, String>create()) .apply("FormatBadUsers", ParDo.of(new FormatBadUserFn())).apply("WriteBadUsers", BigQueryIO.Write.to(badUserTable).withSchema(FormatBadUserFn.getSchema()) .withCreateDisposition(CreateDisposition.CREATE_IF_NEEDED) .withWriteDisposition(WriteDisposition.WRITE_APPEND)); // Run the pipeline and wait for the pipeline to finish; capture cancellation requests from the // command line. PipelineResult result = pipeline.run(); }
From source file:com.google.cloud.pubsub.PublisherSamples.java
License:Open Source License
/**
 * Publishes 1000 small messages through a batching {@code Publisher} and passes the
 * resulting futures to {@code handleResults}.
 *
 * <p>The publisher is configured with a 500 ms maximum batch duration, a 1,000,000-byte
 * maximum batch size and a 60-second request timeout. It is always shut down before this
 * method returns, including when publishing or result handling throws.
 *
 * @throws Exception if publishing, result handling, or publisher shutdown fails
 */
@SuppressWarnings("unchecked")
public void simplePublisher() throws Exception {
  Publisher publisher =
      Publisher.Builder.newBuilder(topic)
          .setMaxBatchDuration(new Duration(500))
          .setMaxBatchBytes(1 * 1000 * 1000)
          .setRequestTimeout(Duration.standardSeconds(60))
          .build();
  try {
    startTimeMs = System.currentTimeMillis();
    System.out.println("Publishing messages at " + startTimeMs + " ms.");

    List<ListenableFuture<String>> results = new ArrayList<>();
    for (int i = 0; i < 1000; ++i) {
      // Each message carries its own index, UTF-8 encoded, as the payload.
      PubsubMessage message =
          PubsubMessage.newBuilder().setData(ByteString.copyFromUtf8("" + i)).build();
      results.add(publisher.publish(message));
    }
    System.out.println(
        "Batched messages in " + (System.currentTimeMillis() - startTimeMs) + " ms.");

    handleResults(results);
  } finally {
    // Release the publisher even if publish() or handleResults() threw.
    publisher.shutdown();
  }
}
From source file:com.google.cloud.solutions.rtdp.Converter.java
License:Apache License
/**
 * Starts the Dataflow converter job.
 *
 * <p>Sets a unique job name, makes a best-effort attempt to create the {@code iotds}
 * BigQuery dataset, then builds and runs a pipeline that reads strings from the configured
 * Pub/Sub topic, windows them into 10-second fixed windows, converts them with
 * {@code RowGenerator}, and appends the rows to the {@code temp_sensor} table.
 *
 * @param options pipeline options supplying the project id and the Pub/Sub topic name; the
 *     job name on this object is overwritten
 * @throws IOException if the application-default credential cannot be obtained
 */
public void startConverter(RtdpOptions options) throws IOException {
  final String projectId = options.getProject();
  final String topic = options.getTopic();
  final String datasetId = "iotds";
  final String tableName = "temp_sensor";

  // The current time in milliseconds is used as a suffix to keep job names unique.
  String id = Long.toString(System.currentTimeMillis());
  options.setJobName("converter-" + id);

  GoogleCredential credential = GoogleCredential.getApplicationDefault(TRANSPORT, JSON_FACTORY);
  Bigquery bigquery = new Bigquery(new NetHttpTransport(), new JacksonFactory(), credential);

  Dataset dataset = new Dataset();
  DatasetReference datasetRef = new DatasetReference();
  datasetRef.setProjectId(projectId);
  datasetRef.setDatasetId(datasetId);
  dataset.setDatasetReference(datasetRef);
  try {
    bigquery.datasets().insert(projectId, dataset).execute();
    LOG.debug("Creating dataset : " + datasetId);
  } catch (IOException e) {
    // Best effort: the usual cause is that the dataset already exists, but any I/O failure
    // lands here, so keep the exception in the log instead of discarding it.
    LOG.debug("Could not create dataset " + datasetId + " (it may already exist).", e);
  }

  TableReference ref = new TableReference();
  ref.setProjectId(projectId);
  ref.setDatasetId(datasetId);
  ref.setTableId(tableName);

  List<TableFieldSchema> fields = new ArrayList<>();
  fields.add(new TableFieldSchema().setName("deviceid").setType("STRING"));
  fields.add(new TableFieldSchema().setName("dt").setType("DATETIME"));
  fields.add(new TableFieldSchema().setName("temp").setType("FLOAT"));
  fields.add(new TableFieldSchema().setName("lat").setType("STRING"));
  fields.add(new TableFieldSchema().setName("lng").setType("STRING"));
  TableSchema schema = new TableSchema().setFields(fields);

  Pipeline p = Pipeline.create(options);
  p.apply(PubsubIO.readStrings().fromTopic("projects/" + projectId + "/topics/" + topic))
      .apply(Window.<String>into(FixedWindows.of(Duration.standardSeconds(10))))
      .apply(ParDo.of(new RowGenerator()))
      .apply(
          BigQueryIO.writeTableRows()
              .to(ref)
              .withSchema(schema)
              .withFailedInsertRetryPolicy(InsertRetryPolicy.alwaysRetry())
              .withCreateDisposition(BigQueryIO.Write.CreateDisposition.CREATE_IF_NEEDED)
              .withWriteDisposition(BigQueryIO.Write.WriteDisposition.WRITE_APPEND));
  p.run();
}
From source file:com.google.cloud.training.dataanalyst.javahelp.f1_11_streaming_pipeline.java
License:Apache License
@SuppressWarnings("serial") public static void main(String[] args) { MyOptions options = PipelineOptionsFactory.fromArgs(args).withValidation().as(MyOptions.class); options.setStreaming(true);// www . j a va 2s .co m Pipeline p = Pipeline.create(options); String topic = options.getInput(); String output = options.getOutput(); // Build the table schema for the output table. List<TableFieldSchema> fields = new ArrayList<>(); fields.add(new TableFieldSchema().setName("timestamp").setType("TIMESTAMP")); fields.add(new TableFieldSchema().setName("num_words").setType("INTEGER")); TableSchema schema = new TableSchema().setFields(fields); p // .apply("GetMessages", PubsubIO.Read.topic(topic)) // .apply("window", Window.into(SlidingWindows// .of(Duration.standardMinutes(2))// .every(Duration.standardSeconds(30)))) // .apply("WordsPerLine", ParDo.of(new DoFn<String, Integer>() { @Override public void processElement(ProcessContext c) throws Exception { String line = c.element(); c.output(line.split(" ").length); } }))// .apply("WordsInTimeWindow", Sum.integersGlobally().withoutDefaults()) // .apply("ToBQRow", ParDo.of(new DoFn<Integer, TableRow>() { @Override public void processElement(ProcessContext c) throws Exception { TableRow row = new TableRow(); row.set("timestamp", new Date().getTime()); row.set("num_words", c.element()); c.output(row); } })) // .apply(BigQueryIO.Write.to(output)// .withSchema(schema)// .withCreateDisposition(BigQueryIO.Write.CreateDisposition.CREATE_IF_NEEDED)); p.run(); }
From source file:com.google.cloud.training.dataanalyst.javahelp.StreamDemoConsumer.java
License:Apache License
@SuppressWarnings("serial") public static void main(String[] args) { MyOptions options = PipelineOptionsFactory.fromArgs(args).withValidation().as(MyOptions.class); options.setRunner(DataflowPipelineRunner.class); options.setStreaming(true);//from www .j a v a 2 s . c o m Pipeline p = Pipeline.create(options); String topic = options.getInput(); String output = options.getOutput(); p // .apply("GetMessages", PubsubIO.Read.topic(topic)) // .apply("window", Window.into(SlidingWindows// .of(Duration.standardMinutes(2))// .every(Duration.standardSeconds(30)))) // .apply("LineLength", ParDo.of(new DoFn<String, Integer>() { @Override public void processElement(ProcessContext c) throws Exception { String line = c.element(); c.output(line.length()); } }))// .apply(Sum.integersGlobally().withoutDefaults()) // .apply("ToString", ParDo.of(new DoFn<Integer, String>() { @Override public void processElement(ProcessContext c) throws Exception { c.output(c.element().toString()); } })) // .apply(PubsubIO.Write.topic(output)); p.run(); }
From source file:com.google.cloud.training.dataanalyst.sandiego.CurrentConditions.java
License:Apache License
@SuppressWarnings("serial") public static void main(String[] args) { MyOptions options = PipelineOptionsFactory.fromArgs(args).withValidation().as(MyOptions.class); options.setStreaming(true);/*w w w. ja v a2s .co m*/ Pipeline p = Pipeline.create(options); String topic = "projects/" + options.getProject() + "/topics/sandiego"; String currConditionsTable = options.getProject() + ":demos.current_conditions"; // Build the table schema for the output table. List<TableFieldSchema> fields = new ArrayList<>(); fields.add(new TableFieldSchema().setName("timestamp").setType("TIMESTAMP")); fields.add(new TableFieldSchema().setName("latitude").setType("FLOAT")); fields.add(new TableFieldSchema().setName("longitude").setType("FLOAT")); fields.add(new TableFieldSchema().setName("highway").setType("STRING")); fields.add(new TableFieldSchema().setName("direction").setType("STRING")); fields.add(new TableFieldSchema().setName("lane").setType("INTEGER")); fields.add(new TableFieldSchema().setName("speed").setType("FLOAT")); fields.add(new TableFieldSchema().setName("sensorId").setType("STRING")); TableSchema schema = new TableSchema().setFields(fields); PCollection<LaneInfo> laneInfo = p // .apply("GetMessages", PubsubIO.<String>read().topic(topic).withCoder(StringUtf8Coder.of())) // .apply("TimeWindow", Window.into(SlidingWindows// .of(Duration.standardSeconds(300))// .every(Duration.standardSeconds(60)))) // .apply("ExtractData", ParDo.of(new DoFn<String, LaneInfo>() { @ProcessElement public void processElement(ProcessContext c) throws Exception { String line = c.element(); c.output(LaneInfo.newLaneInfo(line)); } })); laneInfo.apply("ToBQRow", ParDo.of(new DoFn<LaneInfo, TableRow>() { @ProcessElement public void processElement(ProcessContext c) throws Exception { TableRow row = new TableRow(); LaneInfo info = c.element(); row.set("timestamp", info.getTimestamp()); row.set("latitude", info.getLatitude()); row.set("longitude", info.getLongitude()); row.set("highway", 
info.getHighway()); row.set("direction", info.getDirection()); row.set("lane", info.getLane()); row.set("speed", info.getSpeed()); row.set("sensorId", info.getSensorKey()); c.output(row); } })) // .apply(BigQueryIO.Write.to(currConditionsTable)// .withSchema(schema)// .withWriteDisposition(BigQueryIO.Write.WriteDisposition.WRITE_APPEND) .withCreateDisposition(BigQueryIO.Write.CreateDisposition.CREATE_IF_NEEDED)); p.run(); }
From source file:com.google.codelabs.dataflow.CountRides.java
License:Apache License
/**
 * Runs a pipeline that counts rides per key in 1-second fixed windows.
 *
 * <p>Table rows are read from the source Pub/Sub topic with {@code "ts"} as the timestamp
 * label, passed through {@code MarkRides}, counted per key, formatted by
 * {@code TransformRides}, and written to the sink Pub/Sub topic.
 *
 * @param args command-line arguments, parsed into {@code CustomPipelineOptions}
 */
public static void main(String[] args) {
  CustomPipelineOptions options =
      PipelineOptionsFactory.fromArgs(args).withValidation().as(CustomPipelineOptions.class);
  Pipeline pipeline = Pipeline.create(options);

  // Fully qualified Pub/Sub topic paths for the two ends of the pipeline.
  String sourceTopic =
      String.format(
          "projects/%s/topics/%s", options.getSourceProject(), options.getSourceTopic());
  String sinkTopic =
      String.format("projects/%s/topics/%s", options.getSinkProject(), options.getSinkTopic());

  pipeline
      .apply(
          PubsubIO.Read.named("read from PubSub")
              .topic(sourceTopic)
              .timestampLabel("ts")
              .withCoder(TableRowJsonCoder.of()))
      .apply("window 1s", Window.into(FixedWindows.of(Duration.standardSeconds(1))))
      .apply("mark rides", MapElements.via(new MarkRides()))
      .apply("count similar", Count.perKey())
      .apply("format rides", MapElements.via(new TransformRides()))
      .apply(
          PubsubIO.Write.named("WriteToPubsub")
              .topic(sinkTopic)
              .withCoder(TableRowJsonCoder.of()));

  pipeline.run();
}
From source file:com.google.codelabs.dataflow.DollarRides.java
License:Apache License
public static void main(String[] args) { CustomPipelineOptions options = PipelineOptionsFactory.fromArgs(args).withValidation() .as(CustomPipelineOptions.class); Pipeline p = Pipeline.create(options); p.apply(PubsubIO.Read.named("read from PubSub") .topic(String.format("projects/%s/topics/%s", options.getSourceProject(), options.getSourceTopic())) .timestampLabel("ts").withCoder(TableRowJsonCoder.of())) .apply("sliding window", Window.into(// w ww. j av a2 s .co m SlidingWindows.of(Duration.standardSeconds(60)).every(Duration.standardSeconds(3)))) .apply("extract meter increment", MapElements.via((TableRow x) -> Double.parseDouble(x.get("meter_increment").toString())) .withOutputType(TypeDescriptor.of(Double.class))) .apply("sum whole window", Sum.doublesGlobally().withoutDefaults()) .apply("format rides", MapElements.via((Double x) -> { TableRow r = new TableRow(); r.set("dollar_run_rate_per_minute", x); LOG.info("Outputting $ value {} at {} ", x, new Date().getTime()); return r; }).withOutputType(TypeDescriptor.of(TableRow.class))) .apply(PubsubIO.Write .named("WriteToPubsub").topic(String.format("projects/%s/topics/%s", options.getSinkProject(), options.getSinkTopic())) .withCoder(TableRowJsonCoder.of())); p.run(); }
From source file:com.google.codelabs.dataflow.ExactDollarRides.java
License:Apache License
/**
 * Runs a pipeline that publishes per-minute sums of {@code meter_increment}, with early and
 * late firings.
 *
 * <p>Rows are read from the source Pub/Sub topic with {@code "ts"} as the timestamp label,
 * reduced to their {@code meter_increment} value, and placed in 1-minute fixed windows. The
 * trigger fires at the watermark, early 1 second after the first element in a pane, and late
 * for every late element. Panes accumulate and 5 minutes of lateness are allowed. The
 * per-window sums are formatted by {@code TransformRides} and written to the sink topic.
 *
 * @param args command-line arguments, parsed into {@code CustomPipelineOptions}
 */
public static void main(String[] args) {
  CustomPipelineOptions options =
      PipelineOptionsFactory.fromArgs(args).withValidation().as(CustomPipelineOptions.class);
  Pipeline pipeline = Pipeline.create(options);

  // Fully qualified Pub/Sub topic paths for the two ends of the pipeline.
  String sourceTopic =
      String.format(
          "projects/%s/topics/%s", options.getSourceProject(), options.getSourceTopic());
  String sinkTopic =
      String.format("projects/%s/topics/%s", options.getSinkProject(), options.getSinkTopic());

  pipeline
      .apply(
          PubsubIO.Read.named("read from PubSub")
              .topic(sourceTopic)
              .timestampLabel("ts")
              .withCoder(TableRowJsonCoder.of()))
      .apply(
          "extract dollars",
          MapElements.via(
                  (TableRow ride) -> Double.parseDouble(ride.get("meter_increment").toString()))
              .withOutputType(TypeDescriptor.of(Double.class)))
      .apply("fixed window", Window.into(FixedWindows.of(Duration.standardMinutes(1))))
      .apply(
          "trigger",
          Window.<Double>triggering(
                  AfterWatermark.pastEndOfWindow()
                      .withEarlyFirings(
                          AfterProcessingTime.pastFirstElementInPane()
                              .plusDelayOf(Duration.standardSeconds(1)))
                      .withLateFirings(AfterPane.elementCountAtLeast(1)))
              .accumulatingFiredPanes()
              .withAllowedLateness(Duration.standardMinutes(5)))
      .apply("sum whole window", Sum.doublesGlobally().withoutDefaults())
      .apply("format rides", ParDo.of(new TransformRides()))
      .apply(
          PubsubIO.Write.named("WriteToPubsub")
              .topic(sinkTopic)
              .withCoder(TableRowJsonCoder.of()));

  pipeline.run();
}
From source file:com.google.codelabs.dataflow.TimestampRides.java
License:Apache License
public static void main(String[] args) { CustomPipelineOptions options = PipelineOptionsFactory.fromArgs(args).withValidation() .as(CustomPipelineOptions.class); Pipeline p = Pipeline.create(options); p.apply(PubsubIO.Read.named("read from PubSub") .topic(String.format("projects/%s/topics/%s", options.getSourceProject(), options.getSourceTopic())) .timestampLabel("ts").withCoder(TableRowJsonCoder.of())) .apply("window 1s", Window.into(FixedWindows.of(Duration.standardSeconds(1)))) .apply("parse timestamps", MapElements.via((TableRow e) -> Instant .from(DateTimeFormatter.ISO_DATE_TIME.parse(e.get("timestamp").toString())) .toEpochMilli()).withOutputType(TypeDescriptor.of(Long.class))) .apply("max timestamp in window", Max.longsGlobally().withoutDefaults()) .apply("transform", MapElements.via((Long t) -> { TableRow ride = new TableRow(); ride.set("timestamp", Instant.ofEpochMilli(t).toString()); return ride; }).withOutputType(TypeDescriptor.of(TableRow.class))) .apply(PubsubIO.Write//from w w w. j a v a 2 s .com .named("write to PubSub").topic(String.format("projects/%s/topics/%s", options.getSinkProject(), options.getSinkTopic())) .withCoder(TableRowJsonCoder.of())); p.run(); }