List of usage examples for org.joda.time.Duration.standardSeconds
public static Duration standardSeconds(long seconds)
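standardSeconds creates a Duration of exactly seconds * 1000 milliseconds; a standard second is always 1000 ms, independent of calendar or time zone. A minimal standalone sketch of the method itself (the class name is illustrative, not from the examples below):

import org.joda.time.Duration;

public class StandardSecondsDemo {
  public static void main(String[] args) {
    // 30 standard seconds is exactly 30 * 1000 milliseconds.
    Duration thirty = Duration.standardSeconds(30);
    System.out.println(thirty.getMillis());          // 30000
    System.out.println(thirty.getStandardSeconds()); // 30
    System.out.println(thirty.getStandardMinutes()); // 0
  }
}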
From source file:org.apache.beam.sdk.extensions.sql.meta.provider.seqgen.GenerateSequenceTable.java
License:Apache License
@Override
public PCollection<Row> buildIOReader(PBegin begin) {
  return begin
      .apply(GenerateSequence.from(0).withRate(elementsPerSecond, Duration.standardSeconds(1)))
      .apply(
          MapElements.into(TypeDescriptor.of(Row.class))
              .via(elm -> Row.withSchema(TABLE_SCHEMA).addValues(elm, Instant.now()).build()))
      .setRowSchema(getSchema());
}
From source file:org.apache.beam.sdk.io.gcp.bigquery.FakeJobService.java
License:Apache License
@Override
public Job pollJob(JobReference jobRef, int maxAttempts) throws InterruptedException {
  BackOff backoff =
      BackOffAdapter.toGcpBackOff(
          FluentBackoff.DEFAULT
              .withMaxRetries(maxAttempts)
              .withInitialBackoff(Duration.millis(10))
              .withMaxBackoff(Duration.standardSeconds(1))
              .backoff());
  Sleeper sleeper = Sleeper.DEFAULT;
  try {
    do {
      Job job = getJob(jobRef);
      if (job != null) {
        JobStatus status = job.getStatus();
        if (status != null
            && ("DONE".equals(status.getState()) || "FAILED".equals(status.getState()))) {
          return job;
        }
      }
    } while (BackOffUtils.next(sleeper, backoff));
  } catch (IOException e) {
    return null;
  }
  return null;
}
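This example and several of the BigQuery examples below share the same retry idiom: a FluentBackoff built from Duration.standardSeconds, a Sleeper, and a do/while loop over BackOffUtils.next. A condensed, self-contained sketch of just that idiom (the class and method names here are illustrative, not from the Beam sources):

import java.io.IOException;
import org.apache.beam.sdk.util.BackOff;
import org.apache.beam.sdk.util.BackOffUtils;
import org.apache.beam.sdk.util.FluentBackoff;
import org.apache.beam.sdk.util.Sleeper;
import org.joda.time.Duration;

public class BackoffSketch {
  // Retry an action up to 3 times, starting at 1 second and capping at 1 minute.
  static boolean runWithRetries(Runnable action) throws IOException, InterruptedException {
    BackOff backoff =
        FluentBackoff.DEFAULT
            .withMaxRetries(3)
            .withInitialBackoff(Duration.standardSeconds(1))
            .withMaxBackoff(Duration.standardMinutes(1))
            .backoff();
    Sleeper sleeper = Sleeper.DEFAULT;
    do {
      try {
        action.run();
        return true;
      } catch (RuntimeException e) {
        // Fall through: sleep per the backoff schedule, then retry.
      }
    } while (BackOffUtils.next(sleeper, backoff));
    return false; // Retries exhausted.
  }
}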
From source file:org.apache.beam.sdk.io.gcp.bigquery.WriteRename.java
License:Apache License
private void copy(
    JobService jobService,
    DatasetService datasetService,
    String jobIdPrefix,
    TableReference ref,
    List<TableReference> tempTables,
    WriteDisposition writeDisposition,
    CreateDisposition createDisposition,
    @Nullable String tableDescription)
    throws InterruptedException, IOException {
  JobConfigurationTableCopy copyConfig =
      new JobConfigurationTableCopy()
          .setSourceTables(tempTables)
          .setDestinationTable(ref)
          .setWriteDisposition(writeDisposition.name())
          .setCreateDisposition(createDisposition.name());
  String projectId = ref.getProjectId();
  Job lastFailedCopyJob = null;
  RetryJobId jobId = new RetryJobId(jobIdPrefix, 0);
  String bqLocation =
      BigQueryHelpers.getDatasetLocation(datasetService, ref.getProjectId(), ref.getDatasetId());
  BackOff backoff =
      BackOffAdapter.toGcpBackOff(
          FluentBackoff.DEFAULT
              .withMaxRetries(maxRetryJobs)
              .withInitialBackoff(Duration.standardSeconds(1))
              .withMaxBackoff(Duration.standardMinutes(1))
              .backoff());
  Sleeper sleeper = Sleeper.DEFAULT;
  int i = 0;
  do {
    ++i;
    JobReference jobRef =
        new JobReference()
            .setProjectId(projectId)
            .setJobId(jobId.getJobId())
            .setLocation(bqLocation);
    LOG.info("Starting copy job for table {} using {}, attempt {}", ref, jobRef, i);
    try {
      jobService.startCopyJob(jobRef, copyConfig);
    } catch (IOException e) {
      LOG.warn("Copy job {} failed with {}", jobRef, e);
      // It's possible that the job actually made it to BQ even though we got a failure here.
      // For example, the response from BQ may have timed out returning. getRetryJobId will
      // return the correct job id to use on retry, or a job id to continue polling (if it turns
      // out that the job has not actually failed yet).
      RetryJobIdResult result =
          BigQueryHelpers.getRetryJobId(jobId, projectId, bqLocation, jobService);
      jobId = result.jobId;
      if (result.shouldRetry) {
        // Try the copy again with the new job id.
        continue;
      }
      // Otherwise, the job has reached BigQuery and is in either the PENDING state or has
      // completed successfully.
    }
    Job copyJob = jobService.pollJob(jobRef, BatchLoads.LOAD_JOB_POLL_MAX_RETRIES);
    Status jobStatus = BigQueryHelpers.parseStatus(copyJob);
    switch (jobStatus) {
      case SUCCEEDED:
        if (tableDescription != null) {
          datasetService.patchTableDescription(ref, tableDescription);
        }
        return;
      case UNKNOWN:
        // This might happen if BigQuery's job listing is slow. Retry with the same job id.
        LOG.info(
            "Copy job {} finished in unknown state: {}: {}",
            jobRef,
            copyJob.getStatus(),
            (i < maxRetryJobs - 1) ? "will retry" : "will not retry");
        lastFailedCopyJob = copyJob;
        continue;
      case FAILED:
        lastFailedCopyJob = copyJob;
        jobId = BigQueryHelpers.getRetryJobId(jobId, projectId, bqLocation, jobService).jobId;
        continue;
      default:
        throw new IllegalStateException(
            String.format(
                "Unexpected status [%s] of copy job: %s.",
                jobStatus, BigQueryHelpers.jobToPrettyString(copyJob)));
    }
  } while (nextBackOff(sleeper, backoff));
  throw new RuntimeException(
      String.format(
          "Failed to create copy job with id prefix %s, reached max retries: %d, "
              + "last failed copy job: %s.",
          jobIdPrefix, maxRetryJobs, BigQueryHelpers.jobToPrettyString(lastFailedCopyJob)));
}
From source file:org.apache.beam.sdk.io.gcp.bigquery.WriteTables.java
License:Apache License
private void load(
    JobService jobService,
    DatasetService datasetService,
    String jobIdPrefix,
    TableReference ref,
    TimePartitioning timePartitioning,
    @Nullable TableSchema schema,
    List<String> gcsUris,
    WriteDisposition writeDisposition,
    CreateDisposition createDisposition,
    @Nullable String tableDescription)
    throws InterruptedException, IOException {
  JobConfigurationLoad loadConfig =
      new JobConfigurationLoad()
          .setDestinationTable(ref)
          .setSchema(schema)
          .setSourceUris(gcsUris)
          .setWriteDisposition(writeDisposition.name())
          .setCreateDisposition(createDisposition.name())
          .setSourceFormat("NEWLINE_DELIMITED_JSON")
          .setIgnoreUnknownValues(ignoreUnknownValues);
  if (timePartitioning != null) {
    loadConfig.setTimePartitioning(timePartitioning);
  }
  String projectId = loadJobProjectId == null ? ref.getProjectId() : loadJobProjectId.get();
  Job lastFailedLoadJob = null;
  String bqLocation =
      BigQueryHelpers.getDatasetLocation(datasetService, ref.getProjectId(), ref.getDatasetId());
  BackOff backoff =
      BackOffAdapter.toGcpBackOff(
          FluentBackoff.DEFAULT
              .withMaxRetries(maxRetryJobs)
              .withInitialBackoff(Duration.standardSeconds(1))
              .withMaxBackoff(Duration.standardMinutes(1))
              .backoff());
  Sleeper sleeper = Sleeper.DEFAULT;
  // First attempt is always jobIdPrefix-0.
  RetryJobId jobId = new RetryJobId(jobIdPrefix, 0);
  int i = 0;
  do {
    ++i;
    JobReference jobRef =
        new JobReference()
            .setProjectId(projectId)
            .setJobId(jobId.getJobId())
            .setLocation(bqLocation);
    LOG.info("Loading {} files into {} using job {}, attempt {}", gcsUris.size(), ref, jobRef, i);
    try {
      jobService.startLoadJob(jobRef, loadConfig);
    } catch (IOException e) {
      LOG.warn("Load job {} failed with {}", jobRef, e);
      // It's possible that the job actually made it to BQ even though we got a failure here.
      // For example, the response from BQ may have timed out returning. getRetryJobId will
      // return the correct job id to use on retry, or a job id to continue polling (if it turns
      // out that the job has not actually failed yet).
      RetryJobIdResult result =
          BigQueryHelpers.getRetryJobId(jobId, projectId, bqLocation, jobService);
      jobId = result.jobId;
      if (result.shouldRetry) {
        // Try the load again with the new job id.
        continue;
      }
      // Otherwise, the job has reached BigQuery and is in either the PENDING state or has
      // completed successfully.
    }
    LOG.info("Load job {} started", jobRef);
    // Try to wait until the job is done (succeeded or failed).
    Job loadJob = jobService.pollJob(jobRef, BatchLoads.LOAD_JOB_POLL_MAX_RETRIES);
    Status jobStatus = BigQueryHelpers.parseStatus(loadJob);
    switch (jobStatus) {
      case SUCCEEDED:
        LOG.info("Load job {} succeeded. Statistics: {}", jobRef, loadJob.getStatistics());
        if (tableDescription != null) {
          datasetService.patchTableDescription(
              ref.clone().setTableId(BigQueryHelpers.stripPartitionDecorator(ref.getTableId())),
              tableDescription);
        }
        return;
      case UNKNOWN:
        // This might happen if BigQuery's job listing is slow. Retry with the same job id.
        LOG.info(
            "Load job {} finished in unknown state: {}: {}",
            jobRef,
            loadJob.getStatus(),
            (i < maxRetryJobs - 1) ? "will retry" : "will not retry");
        lastFailedLoadJob = loadJob;
        continue;
      case FAILED:
        lastFailedLoadJob = loadJob;
        jobId = BigQueryHelpers.getRetryJobId(jobId, projectId, bqLocation, jobService).jobId;
        LOG.info(
            "Load job {} failed, {}: {}. Next job id {}",
            jobRef,
            (i < maxRetryJobs - 1) ? "will retry" : "will not retry",
            loadJob.getStatus(),
            jobId);
        continue;
      default:
        throw new IllegalStateException(
            String.format(
                "Unexpected status [%s] of load job: %s.",
                loadJob.getStatus(), BigQueryHelpers.jobToPrettyString(loadJob)));
    }
  } while (nextBackOff(sleeper, backoff));
  throw new RuntimeException(
      String.format(
          "Failed to create load job with id prefix %s, reached max retries: %d, "
              + "last failed load job: %s.",
          jobIdPrefix, maxRetryJobs, BigQueryHelpers.jobToPrettyString(lastFailedLoadJob)));
}
From source file:org.apache.beam.sdk.io.kinesis.KinesisReader.java
License:Apache License
KinesisReader(
    SimplifiedKinesisClient kinesis,
    CheckpointGenerator initialCheckpointGenerator,
    KinesisSource source,
    WatermarkPolicyFactory watermarkPolicyFactory,
    Duration upToDateThreshold) {
  this(
      kinesis,
      initialCheckpointGenerator,
      source,
      watermarkPolicyFactory,
      upToDateThreshold,
      Duration.standardSeconds(30));
}
From source file:org.apache.beam.sdk.nexmark.NexmarkLauncher.java
License:Apache License
/**
 * Monitor the performance and progress of a running job. Return final performance if it was
 * measured.
 */
@Nullable
private NexmarkPerf monitor(NexmarkQuery query) {
  if (!options.getMonitorJobs()) {
    return null;
  }
  if (configuration.debug) {
    NexmarkUtils.console("Waiting for main pipeline to 'finish'");
  } else {
    NexmarkUtils.console("--debug=false, so job will not self-cancel");
  }
  PipelineResult job = mainResult;
  PipelineResult publisherJob = publisherResult;
  List<NexmarkPerf.ProgressSnapshot> snapshots = new ArrayList<>();
  long startMsSinceEpoch = System.currentTimeMillis();
  long endMsSinceEpoch = -1;
  if (options.getRunningTimeMinutes() != null) {
    endMsSinceEpoch =
        startMsSinceEpoch
            + Duration.standardMinutes(options.getRunningTimeMinutes()).getMillis()
            - Duration.standardSeconds(configuration.preloadSeconds).getMillis();
  }
  long lastActivityMsSinceEpoch = -1;
  NexmarkPerf perf = null;
  boolean waitingForShutdown = false;
  boolean cancelJob = false;
  boolean publisherCancelled = false;
  List<String> errors = new ArrayList<>();
  while (true) {
    long now = System.currentTimeMillis();
    if (endMsSinceEpoch >= 0 && now > endMsSinceEpoch && !waitingForShutdown) {
      NexmarkUtils.console("Reached end of test, cancelling job");
      try {
        cancelJob = true;
        job.cancel();
      } catch (IOException e) {
        throw new RuntimeException("Unable to cancel main job: ", e);
      }
      if (publisherResult != null) {
        try {
          publisherJob.cancel();
        } catch (IOException e) {
          throw new RuntimeException("Unable to cancel publisher job: ", e);
        }
        publisherCancelled = true;
      }
      waitingForShutdown = true;
    }
    PipelineResult.State state = job.getState();
    NexmarkUtils.console(
        "%s %s%s", state, queryName, waitingForShutdown ? " (waiting for shutdown)" : "");
    NexmarkPerf currPerf;
    if (configuration.debug) {
      currPerf =
          currentPerf(startMsSinceEpoch, now, job, snapshots, query.eventMonitor, query.resultMonitor);
    } else {
      currPerf = null;
    }
    if (perf == null || perf.anyActivity(currPerf)) {
      lastActivityMsSinceEpoch = now;
    }
    if (options.isStreaming() && !waitingForShutdown) {
      Duration quietFor = new Duration(lastActivityMsSinceEpoch, now);
      long fatalCount = new MetricsReader(job, query.getName()).getCounterMetric("fatal");
      if (fatalCount == -1) {
        fatalCount = 0;
      }
      if (fatalCount > 0) {
        NexmarkUtils.console("ERROR: job has fatal errors, cancelling.");
        errors.add(String.format("Pipeline reported %s fatal errors", fatalCount));
        waitingForShutdown = true;
        cancelJob = true;
      } else if (configuration.debug
          && configuration.numEvents > 0
          && currPerf.numEvents == configuration.numEvents
          && currPerf.numResults >= 0
          && quietFor.isLongerThan(DONE_DELAY)) {
        NexmarkUtils.console("streaming query appears to have finished waiting for completion.");
        waitingForShutdown = true;
      } else if (quietFor.isLongerThan(STUCK_TERMINATE_DELAY)) {
        NexmarkUtils.console(
            "ERROR: streaming query appears to have been stuck for %d minutes, cancelling job.",
            quietFor.getStandardMinutes());
        errors.add(
            String.format(
                "Cancelling streaming job since it appeared stuck for %d min.",
                quietFor.getStandardMinutes()));
        waitingForShutdown = true;
        cancelJob = true;
      } else if (quietFor.isLongerThan(STUCK_WARNING_DELAY)) {
        NexmarkUtils.console(
            "WARNING: streaming query appears to have been stuck for %d min.",
            quietFor.getStandardMinutes());
      }
      if (cancelJob) {
        try {
          job.cancel();
        } catch (IOException e) {
          throw new RuntimeException("Unable to cancel main job: ", e);
        }
      }
    }
    perf = currPerf;
    boolean running = true;
    switch (state) {
      case UNKNOWN:
      case UNRECOGNIZED:
      case STOPPED:
      case RUNNING:
        // Keep going.
        break;
      case DONE:
        // All done.
        running = false;
        break;
      case CANCELLED:
        running = false;
        if (!cancelJob) {
          errors.add("Job was unexpectedly cancelled");
        }
        break;
      case FAILED:
      case UPDATED:
        // Abnormal termination.
        running = false;
        errors.add("Job was unexpectedly updated");
        break;
    }
    if (!running) {
      break;
    }
    if (lastActivityMsSinceEpoch == now) {
      NexmarkUtils.console("new perf %s", perf);
    } else {
      NexmarkUtils.console("no activity");
    }
    try {
      Thread.sleep(PERF_DELAY.getMillis());
    } catch (InterruptedException e) {
      Thread.interrupted();
      NexmarkUtils.console("Interrupted: pipeline is still running");
    }
  }
  perf.errors = errors;
  perf.snapshots = snapshots;
  if (publisherResult != null) {
    NexmarkUtils.console("Shutting down publisher pipeline.");
    try {
      if (!publisherCancelled) {
        publisherJob.cancel();
      }
      publisherJob.waitUntilFinish(Duration.standardMinutes(5));
    } catch (IOException e) {
      throw new RuntimeException("Unable to cancel publisher job: ", e);
    }
  }
  return perf;
}
From source file:org.apache.beam.sdk.nexmark.NexmarkLauncher.java
License:Apache License
/** Run {@code configuration} and return its performance if possible. */
@Nullable
public NexmarkPerf run() throws IOException {
  if (options.getManageResources() && !options.getMonitorJobs()) {
    throw new RuntimeException("If using --manageResources then must also use --monitorJobs.");
  }
  //
  // Set up per-run state.
  //
  checkState(queryName == null);
  if (configuration.sourceType.equals(SourceType.PUBSUB)) {
    pubsubHelper = PubsubHelper.create(options);
  }
  try {
    NexmarkUtils.console("Running %s", configuration.toShortString());
    if (configuration.numEvents < 0) {
      NexmarkUtils.console("skipping since configuration is disabled");
      return null;
    }
    NexmarkQuery<? extends KnownSize> query = getNexmarkQuery();
    if (query == null) {
      NexmarkUtils.console("skipping since configuration is not implemented");
      return null;
    }
    queryName = query.getName();
    // Append queryName to temp location.
    if (!"".equals(options.getTempLocation())) {
      options.setTempLocation(options.getTempLocation() + "/" + queryName);
    }
    NexmarkQueryModel model = getNexmarkQueryModel();
    if (options.getJustModelResultRate()) {
      if (model == null) {
        throw new RuntimeException(String.format("No model for %s", queryName));
      }
      modelResultRates(model);
      return null;
    }
    final Instant now = Instant.now();
    Pipeline p = Pipeline.create(options);
    NexmarkUtils.setupPipeline(configuration.coderStrategy, p);
    // Generate events.
    PCollection<Event> source = createSource(p, now);
    if (query.getTransform().needsSideInput()) {
      query.getTransform().setSideInput(NexmarkUtils.prepareSideInput(p, configuration));
    }
    if (options.getLogEvents()) {
      source = source.apply(queryName + ".Events.Log", NexmarkUtils.log(queryName + ".Events"));
    }
    // Source will be null if source type is PUBSUB and mode is PUBLISH_ONLY.
    // In that case there's nothing more to add to the pipeline.
    if (source != null) {
      // Optionally sink events in Avro format.
      // (Query results are ignored.)
      if (configuration.sinkType == NexmarkUtils.SinkType.AVRO) {
        sinkEventsToAvro(source);
      }
      // Query 10 logs all events to Google Cloud Storage files. It could generate a lot of logs,
      // so set parallelism. Also set the output path where to write log files.
      if (configuration.query == NexmarkQueryName.LOG_TO_SHARDED_FILES) {
        String path = null;
        if (options.getOutputPath() != null && !options.getOutputPath().isEmpty()) {
          path = logsDir(now.getMillis());
        }
        ((Query10) query.getTransform()).setOutputPath(path);
        ((Query10) query.getTransform()).setMaxNumWorkers(maxNumWorkers());
      }
      // Apply query.
      PCollection<TimestampedValue<KnownSize>> results =
          (PCollection<TimestampedValue<KnownSize>>) source.apply(query);
      if (options.getAssertCorrectness()) {
        if (model == null) {
          throw new RuntimeException(String.format("No model for %s", queryName));
        }
        // We know all our streams have a finite number of elements.
        results.setIsBoundedInternal(PCollection.IsBounded.BOUNDED);
        // If we have a finite number of events then assert our pipeline's
        // results match those of a model using the same sequence of events.
        PAssert.that(results).satisfies(model.assertionFor());
      }
      // Output results.
      sink(results, now.getMillis());
    }
    mainResult = p.run();
    mainResult.waitUntilFinish(Duration.standardSeconds(configuration.streamTimeout));
    return monitor(query);
  } finally {
    if (pubsubHelper != null) {
      pubsubHelper.cleanup();
      pubsubHelper = null;
    }
    configuration = null;
    queryName = null;
  }
}
From source file:org.apache.beam.sdk.nexmark.PubsubHelper.java
License:Apache License
private PubsubHelper(PubsubClient pubsubClient, String project) {
  this.pubsubClient = pubsubClient;
  this.project = project;
  createdTopics = new ArrayList<>();
  createdSubscriptions = new ArrayList<>();
  sleeper = Sleeper.DEFAULT;
  backOff =
      FluentBackoff.DEFAULT
          .withInitialBackoff(Duration.standardSeconds(1))
          .withMaxRetries(3)
          .backoff();
}
From source file:org.apache.beam.sdk.nexmark.queries.Query10.java
License:Apache License
@Override
public PCollection<Done> expand(PCollection<Event> events) {
  final int numLogShards = maxNumWorkers * NUM_SHARDS_PER_WORKER;
  return events
      .apply(
          name + ".ShardEvents",
          ParDo.of(
              new DoFn<Event, KV<String, Event>>() {
                private final Counter lateCounter = Metrics.counter(name, "actuallyLateEvent");
                private final Counter onTimeCounter = Metrics.counter(name, "onTimeCounter");

                @ProcessElement
                public void processElement(ProcessContext c) {
                  if (c.element().hasAnnotation("LATE")) {
                    lateCounter.inc();
                    LOG.info("Observed late: %s", c.element());
                  } else {
                    onTimeCounter.inc();
                  }
                  int shardNum = (int) Math.abs((long) c.element().hashCode() % numLogShards);
                  String shard = String.format("shard-%05d-of-%05d", shardNum, numLogShards);
                  c.output(KV.of(shard, c.element()));
                }
              }))
      .apply(
          name + ".WindowEvents",
          Window.<KV<String, Event>>into(
                  FixedWindows.of(Duration.standardSeconds(configuration.windowSizeSec)))
              .triggering(
                  AfterEach.inOrder(
                      Repeatedly.forever(AfterPane.elementCountAtLeast(configuration.maxLogEvents))
                          .orFinally(AfterWatermark.pastEndOfWindow()),
                      Repeatedly.forever(
                          AfterFirst.of(
                              AfterPane.elementCountAtLeast(configuration.maxLogEvents),
                              AfterProcessingTime.pastFirstElementInPane()
                                  .plusDelayOf(LATE_BATCHING_PERIOD)))))
              .discardingFiredPanes()
              // Use a 1 day allowed lateness so that any forgotten hold will stall the
              // pipeline for that period and be very noticeable.
              .withAllowedLateness(Duration.standardDays(1)))
      .apply(name + ".GroupByKey", GroupByKey.create())
      .apply(
          name + ".CheckForLateEvents",
          ParDo.of(
              new DoFn<KV<String, Iterable<Event>>, KV<String, Iterable<Event>>>() {
                private final Counter earlyCounter = Metrics.counter(name, "earlyShard");
                private final Counter onTimeCounter = Metrics.counter(name, "onTimeShard");
                private final Counter lateCounter = Metrics.counter(name, "lateShard");
                private final Counter unexpectedLatePaneCounter =
                    Metrics.counter(name, "ERROR_unexpectedLatePane");
                private final Counter unexpectedOnTimeElementCounter =
                    Metrics.counter(name, "ERROR_unexpectedOnTimeElement");

                @ProcessElement
                public void processElement(ProcessContext c, BoundedWindow window) {
                  int numLate = 0;
                  int numOnTime = 0;
                  for (Event event : c.element().getValue()) {
                    if (event.hasAnnotation("LATE")) {
                      numLate++;
                    } else {
                      numOnTime++;
                    }
                  }
                  String shard = c.element().getKey();
                  LOG.info(
                      String.format(
                          "%s with timestamp %s has %d actually late and %d on-time "
                              + "elements in pane %s for window %s",
                          shard, c.timestamp(), numLate, numOnTime, c.pane(),
                          window.maxTimestamp()));
                  if (c.pane().getTiming() == PaneInfo.Timing.LATE) {
                    if (numLate == 0) {
                      LOG.error("ERROR! No late events in late pane for %s", shard);
                      unexpectedLatePaneCounter.inc();
                    }
                    if (numOnTime > 0) {
                      LOG.error(
                          "ERROR! Have %d on-time events in late pane for %s", numOnTime, shard);
                      unexpectedOnTimeElementCounter.inc();
                    }
                    lateCounter.inc();
                  } else if (c.pane().getTiming() == PaneInfo.Timing.EARLY) {
                    if (numOnTime + numLate < configuration.maxLogEvents) {
                      LOG.error(
                          "ERROR! Only have %d events in early pane for %s",
                          numOnTime + numLate, shard);
                    }
                    earlyCounter.inc();
                  } else {
                    onTimeCounter.inc();
                  }
                  c.output(c.element());
                }
              }))
      .apply(
          name + ".UploadEvents",
          ParDo.of(
              new DoFn<KV<String, Iterable<Event>>, KV<Void, OutputFile>>() {
                private final Counter savedFileCounter = Metrics.counter(name, "savedFile");
                private final Counter writtenRecordsCounter =
                    Metrics.counter(name, "writtenRecords");

                @ProcessElement
                public void processElement(ProcessContext c, BoundedWindow window)
                    throws IOException {
                  String shard = c.element().getKey();
                  GcsOptions options = c.getPipelineOptions().as(GcsOptions.class);
                  OutputFile outputFile = outputFileFor(window, shard, c.pane());
                  LOG.info(
                      String.format(
                          "Writing %s with record timestamp %s, window timestamp %s, pane %s",
                          shard, c.timestamp(), window.maxTimestamp(), c.pane()));
                  if (outputFile.filename != null) {
                    LOG.info("Beginning write to '%s'", outputFile.filename);
                    int n = 0;
                    try (OutputStream output =
                        Channels.newOutputStream(
                            openWritableGcsFile(options, outputFile.filename))) {
                      for (Event event : c.element().getValue()) {
                        Event.CODER.encode(event, output, Coder.Context.OUTER);
                        writtenRecordsCounter.inc();
                        if (++n % 10000 == 0) {
                          LOG.info("So far written %d records to '%s'", n, outputFile.filename);
                        }
                      }
                    }
                    LOG.info("Written all %d records to '%s'", n, outputFile.filename);
                  }
                  savedFileCounter.inc();
                  c.output(KV.of(null, outputFile));
                }
              }))
      // Clear fancy triggering from above.
      .apply(
          name + ".WindowLogFiles",
          Window.<KV<Void, OutputFile>>into(
                  FixedWindows.of(Duration.standardSeconds(configuration.windowSizeSec)))
              .triggering(AfterWatermark.pastEndOfWindow())
              // We expect no late data here, but we'll assume the worst so we can detect any.
              .withAllowedLateness(Duration.standardDays(1))
              .discardingFiredPanes())
      // This GroupByKey allows us to have one file per window.
      .apply(name + ".GroupByKey2", GroupByKey.create())
      .apply(
          name + ".Index",
          ParDo.of(
              new DoFn<KV<Void, Iterable<OutputFile>>, Done>() {
                private final Counter unexpectedLateCounter =
                    Metrics.counter(name, "ERROR_unexpectedLate");
                private final Counter unexpectedEarlyCounter =
                    Metrics.counter(name, "ERROR_unexpectedEarly");
                private final Counter unexpectedIndexCounter =
                    Metrics.counter(name, "ERROR_unexpectedIndex");
                private final Counter finalizedCounter = Metrics.counter(name, "indexed");

                @ProcessElement
                public void processElement(ProcessContext c, BoundedWindow window)
                    throws IOException {
                  if (c.pane().getTiming() == Timing.LATE) {
                    unexpectedLateCounter.inc();
                    LOG.error("ERROR! Unexpected LATE pane: %s", c.pane());
                  } else if (c.pane().getTiming() == Timing.EARLY) {
                    unexpectedEarlyCounter.inc();
                    LOG.error("ERROR! Unexpected EARLY pane: %s", c.pane());
                  } else if (c.pane().getTiming() == Timing.ON_TIME && c.pane().getIndex() != 0) {
                    unexpectedIndexCounter.inc();
                    LOG.error("ERROR! Unexpected ON_TIME pane index: %s", c.pane());
                  } else {
                    GcsOptions options = c.getPipelineOptions().as(GcsOptions.class);
                    LOG.info(
                        "Index with record timestamp %s, window timestamp %s, pane %s",
                        c.timestamp(), window.maxTimestamp(), c.pane());
                    @Nullable String filename = indexPathFor(window);
                    if (filename != null) {
                      LOG.info("Beginning write to '%s'", filename);
                      int n = 0;
                      try (OutputStream output =
                          Channels.newOutputStream(openWritableGcsFile(options, filename))) {
                        for (OutputFile outputFile : c.element().getValue()) {
                          output.write(outputFile.toString().getBytes(StandardCharsets.UTF_8));
                          n++;
                        }
                      }
                      LOG.info("Written all %d lines to '%s'", n, filename);
                    }
                    c.output(new Done("written for timestamp " + window.maxTimestamp()));
                    finalizedCounter.inc();
                  }
                }
              }));
}
From source file:org.apache.beam.sdk.nexmark.queries.Query11.java
License:Apache License
@Override
public PCollection<BidsPerSession> expand(PCollection<Event> events) {
  PCollection<Long> bidders =
      events
          .apply(NexmarkQueryUtil.JUST_BIDS)
          .apply(
              name + ".Rekey",
              ParDo.of(
                  new DoFn<Bid, Long>() {
                    @ProcessElement
                    public void processElement(ProcessContext c) {
                      Bid bid = c.element();
                      c.output(bid.bidder);
                    }
                  }));
  PCollection<Long> biddersWindowed =
      bidders.apply(
          Window.<Long>into(
                  Sessions.withGapDuration(Duration.standardSeconds(configuration.windowSizeSec)))
              .triggering(
                  Repeatedly.forever(AfterPane.elementCountAtLeast(configuration.maxLogEvents)))
              .discardingFiredPanes()
              .withAllowedLateness(Duration.standardSeconds(configuration.occasionalDelaySec / 2)));
  return biddersWindowed
      .apply(Count.perElement())
      .apply(
          name + ".ToResult",
          ParDo.of(
              new DoFn<KV<Long, Long>, BidsPerSession>() {
                @ProcessElement
                public void processElement(ProcessContext c) {
                  c.output(new BidsPerSession(c.element().getKey(), c.element().getValue()));
                }
              }));
}
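For reference, the windowing idiom shared by the Nexmark queries above reduces to applying a Window transform whose size is built from Duration.standardSeconds. A minimal runnable sketch of the fixed-window variant (assumes the Beam direct runner is on the classpath; the class and values are illustrative, not from the queries):

import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.transforms.Create;
import org.apache.beam.sdk.transforms.windowing.FixedWindows;
import org.apache.beam.sdk.transforms.windowing.Window;
import org.apache.beam.sdk.values.PCollection;
import org.joda.time.Duration;

public class WindowSketch {
  public static void main(String[] args) {
    Pipeline p = Pipeline.create();
    // Assign each element to a fixed 60-second event-time window.
    PCollection<Long> windowed =
        p.apply(Create.of(1L, 2L, 3L))
            .apply(Window.<Long>into(FixedWindows.of(Duration.standardSeconds(60))));
    p.run().waitUntilFinish();
  }
}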