Example usage for org.joda.time Duration standardSeconds

List of usage examples for org.joda.time Duration standardSeconds

Introduction

On this page you can find example usages of org.joda.time Duration standardSeconds.

Prototype

public static Duration standardSeconds(long seconds) 

Source Link

Document

Create a duration with the specified number of seconds assuming that there are the standard number of milliseconds in a second.

Usage

From source file:org.apache.beam.sdk.extensions.sql.meta.provider.seqgen.GenerateSequenceTable.java

License:Apache License

/**
 * Builds the reader for this table.
 *
 * <p>Applies a {@code GenerateSequence} starting at 0, rate-limited to {@code elementsPerSecond}
 * elements per one-second period, and maps every generated value to a {@code Row} built with
 * {@code TABLE_SCHEMA} that holds the value and {@code Instant.now()} at mapping time. The
 * resulting collection gets the schema returned by {@code getSchema()}.
 *
 * @param begin the pipeline start the sequence source is applied to
 * @return the collection of generated rows
 */
@Override
public PCollection<Row> buildIOReader(PBegin begin) {
    Duration ratePeriod = Duration.standardSeconds(1);
    return begin
            .apply(GenerateSequence.from(0).withRate(elementsPerSecond, ratePeriod))
            .apply(MapElements.into(TypeDescriptor.of(Row.class)).via(value -> {
                // Pair each generated value with the time at which it is mapped.
                return Row.withSchema(TABLE_SCHEMA).addValues(value, Instant.now()).build();
            }))
            .setRowSchema(getSchema());
}

From source file:org.apache.beam.sdk.io.gcp.bigquery.FakeJobService.java

License:Apache License

/**
 * Polls {@code jobRef} until the job reports a terminal state or the backoff is used up.
 *
 * <p>The backoff starts at 10 ms, is capped at one second and is configured with
 * {@code maxAttempts} as its maximum number of retries.
 *
 * @param jobRef reference of the job, looked up through {@code getJob}
 * @param maxAttempts maximum number of retries handed to the backoff policy
 * @return the job once its state is {@code "DONE"} or {@code "FAILED"}; {@code null} when the
 *     backoff is exhausted first or an {@link IOException} is raised while polling
 * @throws InterruptedException propagated from the polling / backoff calls
 */
@Override
public Job pollJob(JobReference jobRef, int maxAttempts) throws InterruptedException {
    BackOff backoff = BackOffAdapter.toGcpBackOff(
            FluentBackoff.DEFAULT
                    .withMaxRetries(maxAttempts)
                    .withInitialBackoff(Duration.millis(10))
                    .withMaxBackoff(Duration.standardSeconds(1))
                    .backoff());
    Sleeper sleeper = Sleeper.DEFAULT;
    try {
        boolean keepPolling = true;
        while (keepPolling) {
            Job polled = getJob(jobRef);
            JobStatus polledStatus = (polled == null) ? null : polled.getStatus();
            boolean terminal = polledStatus != null
                    && ("DONE".equals(polledStatus.getState()) || "FAILED".equals(polledStatus.getState()));
            if (terminal) {
                return polled;
            }
            // Not finished yet: wait for the next backoff slot, or stop when none is left.
            keepPolling = BackOffUtils.next(sleeper, backoff);
        }
    } catch (IOException e) {
        // An I/O failure while polling is reported the same way as an exhausted backoff.
        return null;
    }
    return null;
}

From source file:org.apache.beam.sdk.io.gcp.bigquery.WriteRename.java

License:Apache License

/**
 * Copies {@code tempTables} into {@code ref} with a copy job, retrying with backoff.
 *
 * <p>Each attempt starts a copy job, polls it, and then acts on the parsed status: on success the
 * table description is patched (when one was supplied) and the method returns; on an unknown or
 * failed status the attempt is repeated after the next backoff interval. The backoff starts at one
 * second, is capped at one minute and is configured with {@code maxRetryJobs} retries.
 *
 * @param jobService service used to start and poll the copy job
 * @param datasetService service used to look up the dataset location and patch the description
 * @param jobIdPrefix prefix of the job ids; the first attempt uses index 0
 * @param ref destination table
 * @param tempTables source tables of the copy
 * @param writeDisposition write disposition; its {@code name()} is put into the job configuration
 * @param createDisposition create disposition; its {@code name()} is put into the job configuration
 * @param tableDescription description to patch onto the destination table, or {@code null} for none
 * @throws InterruptedException propagated from polling / backoff
 * @throws IOException propagated from the BigQuery helper and service calls
 * @throws IllegalStateException if the parsed job status is not one of the handled values
 * @throws RuntimeException if the backoff is exhausted without a successful copy
 */
private void copy(JobService jobService, DatasetService datasetService, String jobIdPrefix, TableReference ref,
        List<TableReference> tempTables, WriteDisposition writeDisposition, CreateDisposition createDisposition,
        @Nullable String tableDescription) throws InterruptedException, IOException {
    JobConfigurationTableCopy copyConfig = new JobConfigurationTableCopy().setSourceTables(tempTables)
            .setDestinationTable(ref).setWriteDisposition(writeDisposition.name())
            .setCreateDisposition(createDisposition.name());

    String projectId = ref.getProjectId();
    Job lastFailedCopyJob = null;
    RetryJobId jobId = new RetryJobId(jobIdPrefix, 0);
    String bqLocation = BigQueryHelpers.getDatasetLocation(datasetService, ref.getProjectId(),
            ref.getDatasetId());
    BackOff backoff = BackOffAdapter.toGcpBackOff(
            FluentBackoff.DEFAULT.withMaxRetries(maxRetryJobs).withInitialBackoff(Duration.standardSeconds(1))
                    .withMaxBackoff(Duration.standardMinutes(1)).backoff());
    Sleeper sleeper = Sleeper.DEFAULT;
    int i = 0;
    do {
        ++i;
        JobReference jobRef = new JobReference().setProjectId(projectId).setJobId(jobId.getJobId())
                .setLocation(bqLocation);
        LOG.info("Starting copy job for table {} using  {}, attempt {}", ref, jobRef, i);
        try {
            jobService.startCopyJob(jobRef, copyConfig);
        } catch (IOException e) {
            LOG.warn("Copy job {} failed with {}", jobRef, e);
            // It's possible that the job actually made it to BQ even though we got a failure here.
            // For example, the response from BQ may have timed out returning. getRetryJobId will
            // return the correct job id to use on retry, or a job id to continue polling (if it turns
            // out the job has not actually failed yet).
            RetryJobIdResult result = BigQueryHelpers.getRetryJobId(jobId, projectId, bqLocation, jobService);
            jobId = result.jobId;
            if (result.shouldRetry) {
                // Try the copy again with the new job id.
                continue;
            }
            // Otherwise, the job has reached BigQuery and is in either the PENDING state or has
            // completed successfully.
        }
        Job copyJob = jobService.pollJob(jobRef, BatchLoads.LOAD_JOB_POLL_MAX_RETRIES);
        Status jobStatus = BigQueryHelpers.parseStatus(copyJob);
        switch (jobStatus) {
        case SUCCEEDED:
            if (tableDescription != null) {
                datasetService.patchTableDescription(ref, tableDescription);
            }
            return;
        case UNKNOWN:
            // This might happen if BigQuery's job listing is slow. Retry with the same
            // job id.
            // NOTE(review): pollJob may return null when polling gives up (confirm against the
            // JobService implementations), so the job is not dereferenced unconditionally here.
            LOG.info("Copy job {} finished in unknown state: {}: {}", jobRef,
                    copyJob == null ? null : copyJob.getStatus(),
                    (i < maxRetryJobs - 1) ? "will retry" : "will not retry");
            lastFailedCopyJob = copyJob;
            continue;
        case FAILED:
            lastFailedCopyJob = copyJob;
            jobId = BigQueryHelpers.getRetryJobId(jobId, projectId, bqLocation, jobService).jobId;
            continue;
        default:
            // This is the copy path, so the message names a copy job (it previously said "load job").
            throw new IllegalStateException(String.format("Unexpected status [%s] of copy job: %s.", jobStatus,
                    BigQueryHelpers.jobToPrettyString(copyJob)));
        }
    } while (nextBackOff(sleeper, backoff));
    throw new RuntimeException(String.format(
            "Failed to create copy job with id prefix %s, "
                    + "reached max retries: %d, last failed copy job: %s.",
            jobIdPrefix, maxRetryJobs, BigQueryHelpers.jobToPrettyString(lastFailedCopyJob)));
}

From source file:org.apache.beam.sdk.io.gcp.bigquery.WriteTables.java

License:Apache License

/**
 * Loads {@code gcsUris} into {@code ref} with a load job, retrying with backoff.
 *
 * <p>Each attempt starts a load job (source format {@code NEWLINE_DELIMITED_JSON}), polls it, and
 * then acts on the parsed status: on success the table description is patched (when one was
 * supplied) and the method returns; on an unknown or failed status the attempt is repeated after
 * the next backoff interval. The backoff starts at one second, is capped at one minute and is
 * configured with {@code maxRetryJobs} retries.
 *
 * @param jobService service used to start and poll the load job
 * @param datasetService service used to look up the dataset location and patch the description
 * @param jobIdPrefix prefix of the job ids; the first attempt is always {@code jobIdPrefix-0}
 * @param ref destination table
 * @param timePartitioning time partitioning to set on the load configuration, or {@code null}
 * @param schema schema set on the load configuration; may be {@code null}
 * @param gcsUris source URIs of the files to load
 * @param writeDisposition write disposition; its {@code name()} is put into the job configuration
 * @param createDisposition create disposition; its {@code name()} is put into the job configuration
 * @param tableDescription description to patch onto the destination table, or {@code null} for none
 * @throws InterruptedException propagated from polling / backoff
 * @throws IOException propagated from the BigQuery helper and service calls
 * @throws IllegalStateException if the parsed job status is not one of the handled values
 * @throws RuntimeException if the backoff is exhausted without a successful load
 */
private void load(JobService jobService, DatasetService datasetService, String jobIdPrefix, TableReference ref,
        TimePartitioning timePartitioning, @Nullable TableSchema schema, List<String> gcsUris,
        WriteDisposition writeDisposition, CreateDisposition createDisposition,
        @Nullable String tableDescription) throws InterruptedException, IOException {
    JobConfigurationLoad loadConfig = new JobConfigurationLoad().setDestinationTable(ref).setSchema(schema)
            .setSourceUris(gcsUris).setWriteDisposition(writeDisposition.name())
            .setCreateDisposition(createDisposition.name()).setSourceFormat("NEWLINE_DELIMITED_JSON")
            .setIgnoreUnknownValues(ignoreUnknownValues);
    if (timePartitioning != null) {
        loadConfig.setTimePartitioning(timePartitioning);
    }
    // The job runs in the explicitly configured project when there is one, else in the table's.
    String projectId = loadJobProjectId == null ? ref.getProjectId() : loadJobProjectId.get();
    Job lastFailedLoadJob = null;
    String bqLocation = BigQueryHelpers.getDatasetLocation(datasetService, ref.getProjectId(),
            ref.getDatasetId());

    BackOff backoff = BackOffAdapter.toGcpBackOff(
            FluentBackoff.DEFAULT.withMaxRetries(maxRetryJobs).withInitialBackoff(Duration.standardSeconds(1))
                    .withMaxBackoff(Duration.standardMinutes(1)).backoff());
    Sleeper sleeper = Sleeper.DEFAULT;
    // First attempt is always jobIdPrefix-0.
    RetryJobId jobId = new RetryJobId(jobIdPrefix, 0);
    int i = 0;
    do {
        ++i;
        JobReference jobRef = new JobReference().setProjectId(projectId).setJobId(jobId.getJobId())
                .setLocation(bqLocation);

        LOG.info("Loading {} files into {} using job {}, attempt {}", gcsUris.size(), ref, jobRef, i);
        try {
            jobService.startLoadJob(jobRef, loadConfig);
        } catch (IOException e) {
            LOG.warn("Load job {} failed with {}", jobRef, e);
            // It's possible that the job actually made it to BQ even though we got a failure here.
            // For example, the response from BQ may have timed out returning. getRetryJobId will
            // return the correct job id to use on retry, or a job id to continue polling (if it turns
            // out the job has not actually failed yet).
            RetryJobIdResult result = BigQueryHelpers.getRetryJobId(jobId, projectId, bqLocation, jobService);
            jobId = result.jobId;
            if (result.shouldRetry) {
                // Try the load again with the new job id.
                continue;
            }
            // Otherwise, the job has reached BigQuery and is in either the PENDING state or has
            // completed successfully.
        }
        LOG.info("Load job {} started", jobRef);
        // Try to wait until the job is done (succeeded or failed).
        Job loadJob = jobService.pollJob(jobRef, BatchLoads.LOAD_JOB_POLL_MAX_RETRIES);

        Status jobStatus = BigQueryHelpers.parseStatus(loadJob);
        switch (jobStatus) {
        case SUCCEEDED:
            LOG.info("Load job {} succeeded. Statistics: {}", jobRef, loadJob.getStatistics());
            if (tableDescription != null) {
                datasetService.patchTableDescription(
                        ref.clone().setTableId(BigQueryHelpers.stripPartitionDecorator(ref.getTableId())),
                        tableDescription);
            }
            return;
        case UNKNOWN:
            // This might happen if BigQuery's job listing is slow. Retry with the same
            // job id.
            // NOTE(review): pollJob may return null when polling gives up (confirm against the
            // JobService implementations), so the job is not dereferenced unconditionally here.
            LOG.info("Load job {} finished in unknown state: {}: {}", jobRef,
                    loadJob == null ? null : loadJob.getStatus(),
                    (i < maxRetryJobs - 1) ? "will retry" : "will not retry");
            lastFailedLoadJob = loadJob;
            continue;
        case FAILED:
            lastFailedLoadJob = loadJob;
            jobId = BigQueryHelpers.getRetryJobId(jobId, projectId, bqLocation, jobService).jobId;
            LOG.info("Load job {} failed, {}: {}. Next job id {}", jobRef,
                    (i < maxRetryJobs - 1) ? "will retry" : "will not retry", loadJob.getStatus(), jobId);
            continue;
        default:
            throw new IllegalStateException(String.format("Unexpected status [%s] of load job: %s.",
                    loadJob.getStatus(), BigQueryHelpers.jobToPrettyString(loadJob)));
        }
    } while (nextBackOff(sleeper, backoff));
    throw new RuntimeException(String.format(
            "Failed to create load job with id prefix %s, "
                    + "reached max retries: %d, last failed load job: %s.",
            jobIdPrefix, maxRetryJobs, BigQueryHelpers.jobToPrettyString(lastFailedLoadJob)));
}

From source file:org.apache.beam.sdk.io.kinesis.KinesisReader.java

License:Apache License

/**
 * Convenience constructor that forwards every argument to the six-argument constructor and
 * supplies a fixed 30-second {@link Duration} as the final argument.
 *
 * <p>NOTE(review): the meaning of that trailing duration is defined by the delegated
 * constructor, which is not visible here — confirm before relying on it.
 */
KinesisReader(
        SimplifiedKinesisClient kinesis,
        CheckpointGenerator initialCheckpointGenerator,
        KinesisSource source,
        WatermarkPolicyFactory watermarkPolicyFactory,
        Duration upToDateThreshold) {
    this(
            kinesis,
            initialCheckpointGenerator,
            source,
            watermarkPolicyFactory,
            upToDateThreshold,
            Duration.standardSeconds(30));
}

From source file:org.apache.beam.sdk.nexmark.NexmarkLauncher.java

License:Apache License

/**
 * Monitor the performance and progress of a running job. Return final performance if it was
 * measured./*from  ww w. j  a  va2s . c  o  m*/
 */
@Nullable
private NexmarkPerf monitor(NexmarkQuery query) {
    if (!options.getMonitorJobs()) {
        return null;
    }

    if (configuration.debug) {
        NexmarkUtils.console("Waiting for main pipeline to 'finish'");
    } else {
        NexmarkUtils.console("--debug=false, so job will not self-cancel");
    }

    PipelineResult job = mainResult;
    PipelineResult publisherJob = publisherResult;
    List<NexmarkPerf.ProgressSnapshot> snapshots = new ArrayList<>();
    long startMsSinceEpoch = System.currentTimeMillis();
    long endMsSinceEpoch = -1;
    if (options.getRunningTimeMinutes() != null) {
        endMsSinceEpoch = startMsSinceEpoch
                + Duration.standardMinutes(options.getRunningTimeMinutes()).getMillis()
                - Duration.standardSeconds(configuration.preloadSeconds).getMillis();
    }
    long lastActivityMsSinceEpoch = -1;
    NexmarkPerf perf = null;
    boolean waitingForShutdown = false;
    boolean cancelJob = false;
    boolean publisherCancelled = false;
    List<String> errors = new ArrayList<>();

    while (true) {
        long now = System.currentTimeMillis();
        if (endMsSinceEpoch >= 0 && now > endMsSinceEpoch && !waitingForShutdown) {
            NexmarkUtils.console("Reached end of test, cancelling job");
            try {
                cancelJob = true;
                job.cancel();
            } catch (IOException e) {
                throw new RuntimeException("Unable to cancel main job: ", e);
            }
            if (publisherResult != null) {
                try {
                    publisherJob.cancel();
                } catch (IOException e) {
                    throw new RuntimeException("Unable to cancel publisher job: ", e);
                }
                publisherCancelled = true;
            }
            waitingForShutdown = true;
        }

        PipelineResult.State state = job.getState();
        NexmarkUtils.console("%s %s%s", state, queryName, waitingForShutdown ? " (waiting for shutdown)" : "");

        NexmarkPerf currPerf;
        if (configuration.debug) {
            currPerf = currentPerf(startMsSinceEpoch, now, job, snapshots, query.eventMonitor,
                    query.resultMonitor);
        } else {
            currPerf = null;
        }

        if (perf == null || perf.anyActivity(currPerf)) {
            lastActivityMsSinceEpoch = now;
        }

        if (options.isStreaming() && !waitingForShutdown) {
            Duration quietFor = new Duration(lastActivityMsSinceEpoch, now);
            long fatalCount = new MetricsReader(job, query.getName()).getCounterMetric("fatal");

            if (fatalCount == -1) {
                fatalCount = 0;
            }

            if (fatalCount > 0) {
                NexmarkUtils.console("ERROR: job has fatal errors, cancelling.");
                errors.add(String.format("Pipeline reported %s fatal errors", fatalCount));
                waitingForShutdown = true;
                cancelJob = true;
            } else if (configuration.debug && configuration.numEvents > 0
                    && currPerf.numEvents == configuration.numEvents && currPerf.numResults >= 0
                    && quietFor.isLongerThan(DONE_DELAY)) {
                NexmarkUtils.console("streaming query appears to have finished waiting for completion.");
                waitingForShutdown = true;
            } else if (quietFor.isLongerThan(STUCK_TERMINATE_DELAY)) {
                NexmarkUtils.console(
                        "ERROR: streaming query appears to have been stuck for %d minutes, cancelling job.",
                        quietFor.getStandardMinutes());
                errors.add(String.format("Cancelling streaming job since it appeared stuck for %d min.",
                        quietFor.getStandardMinutes()));
                waitingForShutdown = true;
                cancelJob = true;
            } else if (quietFor.isLongerThan(STUCK_WARNING_DELAY)) {
                NexmarkUtils.console("WARNING: streaming query appears to have been stuck for %d min.",
                        quietFor.getStandardMinutes());
            }

            if (cancelJob) {
                try {
                    job.cancel();
                } catch (IOException e) {
                    throw new RuntimeException("Unable to cancel main job: ", e);
                }
            }
        }

        perf = currPerf;

        boolean running = true;
        switch (state) {
        case UNKNOWN:
        case UNRECOGNIZED:
        case STOPPED:
        case RUNNING:
            // Keep going.
            break;
        case DONE:
            // All done.
            running = false;
            break;
        case CANCELLED:
            running = false;
            if (!cancelJob) {
                errors.add("Job was unexpectedly cancelled");
            }
            break;
        case FAILED:
        case UPDATED:
            // Abnormal termination.
            running = false;
            errors.add("Job was unexpectedly updated");
            break;
        }

        if (!running) {
            break;
        }

        if (lastActivityMsSinceEpoch == now) {
            NexmarkUtils.console("new perf %s", perf);
        } else {
            NexmarkUtils.console("no activity");
        }

        try {
            Thread.sleep(PERF_DELAY.getMillis());
        } catch (InterruptedException e) {
            Thread.interrupted();
            NexmarkUtils.console("Interrupted: pipeline is still running");
        }
    }

    perf.errors = errors;
    perf.snapshots = snapshots;

    if (publisherResult != null) {
        NexmarkUtils.console("Shutting down publisher pipeline.");
        try {
            if (!publisherCancelled) {
                publisherJob.cancel();
            }
            publisherJob.waitUntilFinish(Duration.standardMinutes(5));
        } catch (IOException e) {
            throw new RuntimeException("Unable to cancel publisher job: ", e);
        }
    }

    return perf;
}

From source file:org.apache.beam.sdk.nexmark.NexmarkLauncher.java

License:Apache License

/**
 * Run {@code configuration} and return its performance if possible.
 *
 * <p>Builds the pipeline for the configured query (source, optional event logging, optional Avro
 * sink, the query itself, optional correctness assertion, result sink), runs it, waits up to
 * {@code configuration.streamTimeout} seconds and then hands over to {@code monitor}. Per-run
 * state ({@code pubsubHelper}, {@code configuration}, {@code queryName}) is reset in all cases.
 *
 * @return the measured performance, or {@code null} when the configuration is disabled or not
 *     implemented, when only the model result rate was requested, or when monitoring yields none
 * @throws IOException propagated from pipeline set-up helpers
 * @throws RuntimeException if {@code --manageResources} is used without {@code --monitorJobs}, or
 *     a model is required but not available for the query
 */
@Nullable
public NexmarkPerf run() throws IOException {
    if (options.getManageResources() && !options.getMonitorJobs()) {
        throw new RuntimeException("If using --manageResources then must also use --monitorJobs.");
    }

    //
    // Setup per-run state.
    //
    checkState(queryName == null);
    if (configuration.sourceType.equals(SourceType.PUBSUB)) {
        pubsubHelper = PubsubHelper.create(options);
    }

    try {
        NexmarkUtils.console("Running %s", configuration.toShortString());

        if (configuration.numEvents < 0) {
            NexmarkUtils.console("skipping since configuration is disabled");
            return null;
        }

        NexmarkQuery<? extends KnownSize> query = getNexmarkQuery();
        if (query == null) {
            NexmarkUtils.console("skipping since configuration is not implemented");
            return null;
        }

        queryName = query.getName();

        // Append queryName to temp location. An unset (null) location is left alone; string
        // concatenation would otherwise turn it into the literal "null/<queryName>".
        String tempLocation = options.getTempLocation();
        if (tempLocation != null && !tempLocation.isEmpty()) {
            options.setTempLocation(tempLocation + "/" + queryName);
        }

        NexmarkQueryModel model = getNexmarkQueryModel();

        if (options.getJustModelResultRate()) {
            if (model == null) {
                throw new RuntimeException(String.format("No model for %s", queryName));
            }
            modelResultRates(model);
            return null;
        }

        final Instant now = Instant.now();
        Pipeline p = Pipeline.create(options);
        NexmarkUtils.setupPipeline(configuration.coderStrategy, p);

        // Generate events.
        PCollection<Event> source = createSource(p, now);

        if (query.getTransform().needsSideInput()) {
            query.getTransform().setSideInput(NexmarkUtils.prepareSideInput(p, configuration));
        }

        // Source will be null if source type is PUBSUB and mode is PUBLISH_ONLY, so event logging
        // must be skipped in that case instead of dereferencing the missing source.
        if (source != null && options.getLogEvents()) {
            source = source.apply(queryName + ".Events.Log", NexmarkUtils.log(queryName + ".Events"));
        }

        // With a null source there's nothing more to add to pipeline.
        if (source != null) {
            // Optionally sink events in Avro format.
            // (Query results are ignored).
            if (configuration.sinkType == NexmarkUtils.SinkType.AVRO) {
                sinkEventsToAvro(source);
            }

            // Query 10 logs all events to Google Cloud storage files. It could generate a lot of logs,
            // so, set parallelism. Also set the output path where to write log files.
            if (configuration.query == NexmarkQueryName.LOG_TO_SHARDED_FILES) {
                String path = null;
                if (options.getOutputPath() != null && !options.getOutputPath().isEmpty()) {
                    path = logsDir(now.getMillis());
                }
                ((Query10) query.getTransform()).setOutputPath(path);
                ((Query10) query.getTransform()).setMaxNumWorkers(maxNumWorkers());
            }

            // Apply query.
            PCollection<TimestampedValue<KnownSize>> results = (PCollection<TimestampedValue<KnownSize>>) source
                    .apply(query);

            if (options.getAssertCorrectness()) {
                if (model == null) {
                    throw new RuntimeException(String.format("No model for %s", queryName));
                }
                // We know all our streams have a finite number of elements.
                results.setIsBoundedInternal(PCollection.IsBounded.BOUNDED);
                // If we have a finite number of events then assert our pipeline's
                // results match those of a model using the same sequence of events.
                PAssert.that(results).satisfies(model.assertionFor());
            }

            // Output results.
            sink(results, now.getMillis());
        }

        mainResult = p.run();
        mainResult.waitUntilFinish(Duration.standardSeconds(configuration.streamTimeout));
        return monitor(query);
    } finally {
        // Always release Pubsub resources and clear the per-run state set up above.
        if (pubsubHelper != null) {
            pubsubHelper.cleanup();
            pubsubHelper = null;
        }
        configuration = null;
        queryName = null;
    }
}

From source file:org.apache.beam.sdk.nexmark.PubsubHelper.java

License:Apache License

/**
 * Creates a helper for {@code project} that talks to Pubsub through {@code pubsubClient}.
 *
 * <p>The lists of created topics and subscriptions start out empty. The backoff starts at one
 * second and is configured with 3 retries.
 */
private PubsubHelper(PubsubClient pubsubClient, String project) {
    this.pubsubClient = pubsubClient;
    this.project = project;
    this.createdTopics = new ArrayList<>();
    this.createdSubscriptions = new ArrayList<>();
    this.sleeper = Sleeper.DEFAULT;
    this.backOff = FluentBackoff.DEFAULT
            .withInitialBackoff(Duration.standardSeconds(1))
            .withMaxRetries(3)
            .backoff();
}

From source file:org.apache.beam.sdk.nexmark.queries.Query10.java

License:Apache License

/**
 * Expands the query into a pipeline that writes events to sharded files and then writes one
 * index per window.
 *
 * <p>Stages, in order:
 *
 * <ol>
 *   <li>{@code .ShardEvents} keys every event by a shard name derived from its hash code and
 *       counts events annotated {@code "LATE"} versus the rest.
 *   <li>{@code .WindowEvents} puts the keyed events into fixed windows of
 *       {@code configuration.windowSizeSec} seconds with count- and processing-time-based
 *       triggers, discarding fired panes, with one day of allowed lateness.
 *   <li>{@code .GroupByKey} and {@code .CheckForLateEvents} group per shard and log / count panes
 *       whose timing does not match the annotations of the events they contain.
 *   <li>{@code .UploadEvents} encodes each pane's events into the file chosen by
 *       {@code outputFileFor} (skipped when that file has no name).
 *   <li>{@code .WindowLogFiles}, {@code .GroupByKey2} and {@code .Index} re-window the written
 *       files and write one index file per window, emitting a {@code Done} marker.
 * </ol>
 *
 * <p>Log messages with printf-style placeholders are rendered with {@link String#format} before
 * being handed to the logger, matching the other log calls in this method, so the values are
 * actually interpolated.
 *
 * @param events the input events
 * @return one {@code Done} per successfully indexed window
 */
@Override
public PCollection<Done> expand(PCollection<Event> events) {
    final int numLogShards = maxNumWorkers * NUM_SHARDS_PER_WORKER;

    return events.apply(name + ".ShardEvents", ParDo.of(new DoFn<Event, KV<String, Event>>() {
        private final Counter lateCounter = Metrics.counter(name, "actuallyLateEvent");
        private final Counter onTimeCounter = Metrics.counter(name, "onTimeCounter");

        @ProcessElement
        public void processElement(ProcessContext c) {
            if (c.element().hasAnnotation("LATE")) {
                lateCounter.inc();
                LOG.info(String.format("Observed late: %s", c.element()));
            } else {
                onTimeCounter.inc();
            }
            // Widen to long before abs so that Integer.MIN_VALUE cannot stay negative.
            int shardNum = (int) Math.abs((long) c.element().hashCode() % numLogShards);
            String shard = String.format("shard-%05d-of-%05d", shardNum, numLogShards);
            c.output(KV.of(shard, c.element()));
        }
    })).apply(name + ".WindowEvents", Window
            .<KV<String, Event>>into(FixedWindows.of(Duration.standardSeconds(configuration.windowSizeSec)))
            .triggering(AfterEach.inOrder(
                    Repeatedly.forever(AfterPane.elementCountAtLeast(configuration.maxLogEvents))
                            .orFinally(AfterWatermark.pastEndOfWindow()),
                    Repeatedly.forever(AfterFirst.of(AfterPane.elementCountAtLeast(configuration.maxLogEvents),
                            AfterProcessingTime.pastFirstElementInPane().plusDelayOf(LATE_BATCHING_PERIOD)))))
            .discardingFiredPanes()
            // Use a 1 day allowed lateness so that any forgotten hold will stall the
            // pipeline for that period and be very noticeable.
            .withAllowedLateness(Duration.standardDays(1))).apply(name + ".GroupByKey", GroupByKey.create())
            .apply(name + ".CheckForLateEvents",
                    ParDo.of(new DoFn<KV<String, Iterable<Event>>, KV<String, Iterable<Event>>>() {
                        private final Counter earlyCounter = Metrics.counter(name, "earlyShard");
                        private final Counter onTimeCounter = Metrics.counter(name, "onTimeShard");
                        private final Counter lateCounter = Metrics.counter(name, "lateShard");
                        private final Counter unexpectedLatePaneCounter = Metrics.counter(name,
                                "ERROR_unexpectedLatePane");
                        private final Counter unexpectedOnTimeElementCounter = Metrics.counter(name,
                                "ERROR_unexpectedOnTimeElement");

                        @ProcessElement
                        public void processElement(ProcessContext c, BoundedWindow window) {
                            int numLate = 0;
                            int numOnTime = 0;
                            for (Event event : c.element().getValue()) {
                                if (event.hasAnnotation("LATE")) {
                                    numLate++;
                                } else {
                                    numOnTime++;
                                }
                            }
                            String shard = c.element().getKey();
                            LOG.info(String.format(
                                    "%s with timestamp %s has %d actually late and %d on-time "
                                            + "elements in pane %s for window %s",
                                    shard, c.timestamp(), numLate, numOnTime, c.pane(), window.maxTimestamp()));
                            if (c.pane().getTiming() == PaneInfo.Timing.LATE) {
                                if (numLate == 0) {
                                    LOG.error(String.format("ERROR! No late events in late pane for %s", shard));
                                    unexpectedLatePaneCounter.inc();
                                }
                                if (numOnTime > 0) {
                                    LOG.error(String.format("ERROR! Have %d on-time events in late pane for %s",
                                            numOnTime, shard));
                                    unexpectedOnTimeElementCounter.inc();
                                }
                                lateCounter.inc();
                            } else if (c.pane().getTiming() == PaneInfo.Timing.EARLY) {
                                if (numOnTime + numLate < configuration.maxLogEvents) {
                                    LOG.error(String.format("ERROR! Only have %d events in early pane for %s",
                                            numOnTime + numLate, shard));
                                }
                                earlyCounter.inc();
                            } else {
                                onTimeCounter.inc();
                            }
                            c.output(c.element());
                        }
                    }))
            .apply(name + ".UploadEvents",
                    ParDo.of(new DoFn<KV<String, Iterable<Event>>, KV<Void, OutputFile>>() {
                        private final Counter savedFileCounter = Metrics.counter(name, "savedFile");
                        private final Counter writtenRecordsCounter = Metrics.counter(name, "writtenRecords");

                        @ProcessElement
                        public void processElement(ProcessContext c, BoundedWindow window) throws IOException {
                            String shard = c.element().getKey();
                            GcsOptions options = c.getPipelineOptions().as(GcsOptions.class);
                            OutputFile outputFile = outputFileFor(window, shard, c.pane());
                            LOG.info(String.format(
                                    "Writing %s with record timestamp %s, window timestamp %s, pane %s", shard,
                                    c.timestamp(), window.maxTimestamp(), c.pane()));
                            if (outputFile.filename != null) {
                                LOG.info(String.format("Beginning write to '%s'", outputFile.filename));
                                int n = 0;
                                try (OutputStream output = Channels
                                        .newOutputStream(openWritableGcsFile(options, outputFile.filename))) {
                                    for (Event event : c.element().getValue()) {
                                        Event.CODER.encode(event, output, Coder.Context.OUTER);
                                        writtenRecordsCounter.inc();
                                        if (++n % 10000 == 0) {
                                            LOG.info(String.format("So far written %d records to '%s'", n,
                                                    outputFile.filename));
                                        }
                                    }
                                }
                                LOG.info(String.format("Written all %d records to '%s'", n,
                                        outputFile.filename));
                            }
                            savedFileCounter.inc();
                            c.output(KV.of(null, outputFile));
                        }
                    }))
            // Clear fancy triggering from above.
            .apply(name + ".WindowLogFiles",
                    Window.<KV<Void, OutputFile>>into(
                            FixedWindows.of(Duration.standardSeconds(configuration.windowSizeSec)))
                            .triggering(AfterWatermark.pastEndOfWindow())
                            // We expect no late data here, but we'll assume the worst so we can detect any.
                            .withAllowedLateness(Duration.standardDays(1)).discardingFiredPanes())
            // this GroupByKey allows to have one file per window
            .apply(name + ".GroupByKey2", GroupByKey.create())
            .apply(name + ".Index", ParDo.of(new DoFn<KV<Void, Iterable<OutputFile>>, Done>() {
                private final Counter unexpectedLateCounter = Metrics.counter(name, "ERROR_unexpectedLate");
                private final Counter unexpectedEarlyCounter = Metrics.counter(name, "ERROR_unexpectedEarly");
                private final Counter unexpectedIndexCounter = Metrics.counter(name, "ERROR_unexpectedIndex");
                private final Counter finalizedCounter = Metrics.counter(name, "indexed");

                @ProcessElement
                public void processElement(ProcessContext c, BoundedWindow window) throws IOException {
                    if (c.pane().getTiming() == Timing.LATE) {
                        unexpectedLateCounter.inc();
                        LOG.error(String.format("ERROR! Unexpected LATE pane: %s", c.pane()));
                    } else if (c.pane().getTiming() == Timing.EARLY) {
                        unexpectedEarlyCounter.inc();
                        LOG.error(String.format("ERROR! Unexpected EARLY pane: %s", c.pane()));
                    } else if (c.pane().getTiming() == Timing.ON_TIME && c.pane().getIndex() != 0) {
                        unexpectedIndexCounter.inc();
                        LOG.error(String.format("ERROR! Unexpected ON_TIME pane index: %s", c.pane()));
                    } else {
                        GcsOptions options = c.getPipelineOptions().as(GcsOptions.class);
                        LOG.info(String.format(
                                "Index with record timestamp %s, window timestamp %s, pane %s", c.timestamp(),
                                window.maxTimestamp(), c.pane()));

                        @Nullable
                        String filename = indexPathFor(window);
                        if (filename != null) {
                            LOG.info(String.format("Beginning write to '%s'", filename));
                            int n = 0;
                            try (OutputStream output = Channels
                                    .newOutputStream(openWritableGcsFile(options, filename))) {
                                for (OutputFile outputFile : c.element().getValue()) {
                                    output.write(outputFile.toString().getBytes(StandardCharsets.UTF_8));
                                    n++;
                                }
                            }
                            LOG.info(String.format("Written all %d lines to '%s'", n, filename));
                        }
                        c.output(new Done("written for timestamp " + window.maxTimestamp()));
                        finalizedCounter.inc();
                    }
                }
            }));
}

From source file:org.apache.beam.sdk.nexmark.queries.Query11.java

License:Apache License

/**
 * Counts bids per bidder within session windows.
 *
 * <p>Keeps only bids, maps each bid to its bidder id, groups the ids into session windows whose
 * gap is {@code configuration.windowSizeSec} seconds (firing repeatedly once at least
 * {@code configuration.maxLogEvents} elements are in a pane, discarding fired panes, with an
 * allowed lateness of half of {@code configuration.occasionalDelaySec} seconds), counts the
 * occurrences of each id and wraps every (bidder, count) pair in a {@code BidsPerSession}.
 *
 * @param events the input events
 * @return one {@code BidsPerSession} per bidder and fired pane
 */
@Override
public PCollection<BidsPerSession> expand(PCollection<Event> events) {
    PCollection<Long> bidderIds = events
            .apply(NexmarkQueryUtil.JUST_BIDS)
            .apply(name + ".Rekey", ParDo.of(new DoFn<Bid, Long>() {

                @ProcessElement
                public void processElement(ProcessContext c) {
                    c.output(c.element().bidder);
                }
            }));

    Duration sessionGap = Duration.standardSeconds(configuration.windowSizeSec);
    Duration allowedLateness = Duration.standardSeconds(configuration.occasionalDelaySec / 2);
    PCollection<Long> sessionedBidders = bidderIds.apply(
            Window.<Long>into(Sessions.withGapDuration(sessionGap))
                    .triggering(Repeatedly.forever(AfterPane.elementCountAtLeast(configuration.maxLogEvents)))
                    .discardingFiredPanes()
                    .withAllowedLateness(allowedLateness));

    return sessionedBidders
            .apply(Count.perElement())
            .apply(name + ".ToResult", ParDo.of(new DoFn<KV<Long, Long>, BidsPerSession>() {

                @ProcessElement
                public void processElement(ProcessContext c) {
                    KV<Long, Long> bidderAndCount = c.element();
                    c.output(new BidsPerSession(bidderAndCount.getKey(), bidderAndCount.getValue()));
                }
            }));
}