List of usage examples for org.joda.time.Duration.standardSeconds
public static Duration standardSeconds(long seconds)
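standardSeconds creates a Duration of exactly seconds * 1000 milliseconds; a standard second is always 1000 ms, independent of calendar or time zone. A minimal standalone sketch of the method itself (the class name is illustrative, not from the examples below):

import org.joda.time.Duration;

public class StandardSecondsDemo {
  public static void main(String[] args) {
    // 30 standard seconds is exactly 30 * 1000 milliseconds.
    Duration thirty = Duration.standardSeconds(30);
    System.out.println(thirty.getMillis());          // 30000
    System.out.println(thirty.getStandardSeconds()); // 30
    System.out.println(thirty.getStandardMinutes()); // 0
  }
}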
From source file:org.apache.beam.sdk.extensions.sql.meta.provider.seqgen.GenerateSequenceTable.java
License:Apache License
@Override
public PCollection<Row> buildIOReader(PBegin begin) {
  return begin
      .apply(GenerateSequence.from(0).withRate(elementsPerSecond, Duration.standardSeconds(1)))
      .apply(
          MapElements.into(TypeDescriptor.of(Row.class))
              .via(elm -> Row.withSchema(TABLE_SCHEMA).addValues(elm, Instant.now()).build()))
      .setRowSchema(getSchema());
}
From source file:org.apache.beam.sdk.io.gcp.bigquery.FakeJobService.java
License:Apache License
@Override
public Job pollJob(JobReference jobRef, int maxAttempts) throws InterruptedException {
  BackOff backoff =
      BackOffAdapter.toGcpBackOff(
          FluentBackoff.DEFAULT
              .withMaxRetries(maxAttempts)
              .withInitialBackoff(Duration.millis(10))
              .withMaxBackoff(Duration.standardSeconds(1))
              .backoff());
  Sleeper sleeper = Sleeper.DEFAULT;
  try {
    do {
      Job job = getJob(jobRef);
      if (job != null) {
        JobStatus status = job.getStatus();
        if (status != null
            && ("DONE".equals(status.getState()) || "FAILED".equals(status.getState()))) {
          return job;
        }
      }
    } while (BackOffUtils.next(sleeper, backoff));
  } catch (IOException e) {
    return null;
  }
  return null;
}
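This example and several of the BigQuery examples below share the same retry idiom: a FluentBackoff built from Duration.standardSeconds, a Sleeper, and a do/while loop over BackOffUtils.next. A condensed, self-contained sketch of just that idiom (the class and method names here are illustrative, not from the Beam sources):

import java.io.IOException;
import org.apache.beam.sdk.util.BackOff;
import org.apache.beam.sdk.util.BackOffUtils;
import org.apache.beam.sdk.util.FluentBackoff;
import org.apache.beam.sdk.util.Sleeper;
import org.joda.time.Duration;

public class BackoffSketch {
  // Retry an action up to 3 times, starting at 1 second and capping at 1 minute.
  static boolean runWithRetries(Runnable action) throws IOException, InterruptedException {
    BackOff backoff =
        FluentBackoff.DEFAULT
            .withMaxRetries(3)
            .withInitialBackoff(Duration.standardSeconds(1))
            .withMaxBackoff(Duration.standardMinutes(1))
            .backoff();
    Sleeper sleeper = Sleeper.DEFAULT;
    do {
      try {
        action.run();
        return true;
      } catch (RuntimeException e) {
        // Fall through: sleep per the backoff schedule, then retry.
      }
    } while (BackOffUtils.next(sleeper, backoff));
    return false; // Retries exhausted.
  }
}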
From source file:org.apache.beam.sdk.io.gcp.bigquery.WriteRename.java
License:Apache License
private void copy(
    JobService jobService,
    DatasetService datasetService,
    String jobIdPrefix,
    TableReference ref,
    List<TableReference> tempTables,
    WriteDisposition writeDisposition,
    CreateDisposition createDisposition,
    @Nullable String tableDescription)
    throws InterruptedException, IOException {
  JobConfigurationTableCopy copyConfig =
      new JobConfigurationTableCopy()
          .setSourceTables(tempTables)
          .setDestinationTable(ref)
          .setWriteDisposition(writeDisposition.name())
          .setCreateDisposition(createDisposition.name());
  String projectId = ref.getProjectId();
  Job lastFailedCopyJob = null;
  RetryJobId jobId = new RetryJobId(jobIdPrefix, 0);
  String bqLocation =
      BigQueryHelpers.getDatasetLocation(datasetService, ref.getProjectId(), ref.getDatasetId());
  BackOff backoff =
      BackOffAdapter.toGcpBackOff(
          FluentBackoff.DEFAULT
              .withMaxRetries(maxRetryJobs)
              .withInitialBackoff(Duration.standardSeconds(1))
              .withMaxBackoff(Duration.standardMinutes(1))
              .backoff());
  Sleeper sleeper = Sleeper.DEFAULT;
  int i = 0;
  do {
    ++i;
    JobReference jobRef =
        new JobReference()
            .setProjectId(projectId)
            .setJobId(jobId.getJobId())
            .setLocation(bqLocation);
    LOG.info("Starting copy job for table {} using {}, attempt {}", ref, jobRef, i);
    try {
      jobService.startCopyJob(jobRef, copyConfig);
    } catch (IOException e) {
      LOG.warn("Copy job {} failed with {}", jobRef, e);
      // It's possible that the job actually made it to BQ even though we got a failure here.
      // For example, the response from BQ may have timed out returning. getRetryJobId will
      // return the correct job id to use on retry, or a job id to continue polling (if it turns
      // out that the job has not actually failed yet).
      RetryJobIdResult result =
          BigQueryHelpers.getRetryJobId(jobId, projectId, bqLocation, jobService);
      jobId = result.jobId;
      if (result.shouldRetry) {
        // Try the copy again with the new job id.
        continue;
      }
      // Otherwise, the job has reached BigQuery and is in either the PENDING state or has
      // completed successfully.
    }
    Job copyJob = jobService.pollJob(jobRef, BatchLoads.LOAD_JOB_POLL_MAX_RETRIES);
    Status jobStatus = BigQueryHelpers.parseStatus(copyJob);
    switch (jobStatus) {
      case SUCCEEDED:
        if (tableDescription != null) {
          datasetService.patchTableDescription(ref, tableDescription);
        }
        return;
      case UNKNOWN:
        // This might happen if BigQuery's job listing is slow. Retry with the same job id.
        LOG.info(
            "Copy job {} finished in unknown state: {}: {}",
            jobRef,
            copyJob.getStatus(),
            (i < maxRetryJobs - 1) ? "will retry" : "will not retry");
        lastFailedCopyJob = copyJob;
        continue;
      case FAILED:
        lastFailedCopyJob = copyJob;
        jobId = BigQueryHelpers.getRetryJobId(jobId, projectId, bqLocation, jobService).jobId;
        continue;
      default:
        throw new IllegalStateException(
            String.format(
                "Unexpected status [%s] of copy job: %s.",
                jobStatus, BigQueryHelpers.jobToPrettyString(copyJob)));
    }
  } while (nextBackOff(sleeper, backoff));
  throw new RuntimeException(
      String.format(
          "Failed to create copy job with id prefix %s, reached max retries: %d, "
              + "last failed copy job: %s.",
          jobIdPrefix, maxRetryJobs, BigQueryHelpers.jobToPrettyString(lastFailedCopyJob)));
}
From source file:org.apache.beam.sdk.io.gcp.bigquery.WriteTables.java
License:Apache License
private void load(
    JobService jobService,
    DatasetService datasetService,
    String jobIdPrefix,
    TableReference ref,
    TimePartitioning timePartitioning,
    @Nullable TableSchema schema,
    List<String> gcsUris,
    WriteDisposition writeDisposition,
    CreateDisposition createDisposition,
    @Nullable String tableDescription)
    throws InterruptedException, IOException {
  JobConfigurationLoad loadConfig =
      new JobConfigurationLoad()
          .setDestinationTable(ref)
          .setSchema(schema)
          .setSourceUris(gcsUris)
          .setWriteDisposition(writeDisposition.name())
          .setCreateDisposition(createDisposition.name())
          .setSourceFormat("NEWLINE_DELIMITED_JSON")
          .setIgnoreUnknownValues(ignoreUnknownValues);
  if (timePartitioning != null) {
    loadConfig.setTimePartitioning(timePartitioning);
  }
  String projectId = loadJobProjectId == null ? ref.getProjectId() : loadJobProjectId.get();
  Job lastFailedLoadJob = null;
  String bqLocation =
      BigQueryHelpers.getDatasetLocation(datasetService, ref.getProjectId(), ref.getDatasetId());
  BackOff backoff =
      BackOffAdapter.toGcpBackOff(
          FluentBackoff.DEFAULT
              .withMaxRetries(maxRetryJobs)
              .withInitialBackoff(Duration.standardSeconds(1))
              .withMaxBackoff(Duration.standardMinutes(1))
              .backoff());
  Sleeper sleeper = Sleeper.DEFAULT;
  // First attempt is always jobIdPrefix-0.
  RetryJobId jobId = new RetryJobId(jobIdPrefix, 0);
  int i = 0;
  do {
    ++i;
    JobReference jobRef =
        new JobReference()
            .setProjectId(projectId)
            .setJobId(jobId.getJobId())
            .setLocation(bqLocation);
    LOG.info("Loading {} files into {} using job {}, attempt {}", gcsUris.size(), ref, jobRef, i);
    try {
      jobService.startLoadJob(jobRef, loadConfig);
    } catch (IOException e) {
      LOG.warn("Load job {} failed with {}", jobRef, e);
      // It's possible that the job actually made it to BQ even though we got a failure here.
      // For example, the response from BQ may have timed out returning. getRetryJobId will
      // return the correct job id to use on retry, or a job id to continue polling (if it turns
      // out that the job has not actually failed yet).
      RetryJobIdResult result =
          BigQueryHelpers.getRetryJobId(jobId, projectId, bqLocation, jobService);
      jobId = result.jobId;
      if (result.shouldRetry) {
        // Try the load again with the new job id.
        continue;
      }
      // Otherwise, the job has reached BigQuery and is in either the PENDING state or has
      // completed successfully.
    }
    LOG.info("Load job {} started", jobRef);
    // Try to wait until the job is done (succeeded or failed).
    Job loadJob = jobService.pollJob(jobRef, BatchLoads.LOAD_JOB_POLL_MAX_RETRIES);
    Status jobStatus = BigQueryHelpers.parseStatus(loadJob);
    switch (jobStatus) {
      case SUCCEEDED:
        LOG.info("Load job {} succeeded. Statistics: {}", jobRef, loadJob.getStatistics());
        if (tableDescription != null) {
          datasetService.patchTableDescription(
              ref.clone().setTableId(BigQueryHelpers.stripPartitionDecorator(ref.getTableId())),
              tableDescription);
        }
        return;
      case UNKNOWN:
        // This might happen if BigQuery's job listing is slow. Retry with the same job id.
        LOG.info(
            "Load job {} finished in unknown state: {}: {}",
            jobRef,
            loadJob.getStatus(),
            (i < maxRetryJobs - 1) ? "will retry" : "will not retry");
        lastFailedLoadJob = loadJob;
        continue;
      case FAILED:
        lastFailedLoadJob = loadJob;
        jobId = BigQueryHelpers.getRetryJobId(jobId, projectId, bqLocation, jobService).jobId;
        LOG.info(
            "Load job {} failed, {}: {}. Next job id {}",
            jobRef,
            (i < maxRetryJobs - 1) ? "will retry" : "will not retry",
            loadJob.getStatus(),
            jobId);
        continue;
      default:
        throw new IllegalStateException(
            String.format(
                "Unexpected status [%s] of load job: %s.",
                loadJob.getStatus(), BigQueryHelpers.jobToPrettyString(loadJob)));
    }
  } while (nextBackOff(sleeper, backoff));
  throw new RuntimeException(
      String.format(
          "Failed to create load job with id prefix %s, reached max retries: %d, "
              + "last failed load job: %s.",
          jobIdPrefix, maxRetryJobs, BigQueryHelpers.jobToPrettyString(lastFailedLoadJob)));
}
From source file:org.apache.beam.sdk.io.kinesis.KinesisReader.java
License:Apache License
KinesisReader(
    SimplifiedKinesisClient kinesis,
    CheckpointGenerator initialCheckpointGenerator,
    KinesisSource source,
    WatermarkPolicyFactory watermarkPolicyFactory,
    Duration upToDateThreshold) {
  this(
      kinesis,
      initialCheckpointGenerator,
      source,
      watermarkPolicyFactory,
      upToDateThreshold,
      Duration.standardSeconds(30));
}
From source file:org.apache.beam.sdk.nexmark.NexmarkLauncher.java
License:Apache License
/**
 * Monitor the performance and progress of a running job. Return final performance if it was
 * measured.
 */
@Nullable
private NexmarkPerf monitor(NexmarkQuery query) {
  if (!options.getMonitorJobs()) {
    return null;
  }
  if (configuration.debug) {
    NexmarkUtils.console("Waiting for main pipeline to 'finish'");
  } else {
    NexmarkUtils.console("--debug=false, so job will not self-cancel");
  }
  PipelineResult job = mainResult;
  PipelineResult publisherJob = publisherResult;
  List<NexmarkPerf.ProgressSnapshot> snapshots = new ArrayList<>();
  long startMsSinceEpoch = System.currentTimeMillis();
  long endMsSinceEpoch = -1;
  if (options.getRunningTimeMinutes() != null) {
    endMsSinceEpoch =
        startMsSinceEpoch
            + Duration.standardMinutes(options.getRunningTimeMinutes()).getMillis()
            - Duration.standardSeconds(configuration.preloadSeconds).getMillis();
  }
  long lastActivityMsSinceEpoch = -1;
  NexmarkPerf perf = null;
  boolean waitingForShutdown = false;
  boolean cancelJob = false;
  boolean publisherCancelled = false;
  List<String> errors = new ArrayList<>();
  while (true) {
    long now = System.currentTimeMillis();
    if (endMsSinceEpoch >= 0 && now > endMsSinceEpoch && !waitingForShutdown) {
      NexmarkUtils.console("Reached end of test, cancelling job");
      try {
        cancelJob = true;
        job.cancel();
      } catch (IOException e) {
        throw new RuntimeException("Unable to cancel main job: ", e);
      }
      if (publisherResult != null) {
        try {
          publisherJob.cancel();
        } catch (IOException e) {
          throw new RuntimeException("Unable to cancel publisher job: ", e);
        }
        publisherCancelled = true;
      }
      waitingForShutdown = true;
    }
    PipelineResult.State state = job.getState();
    NexmarkUtils.console(
        "%s %s%s", state, queryName, waitingForShutdown ? " (waiting for shutdown)" : "");
    NexmarkPerf currPerf;
    if (configuration.debug) {
      currPerf =
          currentPerf(startMsSinceEpoch, now, job, snapshots, query.eventMonitor, query.resultMonitor);
    } else {
      currPerf = null;
    }
    if (perf == null || perf.anyActivity(currPerf)) {
      lastActivityMsSinceEpoch = now;
    }
    if (options.isStreaming() && !waitingForShutdown) {
      Duration quietFor = new Duration(lastActivityMsSinceEpoch, now);
      long fatalCount = new MetricsReader(job, query.getName()).getCounterMetric("fatal");
      if (fatalCount == -1) {
        fatalCount = 0;
      }
      if (fatalCount > 0) {
        NexmarkUtils.console("ERROR: job has fatal errors, cancelling.");
        errors.add(String.format("Pipeline reported %s fatal errors", fatalCount));
        waitingForShutdown = true;
        cancelJob = true;
      } else if (configuration.debug
          && configuration.numEvents > 0
          && currPerf.numEvents == configuration.numEvents
          && currPerf.numResults >= 0
          && quietFor.isLongerThan(DONE_DELAY)) {
        NexmarkUtils.console("streaming query appears to have finished waiting for completion.");
        waitingForShutdown = true;
      } else if (quietFor.isLongerThan(STUCK_TERMINATE_DELAY)) {
        NexmarkUtils.console(
            "ERROR: streaming query appears to have been stuck for %d minutes, cancelling job.",
            quietFor.getStandardMinutes());
        errors.add(
            String.format(
                "Cancelling streaming job since it appeared stuck for %d min.",
                quietFor.getStandardMinutes()));
        waitingForShutdown = true;
        cancelJob = true;
      } else if (quietFor.isLongerThan(STUCK_WARNING_DELAY)) {
        NexmarkUtils.console(
            "WARNING: streaming query appears to have been stuck for %d min.",
            quietFor.getStandardMinutes());
      }
      if (cancelJob) {
        try {
          job.cancel();
        } catch (IOException e) {
          throw new RuntimeException("Unable to cancel main job: ", e);
        }
      }
    }
    perf = currPerf;
    boolean running = true;
    switch (state) {
      case UNKNOWN:
      case UNRECOGNIZED:
      case STOPPED:
      case RUNNING:
        // Keep going.
        break;
      case DONE:
        // All done.
        running = false;
        break;
      case CANCELLED:
        running = false;
        if (!cancelJob) {
          errors.add("Job was unexpectedly cancelled");
        }
        break;
      case FAILED:
      case UPDATED:
        // Abnormal termination.
        running = false;
        errors.add("Job was unexpectedly updated");
        break;
    }
    if (!running) {
      break;
    }
    if (lastActivityMsSinceEpoch == now) {
      NexmarkUtils.console("new perf %s", perf);
    } else {
      NexmarkUtils.console("no activity");
    }
    try {
      Thread.sleep(PERF_DELAY.getMillis());
    } catch (InterruptedException e) {
      Thread.interrupted();
      NexmarkUtils.console("Interrupted: pipeline is still running");
    }
  }
  perf.errors = errors;
  perf.snapshots = snapshots;
  if (publisherResult != null) {
    NexmarkUtils.console("Shutting down publisher pipeline.");
    try {
      if (!publisherCancelled) {
        publisherJob.cancel();
      }
      publisherJob.waitUntilFinish(Duration.standardMinutes(5));
    } catch (IOException e) {
      throw new RuntimeException("Unable to cancel publisher job: ", e);
    }
  }
  return perf;
}
From source file:org.apache.beam.sdk.nexmark.NexmarkLauncher.java
License:Apache License
/** Run {@code configuration} and return its performance if possible. */
@Nullable
public NexmarkPerf run() throws IOException {
  if (options.getManageResources() && !options.getMonitorJobs()) {
    throw new RuntimeException("If using --manageResources then must also use --monitorJobs.");
  }
  //
  // Set up per-run state.
  //
  checkState(queryName == null);
  if (configuration.sourceType.equals(SourceType.PUBSUB)) {
    pubsubHelper = PubsubHelper.create(options);
  }
  try {
    NexmarkUtils.console("Running %s", configuration.toShortString());
    if (configuration.numEvents < 0) {
      NexmarkUtils.console("skipping since configuration is disabled");
      return null;
    }
    NexmarkQuery<? extends KnownSize> query = getNexmarkQuery();
    if (query == null) {
      NexmarkUtils.console("skipping since configuration is not implemented");
      return null;
    }
    queryName = query.getName();
    // Append queryName to temp location.
    if (!"".equals(options.getTempLocation())) {
      options.setTempLocation(options.getTempLocation() + "/" + queryName);
    }
    NexmarkQueryModel model = getNexmarkQueryModel();
    if (options.getJustModelResultRate()) {
      if (model == null) {
        throw new RuntimeException(String.format("No model for %s", queryName));
      }
      modelResultRates(model);
      return null;
    }
    final Instant now = Instant.now();
    Pipeline p = Pipeline.create(options);
    NexmarkUtils.setupPipeline(configuration.coderStrategy, p);
    // Generate events.
    PCollection<Event> source = createSource(p, now);
    if (query.getTransform().needsSideInput()) {
      query.getTransform().setSideInput(NexmarkUtils.prepareSideInput(p, configuration));
    }
    if (options.getLogEvents()) {
      source = source.apply(queryName + ".Events.Log", NexmarkUtils.log(queryName + ".Events"));
    }
    // Source will be null if source type is PUBSUB and mode is PUBLISH_ONLY.
    // In that case there's nothing more to add to the pipeline.
    if (source != null) {
      // Optionally sink events in Avro format.
      // (Query results are ignored.)
      if (configuration.sinkType == NexmarkUtils.SinkType.AVRO) {
        sinkEventsToAvro(source);
      }
      // Query 10 logs all events to Google Cloud Storage files. It could generate a lot of logs,
      // so set parallelism. Also set the output path where to write log files.
      if (configuration.query == NexmarkQueryName.LOG_TO_SHARDED_FILES) {
        String path = null;
        if (options.getOutputPath() != null && !options.getOutputPath().isEmpty()) {
          path = logsDir(now.getMillis());
        }
        ((Query10) query.getTransform()).setOutputPath(path);
        ((Query10) query.getTransform()).setMaxNumWorkers(maxNumWorkers());
      }
      // Apply query.
      PCollection<TimestampedValue<KnownSize>> results =
          (PCollection<TimestampedValue<KnownSize>>) source.apply(query);
      if (options.getAssertCorrectness()) {
        if (model == null) {
          throw new RuntimeException(String.format("No model for %s", queryName));
        }
        // We know all our streams have a finite number of elements.
        results.setIsBoundedInternal(PCollection.IsBounded.BOUNDED);
        // If we have a finite number of events then assert our pipeline's
        // results match those of a model using the same sequence of events.
        PAssert.that(results).satisfies(model.assertionFor());
      }
      // Output results.
      sink(results, now.getMillis());
    }
    mainResult = p.run();
    mainResult.waitUntilFinish(Duration.standardSeconds(configuration.streamTimeout));
    return monitor(query);
  } finally {
    if (pubsubHelper != null) {
      pubsubHelper.cleanup();
      pubsubHelper = null;
    }
    configuration = null;
    queryName = null;
  }
}
From source file:org.apache.beam.sdk.nexmark.PubsubHelper.java
License:Apache License
private PubsubHelper(PubsubClient pubsubClient, String project) {
  this.pubsubClient = pubsubClient;
  this.project = project;
  createdTopics = new ArrayList<>();
  createdSubscriptions = new ArrayList<>();
  sleeper = Sleeper.DEFAULT;
  backOff =
      FluentBackoff.DEFAULT
          .withInitialBackoff(Duration.standardSeconds(1))
          .withMaxRetries(3)
          .backoff();
}
From source file:org.apache.beam.sdk.nexmark.queries.Query10.java
License:Apache License
@Override
public PCollection<Done> expand(PCollection<Event> events) {
  final int numLogShards = maxNumWorkers * NUM_SHARDS_PER_WORKER;
  return events
      .apply(
          name + ".ShardEvents",
          ParDo.of(
              new DoFn<Event, KV<String, Event>>() {
                private final Counter lateCounter = Metrics.counter(name, "actuallyLateEvent");
                private final Counter onTimeCounter = Metrics.counter(name, "onTimeCounter");

                @ProcessElement
                public void processElement(ProcessContext c) {
                  if (c.element().hasAnnotation("LATE")) {
                    lateCounter.inc();
                    LOG.info("Observed late: %s", c.element());
                  } else {
                    onTimeCounter.inc();
                  }
                  int shardNum = (int) Math.abs((long) c.element().hashCode() % numLogShards);
                  String shard = String.format("shard-%05d-of-%05d", shardNum, numLogShards);
                  c.output(KV.of(shard, c.element()));
                }
              }))
      .apply(
          name + ".WindowEvents",
          Window.<KV<String, Event>>into(
                  FixedWindows.of(Duration.standardSeconds(configuration.windowSizeSec)))
              .triggering(
                  AfterEach.inOrder(
                      Repeatedly.forever(AfterPane.elementCountAtLeast(configuration.maxLogEvents))
                          .orFinally(AfterWatermark.pastEndOfWindow()),
                      Repeatedly.forever(
                          AfterFirst.of(
                              AfterPane.elementCountAtLeast(configuration.maxLogEvents),
                              AfterProcessingTime.pastFirstElementInPane()
                                  .plusDelayOf(LATE_BATCHING_PERIOD)))))
              .discardingFiredPanes()
              // Use a 1 day allowed lateness so that any forgotten hold will stall the
              // pipeline for that period and be very noticeable.
              .withAllowedLateness(Duration.standardDays(1)))
      .apply(name + ".GroupByKey", GroupByKey.create())
      .apply(
          name + ".CheckForLateEvents",
          ParDo.of(
              new DoFn<KV<String, Iterable<Event>>, KV<String, Iterable<Event>>>() {
                private final Counter earlyCounter = Metrics.counter(name, "earlyShard");
                private final Counter onTimeCounter = Metrics.counter(name, "onTimeShard");
                private final Counter lateCounter = Metrics.counter(name, "lateShard");
                private final Counter unexpectedLatePaneCounter =
                    Metrics.counter(name, "ERROR_unexpectedLatePane");
                private final Counter unexpectedOnTimeElementCounter =
                    Metrics.counter(name, "ERROR_unexpectedOnTimeElement");

                @ProcessElement
                public void processElement(ProcessContext c, BoundedWindow window) {
                  int numLate = 0;
                  int numOnTime = 0;
                  for (Event event : c.element().getValue()) {
                    if (event.hasAnnotation("LATE")) {
                      numLate++;
                    } else {
                      numOnTime++;
                    }
                  }
                  String shard = c.element().getKey();
                  LOG.info(
                      String.format(
                          "%s with timestamp %s has %d actually late and %d on-time "
                              + "elements in pane %s for window %s",
                          shard, c.timestamp(), numLate, numOnTime, c.pane(),
                          window.maxTimestamp()));
                  if (c.pane().getTiming() == PaneInfo.Timing.LATE) {
                    if (numLate == 0) {
                      LOG.error("ERROR! No late events in late pane for %s", shard);
                      unexpectedLatePaneCounter.inc();
                    }
                    if (numOnTime > 0) {
                      LOG.error(
                          "ERROR! Have %d on-time events in late pane for %s", numOnTime, shard);
                      unexpectedOnTimeElementCounter.inc();
                    }
                    lateCounter.inc();
                  } else if (c.pane().getTiming() == PaneInfo.Timing.EARLY) {
                    if (numOnTime + numLate < configuration.maxLogEvents) {
                      LOG.error(
                          "ERROR! Only have %d events in early pane for %s",
                          numOnTime + numLate, shard);
                    }
                    earlyCounter.inc();
                  } else {
                    onTimeCounter.inc();
                  }
                  c.output(c.element());
                }
              }))
      .apply(
          name + ".UploadEvents",
          ParDo.of(
              new DoFn<KV<String, Iterable<Event>>, KV<Void, OutputFile>>() {
                private final Counter savedFileCounter = Metrics.counter(name, "savedFile");
                private final Counter writtenRecordsCounter =
                    Metrics.counter(name, "writtenRecords");

                @ProcessElement
                public void processElement(ProcessContext c, BoundedWindow window)
                    throws IOException {
                  String shard = c.element().getKey();
                  GcsOptions options = c.getPipelineOptions().as(GcsOptions.class);
                  OutputFile outputFile = outputFileFor(window, shard, c.pane());
                  LOG.info(
                      String.format(
                          "Writing %s with record timestamp %s, window timestamp %s, pane %s",
                          shard, c.timestamp(), window.maxTimestamp(), c.pane()));
                  if (outputFile.filename != null) {
                    LOG.info("Beginning write to '%s'", outputFile.filename);
                    int n = 0;
                    try (OutputStream output =
                        Channels.newOutputStream(
                            openWritableGcsFile(options, outputFile.filename))) {
                      for (Event event : c.element().getValue()) {
                        Event.CODER.encode(event, output, Coder.Context.OUTER);
                        writtenRecordsCounter.inc();
                        if (++n % 10000 == 0) {
                          LOG.info("So far written %d records to '%s'", n, outputFile.filename);
                        }
                      }
                    }
                    LOG.info("Written all %d records to '%s'", n, outputFile.filename);
                  }
                  savedFileCounter.inc();
                  c.output(KV.of(null, outputFile));
                }
              }))
      // Clear fancy triggering from above.
      .apply(
          name + ".WindowLogFiles",
          Window.<KV<Void, OutputFile>>into(
                  FixedWindows.of(Duration.standardSeconds(configuration.windowSizeSec)))
              .triggering(AfterWatermark.pastEndOfWindow())
              // We expect no late data here, but we'll assume the worst so we can detect any.
              .withAllowedLateness(Duration.standardDays(1))
              .discardingFiredPanes())
      // This GroupByKey allows us to have one file per window.
      .apply(name + ".GroupByKey2", GroupByKey.create())
      .apply(
          name + ".Index",
          ParDo.of(
              new DoFn<KV<Void, Iterable<OutputFile>>, Done>() {
                private final Counter unexpectedLateCounter =
                    Metrics.counter(name, "ERROR_unexpectedLate");
                private final Counter unexpectedEarlyCounter =
                    Metrics.counter(name, "ERROR_unexpectedEarly");
                private final Counter unexpectedIndexCounter =
                    Metrics.counter(name, "ERROR_unexpectedIndex");
                private final Counter finalizedCounter = Metrics.counter(name, "indexed");

                @ProcessElement
                public void processElement(ProcessContext c, BoundedWindow window)
                    throws IOException {
                  if (c.pane().getTiming() == Timing.LATE) {
                    unexpectedLateCounter.inc();
                    LOG.error("ERROR! Unexpected LATE pane: %s", c.pane());
                  } else if (c.pane().getTiming() == Timing.EARLY) {
                    unexpectedEarlyCounter.inc();
                    LOG.error("ERROR! Unexpected EARLY pane: %s", c.pane());
                  } else if (c.pane().getTiming() == Timing.ON_TIME && c.pane().getIndex() != 0) {
                    unexpectedIndexCounter.inc();
                    LOG.error("ERROR! Unexpected ON_TIME pane index: %s", c.pane());
                  } else {
                    GcsOptions options = c.getPipelineOptions().as(GcsOptions.class);
                    LOG.info(
                        "Index with record timestamp %s, window timestamp %s, pane %s",
                        c.timestamp(), window.maxTimestamp(), c.pane());
                    @Nullable String filename = indexPathFor(window);
                    if (filename != null) {
                      LOG.info("Beginning write to '%s'", filename);
                      int n = 0;
                      try (OutputStream output =
                          Channels.newOutputStream(openWritableGcsFile(options, filename))) {
                        for (OutputFile outputFile : c.element().getValue()) {
                          output.write(outputFile.toString().getBytes(StandardCharsets.UTF_8));
                          n++;
                        }
                      }
                      LOG.info("Written all %d lines to '%s'", n, filename);
                    }
                    c.output(new Done("written for timestamp " + window.maxTimestamp()));
                    finalizedCounter.inc();
                  }
                }
              }));
}
From source file:org.apache.beam.sdk.nexmark.queries.Query11.java
License:Apache License
@Override
public PCollection<BidsPerSession> expand(PCollection<Event> events) {
  PCollection<Long> bidders =
      events
          .apply(NexmarkQueryUtil.JUST_BIDS)
          .apply(
              name + ".Rekey",
              ParDo.of(
                  new DoFn<Bid, Long>() {
                    @ProcessElement
                    public void processElement(ProcessContext c) {
                      Bid bid = c.element();
                      c.output(bid.bidder);
                    }
                  }));
  PCollection<Long> biddersWindowed =
      bidders.apply(
          Window.<Long>into(
                  Sessions.withGapDuration(Duration.standardSeconds(configuration.windowSizeSec)))
              .triggering(
                  Repeatedly.forever(AfterPane.elementCountAtLeast(configuration.maxLogEvents)))
              .discardingFiredPanes()
              .withAllowedLateness(Duration.standardSeconds(configuration.occasionalDelaySec / 2)));
  return biddersWindowed
      .apply(Count.perElement())
      .apply(
          name + ".ToResult",
          ParDo.of(
              new DoFn<KV<Long, Long>, BidsPerSession>() {
                @ProcessElement
                public void processElement(ProcessContext c) {
                  c.output(new BidsPerSession(c.element().getKey(), c.element().getValue()));
                }
              }));
}
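For reference, the windowing idiom shared by the Nexmark queries above reduces to applying a Window transform whose size is built from Duration.standardSeconds. A minimal runnable sketch of the fixed-window variant (assumes the Beam direct runner is on the classpath; the class and values are illustrative, not from the queries):

import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.transforms.Create;
import org.apache.beam.sdk.transforms.windowing.FixedWindows;
import org.apache.beam.sdk.transforms.windowing.Window;
import org.apache.beam.sdk.values.PCollection;
import org.joda.time.Duration;

public class WindowSketch {
  public static void main(String[] args) {
    Pipeline p = Pipeline.create();
    // Assign each element to a fixed 60-second event-time window.
    PCollection<Long> windowed =
        p.apply(Create.of(1L, 2L, 3L))
            .apply(Window.<Long>into(FixedWindows.of(Duration.standardSeconds(60))));
    p.run().waitUntilFinish();
  }
}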