Example usage for com.google.common.base Utf8 encodedLength

List of usage examples for com.google.common.base Utf8 encodedLength

Introduction

In this page you can find the example usage for com.google.common.base Utf8 encodedLength.

Prototype

@CheckReturnValue
public static int encodedLength(CharSequence sequence) 

Source Link

Document

Returns the number of bytes in the UTF-8-encoded form of the given `sequence`.

Usage

From source file:org.apache.bookkeeper.common.coder.StringUtf8Coder.java

/** Returns the number of bytes in the UTF-8 encoding of {@code value}. */
@Override
public int getSerializedSize(String value) {
    return Utf8.encodedLength(value);
}

From source file:com.google.shipshape.analyzers.EncodingOffsetConverter.java

/**
 * Maps a UTF-16 code unit index into {@code fileContent} to the corresponding byte index,
 * according to the encoding this {@link EncodingOffsetConverter} was constructed with.
 *
 * <p>Each call may cost O(n) in the length of fileContent; avoid calling in a tight loop.
 *
 * @param utf16CodeUnitIndex an index into the source string, counted in UTF-16 code units
 * @return the equivalent index counted in bytes
 * @throws IllegalArgumentException if the index is negative or not less than the content length
 * @throws IllegalStateException if the configured encoding is neither UTF-8 nor ISO-8859-1
 */
public int toByteIndex(int utf16CodeUnitIndex) {
    final int contentLength = fileContent.length();
    if (utf16CodeUnitIndex < 0 || utf16CodeUnitIndex >= contentLength) {
        throw new IllegalArgumentException(
                "index out of bounds: " + utf16CodeUnitIndex + ", length " + contentLength);
    }
    if (encoding.equals(UTF_8)) {
        // Byte index = size of the UTF-8 encoding of everything before the target code unit.
        CharSequence prefix = fileContent.subSequence(0, utf16CodeUnitIndex);
        return Utf8.encodedLength(prefix);
    }
    if (encoding.equals(ISO_8859_1)) {
        // ISO-8859-1 is a fixed one-byte encoding whose characters coincide with the first 256
        // Unicode code points, each of which UTF-16 stores as a single code unit — so the
        // code unit index already equals the byte index.
        return utf16CodeUnitIndex;
    }
    throw new IllegalStateException("Unsupported encoding: " + encoding);
}

From source file:com.google.cloud.dataflow.sdk.coders.StringUtf8Coder.java

/**
 * {@inheritDoc}
 *
 * @return the byte size of the UTF-8 encoding of the string or, in a nested context,
 * the byte size of the encoding plus the encoded length prefix.
 */
@Override
protected long getEncodedElementByteSize(String value, Context context) throws Exception {
    if (value == null) {
        throw new CoderException("cannot encode a null String");
    }
    if (!context.isWholeStream) {
        // Nested context: measure exactly what writeString emits (length prefix included)
        // by streaming it through a counting sink that discards the bytes.
        CountingOutputStream byteCounter = new CountingOutputStream(ByteStreams.nullOutputStream());
        writeString(value, new DataOutputStream(byteCounter));
        return byteCounter.getCount();
    }
    return Utf8.encodedLength(value);
}

From source file:com.google.errorprone.bugpatterns.android.IsLoggableTagLength.java

/**
 * Returns whether the tag's UTF-8 encoding fits in 23 bytes — presumably the Android
 * {@code Log.isLoggable} tag-length limit; confirm against the platform docs.
 */
private boolean isValidTag(String tag) {
    final int maxTagLengthBytes = 23;
    return Utf8.encodedLength(tag) <= maxTagLengthBytes;
}

From source file:org.apache.beam.sdk.coders.StringUtf8Coder.java

/**
 * {@inheritDoc}/*  w  w w  .  j a v  a 2s .c  om*/
 *
 * @return the byte size of the UTF-8 encoding of the a string or, in a nested context,
 * the byte size of the encoding plus the encoded length prefix.
 */
@Override
public long getEncodedElementByteSize(String value) throws Exception {
    if (value == null) {
        throw new CoderException("cannot encode a null String");
    }
    int size = Utf8.encodedLength(value);
    return VarInt.getLength(size) + size;
}

From source file:com.google.template.soy.jbcsrc.restricted.BytecodeUtils.java

/** Returns an {@link Expression} that can load the given String constant. */
public static Expression constant(final String value) {
    checkNotNull(value);
    // A single constant pool entry cannot hold a string whose UTF-8 form exceeds the limit.
    final int utf8Length = Utf8.encodedLength(value);
    checkArgument(utf8Length <= MAX_CONSTANT_STRING_LENGTH,
            "String is too long when encoded in utf8");
    return stringConstant(value);
}

From source file:com.google.template.soy.jbcsrc.restricted.BytecodeUtils.java

/**
 * Returns an {@link Expression} that can load the given String constant.
 *
 * <p>Unlike {@link #constant(String)} this can handle strings larger than 65K bytes.
 */
public static Expression constant(String value, ClassFieldManager manager) {
    int utf8Length = Utf8.encodedLength(value);
    if (utf8Length <= MAX_CONSTANT_STRING_LENGTH) {
        // Small enough for a single constant pool entry.
        return stringConstant(value);
    }
    // Too big for one constant pool entry: split into a small number of chunks, concatenate
    // them, and hold the combined value in a generated static field.
    final int totalChars = value.length();
    int chunkStart = 0;
    Expression concatenated = null;
    do {
        int chunkEnd = offsetOf65KUtf8Bytes(value, chunkStart, totalChars);
        // A chunk boundary may fall inside a surrogate pair; the class format's modified
        // UTF-8 is forgiving about such splits, so no special handling is needed.
        Expression chunk = stringConstant(value.substring(chunkStart, chunkEnd));
        concatenated = (concatenated == null)
                ? chunk
                : concatenated.invoke(MethodRef.STRING_CONCAT, chunk);
        chunkStart = chunkEnd;
    } while (chunkStart < totalChars);
    FieldRef fieldRef = manager.addStaticField(LARGE_STRING_CONSTANT_NAME, concatenated);
    return fieldRef.accessor();
}

From source file:org.apache.beam.runners.dataflow.DataflowRunner.java

/**
 * Translates the pipeline, stages files, and submits a CreateJob request to the Dataflow
 * service, returning a {@link DataflowPipelineJob} handle for post-launch monitoring.
 *
 * @param pipeline the pipeline to execute on the Dataflow service
 * @return a job handle, or a {@code DataflowTemplateJob} when only a template was written
 * @throws RuntimeException if the job cannot be created or a template file cannot be written
 */
@Override
public DataflowPipelineJob run(Pipeline pipeline) {
    logWarningIfPCollectionViewHasNonDeterministicKeyCoder(pipeline);
    if (containsUnboundedPCollection(pipeline)) {
        // Unbounded sources force streaming execution.
        options.setStreaming(true);
    }
    replaceTransforms(pipeline);

    LOG.info("Executing pipeline on the Dataflow Service, which will have billing implications "
            + "related to Google Compute Engine usage and other Google Cloud Services.");

    List<DataflowPackage> packages = options.getStager().stageFiles();

    // Set a unique client_request_id in the CreateJob request.
    // This is used to ensure idempotence of job creation across retried
    // attempts to create a job. Specifically, if the service returns a job with
    // a different client_request_id, it means the returned one is a different
    // job previously created with the same job name, and that the job creation
    // has been effectively rejected. The SDK should return
    // Error::Already_Exists to user in that case.
    int randomNum = new Random().nextInt(9000) + 1000;
    // NOTE(review): the trailing "mmm" in this pattern is minute-of-hour repeated, not
    // milliseconds ("SSS") — presumably intended millis; left as-is since the request-id
    // format may be relied upon elsewhere. Confirm before changing.
    String requestId = DateTimeFormat.forPattern("YYYYMMddHHmmssmmm").withZone(DateTimeZone.UTC)
            .print(DateTimeUtils.currentTimeMillis()) + "_" + randomNum;

    // Try to create a debuggee ID. This must happen before the job is translated since it may
    // update the options.
    DataflowPipelineOptions dataflowOptions = options.as(DataflowPipelineOptions.class);
    maybeRegisterDebuggee(dataflowOptions, requestId);

    JobSpecification jobSpecification = translator.translate(pipeline, this, packages);
    Job newJob = jobSpecification.getJob();
    newJob.setClientRequestId(requestId);

    ReleaseInfo releaseInfo = ReleaseInfo.getReleaseInfo();
    String version = releaseInfo.getVersion();
    checkState(!version.equals("${pom.version}"),
            "Unable to submit a job to the Dataflow service with unset version ${pom.version}");
    System.out.println("Dataflow SDK version: " + version);

    newJob.getEnvironment().setUserAgent((Map) releaseInfo.getProperties());
    // The Dataflow Service may write to the temporary directory directly, so
    // must be verified.
    if (!isNullOrEmpty(options.getGcpTempLocation())) {
        newJob.getEnvironment().setTempStoragePrefix(
                dataflowOptions.getPathValidator().verifyPath(options.getGcpTempLocation()));
    }
    newJob.getEnvironment().setDataset(options.getTempDatasetId());
    newJob.getEnvironment().setExperiments(options.getExperiments());

    // Set the Docker container image that executes Dataflow worker harness, residing in Google
    // Container Registry. Translator is guaranteed to create a worker pool prior to this point.
    String workerHarnessContainerImage = getContainerImageForJob(options);
    for (WorkerPool workerPool : newJob.getEnvironment().getWorkerPools()) {
        workerPool.setWorkerHarnessContainerImage(workerHarnessContainerImage);
    }

    newJob.getEnvironment().setVersion(getEnvironmentVersion(options));

    if (hooks != null) {
        hooks.modifyEnvironmentBeforeSubmission(newJob.getEnvironment());
    }

    // Optionally dump the job spec to a file and/or write a reusable template.
    if (!isNullOrEmpty(options.getDataflowJobFile()) || !isNullOrEmpty(options.getTemplateLocation())) {
        boolean isTemplate = !isNullOrEmpty(options.getTemplateLocation());
        if (isTemplate) {
            checkArgument(isNullOrEmpty(options.getDataflowJobFile()),
                    "--dataflowJobFile and --templateLocation are mutually exclusive.");
        }
        String fileLocation = firstNonNull(options.getTemplateLocation(), options.getDataflowJobFile());
        checkArgument(fileLocation.startsWith("/") || fileLocation.startsWith("gs://"),
                "Location must be local or on Cloud Storage, got %s.", fileLocation);
        ResourceId fileResource = FileSystems.matchNewResource(fileLocation, false /* isDirectory */);
        String workSpecJson = DataflowPipelineTranslator.jobToString(newJob);
        try (PrintWriter printWriter = new PrintWriter(
                Channels.newOutputStream(FileSystems.create(fileResource, MimeTypes.TEXT)))) {
            printWriter.print(workSpecJson);
            LOG.info("Printed job specification to {}", fileLocation);
        } catch (IOException ex) {
            // A template MUST be written; a plain job-file dump is best-effort only.
            String error = String.format("Cannot create output file at %s", fileLocation);
            if (isTemplate) {
                throw new RuntimeException(error, ex);
            } else {
                LOG.warn(error, ex);
            }
        }
        if (isTemplate) {
            // Template-only run: nothing is submitted to the service.
            LOG.info("Template successfully created.");
            return new DataflowTemplateJob();
        }
    }

    String jobIdToUpdate = null;
    if (options.isUpdate()) {
        jobIdToUpdate = getJobIdFromName(options.getJobName());
        newJob.setTransformNameMapping(options.getTransformNameMapping());
        newJob.setReplaceJobId(jobIdToUpdate);
    }
    Job jobResult;
    try {
        jobResult = dataflowClient.createJob(newJob);
    } catch (GoogleJsonResponseException e) {
        String errorMessages = "Unexpected errors";
        if (e.getDetails() != null) {
            // An oversized serialized pipeline is a common rejection cause; give a targeted hint.
            if (Utf8.encodedLength(newJob.toString()) >= CREATE_JOB_REQUEST_LIMIT_BYTES) {
                errorMessages = "The size of the serialized JSON representation of the pipeline "
                        + "exceeds the allowable limit. "
                        + "For more information, please check the FAQ link below:\n"
                        + "https://cloud.google.com/dataflow/faq";
            } else {
                errorMessages = e.getDetails().getMessage();
            }
        }
        throw new RuntimeException("Failed to create a workflow job: " + errorMessages, e);
    } catch (IOException e) {
        throw new RuntimeException("Failed to create a workflow job", e);
    }

    // Use a raw client for post-launch monitoring, as status calls may fail
    // regularly and need not be retried automatically.
    DataflowPipelineJob dataflowPipelineJob = new DataflowPipelineJob(DataflowClient.create(options),
            jobResult.getId(), options, jobSpecification.getStepNames());

    // If the service returned client request id, the SDK needs to compare it
    // with the original id generated in the request, if they are not the same
    // (i.e., the returned job is not created by this request), throw
    // DataflowJobAlreadyExistsException or DataflowJobAlreadyUpdatedException
    // depending on whether this is a reload or not.
    if (jobResult.getClientRequestId() != null && !jobResult.getClientRequestId().isEmpty()
            && !jobResult.getClientRequestId().equals(requestId)) {
        // If updating a job.
        if (options.isUpdate()) {
            throw new DataflowJobAlreadyUpdatedException(dataflowPipelineJob,
                    String.format(
                            "The job named %s with id: %s has already been updated into job id: %s "
                                    + "and cannot be updated again.",
                            newJob.getName(), jobIdToUpdate, jobResult.getId()));
        } else {
            throw new DataflowJobAlreadyExistsException(dataflowPipelineJob,
                    String.format("There is already an active job named %s with id: %s. If you want "
                            + "to submit a second job, try again by setting a different name using --jobName.",
                            newJob.getName(), jobResult.getId()));
        }
    }

    LOG.info("To access the Dataflow monitoring console, please navigate to {}",
            MonitoringUtil.getJobMonitoringPageURL(options.getProject(), jobResult.getId()));
    System.out.println("Submitted job: " + jobResult.getId());

    LOG.info("To cancel the job using the 'gcloud' tool, run:\n> {}",
            MonitoringUtil.getGcloudCancelCommand(options, jobResult.getId()));

    return dataflowPipelineJob;
}

From source file:org.apache.beam.sdk.runners.DataflowPipelineRunner.java

/**
 * Translates the pipeline, stages files, and submits a CreateJob request to the Dataflow
 * service, returning a {@link DataflowPipelineJob} handle for post-launch monitoring.
 *
 * @param pipeline the pipeline to execute on the Dataflow service
 * @return a handle to the submitted job
 * @throws RuntimeException if the job cannot be created
 */
@Override
public DataflowPipelineJob run(Pipeline pipeline) {
    logWarningIfPCollectionViewHasNonDeterministicKeyCoder(pipeline);

    LOG.info("Executing pipeline on the Dataflow Service, which will have billing implications "
            + "related to Google Compute Engine usage and other Google Cloud Services.");

    List<DataflowPackage> packages = options.getStager().stageFiles();

    // Set a unique client_request_id in the CreateJob request.
    // This is used to ensure idempotence of job creation across retried
    // attempts to create a job. Specifically, if the service returns a job with
    // a different client_request_id, it means the returned one is a different
    // job previously created with the same job name, and that the job creation
    // has been effectively rejected. The SDK should return
    // Error::Already_Exists to user in that case.
    int randomNum = new Random().nextInt(9000) + 1000;
    // NOTE(review): the trailing "mmm" in this pattern is minute-of-hour repeated, not
    // milliseconds ("SSS") — presumably intended millis; left as-is since the request-id
    // format may be relied upon elsewhere. Confirm before changing.
    String requestId = DateTimeFormat.forPattern("YYYYMMddHHmmssmmm").withZone(DateTimeZone.UTC)
            .print(DateTimeUtils.currentTimeMillis()) + "_" + randomNum;

    // Try to create a debuggee ID. This must happen before the job is translated since it may
    // update the options.
    DataflowPipelineOptions dataflowOptions = options.as(DataflowPipelineOptions.class);
    maybeRegisterDebuggee(dataflowOptions, requestId);

    JobSpecification jobSpecification = translator.translate(pipeline, this, packages);
    Job newJob = jobSpecification.getJob();
    newJob.setClientRequestId(requestId);

    String version = ReleaseInfo.getReleaseInfo().getVersion();
    System.out.println("Dataflow SDK version: " + version);

    newJob.getEnvironment().setUserAgent(ReleaseInfo.getReleaseInfo());
    // The Dataflow Service may write to the temporary directory directly, so
    // must be verified.
    if (!Strings.isNullOrEmpty(options.getTempLocation())) {
        newJob.getEnvironment()
                .setTempStoragePrefix(dataflowOptions.getPathValidator().verifyPath(options.getTempLocation()));
    }
    newJob.getEnvironment().setDataset(options.getTempDatasetId());
    newJob.getEnvironment().setExperiments(options.getExperiments());

    // Set the Docker container image that executes Dataflow worker harness, residing in Google
    // Container Registry. Translator is guaranteed to create a worker pool prior to this point.
    String workerHarnessContainerImage = options.as(DataflowPipelineWorkerPoolOptions.class)
            .getWorkerHarnessContainerImage();
    for (WorkerPool workerPool : newJob.getEnvironment().getWorkerPools()) {
        workerPool.setWorkerHarnessContainerImage(workerHarnessContainerImage);
    }

    // Requirements about the service.
    Map<String, Object> environmentVersion = new HashMap<>();
    environmentVersion.put(PropertyNames.ENVIRONMENT_VERSION_MAJOR_KEY, ENVIRONMENT_MAJOR_VERSION);
    newJob.getEnvironment().setVersion(environmentVersion);
    // Default jobType is JAVA_BATCH_AUTOSCALING: A Java job with workers that the job can
    // autoscale if specified.
    String jobType = "JAVA_BATCH_AUTOSCALING";

    if (options.isStreaming()) {
        jobType = "STREAMING";
    }
    environmentVersion.put(PropertyNames.ENVIRONMENT_VERSION_JOB_TYPE_KEY, jobType);

    if (hooks != null) {
        hooks.modifyEnvironmentBeforeSubmission(newJob.getEnvironment());
    }

    // Best-effort debug dump of the job spec; failures only warn.
    if (!Strings.isNullOrEmpty(options.getDataflowJobFile())) {
        try (PrintWriter printWriter = new PrintWriter(new File(options.getDataflowJobFile()))) {
            String workSpecJson = DataflowPipelineTranslator.jobToString(newJob);
            printWriter.print(workSpecJson);
            LOG.info("Printed workflow specification to {}", options.getDataflowJobFile());
        } catch (IllegalStateException ex) {
            LOG.warn("Cannot translate workflow spec to json for debug.");
        } catch (FileNotFoundException ex) {
            LOG.warn("Cannot create workflow spec output file.");
        }
    }

    String jobIdToUpdate = null;
    if (options.isUpdate()) {
        jobIdToUpdate = getJobIdFromName(options.getJobName());
        newJob.setTransformNameMapping(options.getTransformNameMapping());
        newJob.setReplaceJobId(jobIdToUpdate);
    }
    Job jobResult;
    try {
        jobResult = dataflowClient.projects().jobs().create(options.getProject(), newJob).execute();
    } catch (GoogleJsonResponseException e) {
        String errorMessages = "Unexpected errors";
        if (e.getDetails() != null) {
            // An oversized serialized pipeline is a common rejection cause; give a targeted hint.
            if (Utf8.encodedLength(newJob.toString()) >= CREATE_JOB_REQUEST_LIMIT_BYTES) {
                errorMessages = "The size of the serialized JSON representation of the pipeline "
                        + "exceeds the allowable limit. "
                        + "For more information, please check the FAQ link below:\n"
                        + "https://cloud.google.com/dataflow/faq";
            } else {
                errorMessages = e.getDetails().getMessage();
            }
        }
        throw new RuntimeException("Failed to create a workflow job: " + errorMessages, e);
    } catch (IOException e) {
        throw new RuntimeException("Failed to create a workflow job", e);
    }

    // Obtain all of the extractors from the PTransforms used in the pipeline so the
    // DataflowPipelineJob has access to them.
    AggregatorPipelineExtractor aggregatorExtractor = new AggregatorPipelineExtractor(pipeline);
    Map<Aggregator<?, ?>, Collection<PTransform<?, ?>>> aggregatorSteps = aggregatorExtractor
            .getAggregatorSteps();

    DataflowAggregatorTransforms aggregatorTransforms = new DataflowAggregatorTransforms(aggregatorSteps,
            jobSpecification.getStepNames());

    // Use a raw client for post-launch monitoring, as status calls may fail
    // regularly and need not be retried automatically.
    DataflowPipelineJob dataflowPipelineJob = new DataflowPipelineJob(options.getProject(), jobResult.getId(),
            DataflowTransport.newRawDataflowClient(options).build(), aggregatorTransforms);

    // If the service returned client request id, the SDK needs to compare it
    // with the original id generated in the request, if they are not the same
    // (i.e., the returned job is not created by this request), throw
    // DataflowJobAlreadyExistsException or DataflowJobAlreadyUpdatedException
    // depending on whether this is a reload or not.
    if (jobResult.getClientRequestId() != null && !jobResult.getClientRequestId().isEmpty()
            && !jobResult.getClientRequestId().equals(requestId)) {
        // If updating a job.
        if (options.isUpdate()) {
            throw new DataflowJobAlreadyUpdatedException(dataflowPipelineJob,
                    String.format(
                            "The job named %s with id: %s has already been updated into job id: %s "
                                    + "and cannot be updated again.",
                            newJob.getName(), jobIdToUpdate, jobResult.getId()));
        } else {
            throw new DataflowJobAlreadyExistsException(dataflowPipelineJob,
                    String.format("There is already an active job named %s with id: %s. If you want "
                            + "to submit a second job, try again by setting a different name using --jobName.",
                            newJob.getName(), jobResult.getId()));
        }
    }

    LOG.info("To access the Dataflow monitoring console, please navigate to {}",
            MonitoringUtil.getJobMonitoringPageURL(options.getProject(), jobResult.getId()));
    System.out.println("Submitted job: " + jobResult.getId());

    LOG.info("To cancel the job using the 'gcloud' tool, run:\n> {}",
            MonitoringUtil.getGcloudCancelCommand(options, jobResult.getId()));

    return dataflowPipelineJob;
}

From source file:com.google.cloud.dataflow.sdk.runners.DataflowPipelineRunner.java

/**
 * Translates the pipeline, stages files, and submits a CreateJob request to the Dataflow
 * service, returning a {@link DataflowPipelineJob} handle for post-launch monitoring.
 *
 * @param pipeline the pipeline to execute on the Dataflow service
 * @return a handle to the submitted job, or {@code null} when hooks indicate the job should
 *     not actually be run
 * @throws RuntimeException if the job cannot be created
 */
@Override
public DataflowPipelineJob run(Pipeline pipeline) {
    logWarningIfPCollectionViewHasNonDeterministicKeyCoder(pipeline);

    LOG.info("Executing pipeline on the Dataflow Service, which will have billing implications "
            + "related to Google Compute Engine usage and other Google Cloud Services.");

    List<DataflowPackage> packages = options.getStager().stageFiles();

    // Set a unique client_request_id in the CreateJob request.
    // This is used to ensure idempotence of job creation across retried
    // attempts to create a job. Specifically, if the service returns a job with
    // a different client_request_id, it means the returned one is a different
    // job previously created with the same job name, and that the job creation
    // has been effectively rejected. The SDK should return
    // Error::Already_Exists to user in that case.
    int randomNum = new Random().nextInt(9000) + 1000;
    // NOTE(review): the trailing "mmm" in this pattern is minute-of-hour repeated, not
    // milliseconds ("SSS") — presumably intended millis; left as-is since the request-id
    // format may be relied upon elsewhere. Confirm before changing.
    String requestId = DateTimeFormat.forPattern("YYYYMMddHHmmssmmm").withZone(DateTimeZone.UTC)
            .print(DateTimeUtils.currentTimeMillis()) + "_" + randomNum;

    // Try to create a debuggee ID. This must happen before the job is translated since it may
    // update the options.
    DataflowPipelineOptions dataflowOptions = options.as(DataflowPipelineOptions.class);
    maybeRegisterDebuggee(dataflowOptions, requestId);

    JobSpecification jobSpecification = translator.translate(pipeline, this, packages);
    Job newJob = jobSpecification.getJob();
    newJob.setClientRequestId(requestId);

    String version = DataflowReleaseInfo.getReleaseInfo().getVersion();
    System.out.println("Dataflow SDK version: " + version);

    newJob.getEnvironment().setUserAgent(DataflowReleaseInfo.getReleaseInfo());
    // The Dataflow Service may write to the temporary directory directly, so
    // must be verified.
    if (!Strings.isNullOrEmpty(options.getTempLocation())) {
        newJob.getEnvironment()
                .setTempStoragePrefix(dataflowOptions.getPathValidator().verifyPath(options.getTempLocation()));
    }
    newJob.getEnvironment().setDataset(options.getTempDatasetId());
    newJob.getEnvironment().setExperiments(options.getExperiments());

    // Set the Docker container image that executes Dataflow worker harness, residing in Google
    // Container Registry. Translator is guaranteed to create a worker pool prior to this point.
    String workerHarnessContainerImage = options.as(DataflowPipelineWorkerPoolOptions.class)
            .getWorkerHarnessContainerImage();
    for (WorkerPool workerPool : newJob.getEnvironment().getWorkerPools()) {
        workerPool.setWorkerHarnessContainerImage(workerHarnessContainerImage);
    }

    // Requirements about the service.
    Map<String, Object> environmentVersion = new HashMap<>();
    environmentVersion.put(PropertyNames.ENVIRONMENT_VERSION_MAJOR_KEY, ENVIRONMENT_MAJOR_VERSION);
    newJob.getEnvironment().setVersion(environmentVersion);
    // Default jobType is JAVA_BATCH_AUTOSCALING: A Java job with workers that the job can
    // autoscale if specified.
    String jobType = "JAVA_BATCH_AUTOSCALING";

    if (options.isStreaming()) {
        jobType = "STREAMING";
    }
    environmentVersion.put(PropertyNames.ENVIRONMENT_VERSION_JOB_TYPE_KEY, jobType);

    if (hooks != null) {
        hooks.modifyEnvironmentBeforeSubmission(newJob.getEnvironment());
    }

    if (!Strings.isNullOrEmpty(options.getDataflowJobFile())) {
        runJobFileHooks(newJob);
    }
    // Hooks may veto actual submission (e.g. dry-run); in that case no job handle exists.
    if (hooks != null && !hooks.shouldActuallyRunJob()) {
        return null;
    }

    String jobIdToUpdate = null;
    if (options.getUpdate()) {
        jobIdToUpdate = getJobIdFromName(options.getJobName());
        newJob.setTransformNameMapping(options.getTransformNameMapping());
        newJob.setReplaceJobId(jobIdToUpdate);
    }
    Job jobResult;
    try {
        jobResult = dataflowClient.projects().jobs().create(options.getProject(), newJob).execute();
    } catch (GoogleJsonResponseException e) {
        String errorMessages = "Unexpected errors";
        if (e.getDetails() != null) {
            // An oversized serialized pipeline is a common rejection cause; give a targeted hint.
            if (Utf8.encodedLength(newJob.toString()) >= CREATE_JOB_REQUEST_LIMIT_BYTES) {
                errorMessages = "The size of the serialized JSON representation of the pipeline "
                        + "exceeds the allowable limit. "
                        + "For more information, please check the FAQ link below:\n"
                        + "https://cloud.google.com/dataflow/faq";
            } else {
                errorMessages = e.getDetails().getMessage();
            }
        }
        throw new RuntimeException("Failed to create a workflow job: " + errorMessages, e);
    } catch (IOException e) {
        throw new RuntimeException("Failed to create a workflow job", e);
    }

    // Obtain all of the extractors from the PTransforms used in the pipeline so the
    // DataflowPipelineJob has access to them.
    AggregatorPipelineExtractor aggregatorExtractor = new AggregatorPipelineExtractor(pipeline);
    Map<Aggregator<?, ?>, Collection<PTransform<?, ?>>> aggregatorSteps = aggregatorExtractor
            .getAggregatorSteps();

    DataflowAggregatorTransforms aggregatorTransforms = new DataflowAggregatorTransforms(aggregatorSteps,
            jobSpecification.getStepNames());

    // Use a raw client for post-launch monitoring, as status calls may fail
    // regularly and need not be retried automatically.
    DataflowPipelineJob dataflowPipelineJob = new DataflowPipelineJob(options.getProject(), jobResult.getId(),
            Transport.newDataflowClient(options).build(), aggregatorTransforms);

    // If the service returned client request id, the SDK needs to compare it
    // with the original id generated in the request, if they are not the same
    // (i.e., the returned job is not created by this request), throw
    // DataflowJobAlreadyExistsException or DataflowJobAlreadyUpdatedException
    // depending on whether this is a reload or not.
    if (jobResult.getClientRequestId() != null && !jobResult.getClientRequestId().isEmpty()
            && !jobResult.getClientRequestId().equals(requestId)) {
        // If updating a job.
        if (options.getUpdate()) {
            throw new DataflowJobAlreadyUpdatedException(dataflowPipelineJob,
                    String.format(
                            "The job named %s with id: %s has already been updated into job id: %s "
                                    + "and cannot be updated again.",
                            newJob.getName(), jobIdToUpdate, jobResult.getId()));
        } else {
            throw new DataflowJobAlreadyExistsException(dataflowPipelineJob,
                    String.format("There is already an active job named %s with id: %s. If you want "
                            + "to submit a second job, try again by setting a different name using --jobName.",
                            newJob.getName(), jobResult.getId()));
        }
    }

    LOG.info("To access the Dataflow monitoring console, please navigate to {}",
            MonitoringUtil.getJobMonitoringPageURL(options.getProject(), jobResult.getId()));
    System.out.println("Submitted job: " + jobResult.getId());

    LOG.info("To cancel the job using the 'gcloud' tool, run:\n> {}",
            MonitoringUtil.getGcloudCancelCommand(options, jobResult.getId()));

    return dataflowPipelineJob;
}