List of usage examples for com.google.common.base.Utf8#encodedLength(CharSequence)
@CheckReturnValue public static int encodedLength(CharSequence sequence)
From source file:org.apache.bookkeeper.common.coder.StringUtf8Coder.java
/**
 * Returns the number of bytes the given string occupies when encoded as UTF-8.
 *
 * @param value the string to measure
 * @return the UTF-8 encoded size in bytes
 */
@Override
public int getSerializedSize(String value) {
    final int utf8Bytes = Utf8.encodedLength(value);
    return utf8Bytes;
}
From source file:com.google.shipshape.analyzers.EncodingOffsetConverter.java
/** * Converts a UTF-16 code unit index to a byte index, based on the source provided when this * {@link EncodingOffsetConverter} was constructed. * * <p>Note that each call to this method may take O(n) time, where n is the length of the * fileContent. Do not call in a tight loop. * * @param utf16CodeUnitIndex An index into the source string in terms of UTF-16 code units * @return An index into the source string in terms of bytes *//*from w ww. j ava 2 s.com*/ public int toByteIndex(int utf16CodeUnitIndex) { if (utf16CodeUnitIndex < 0 || utf16CodeUnitIndex >= fileContent.length()) { throw new IllegalArgumentException( "index out of bounds: " + utf16CodeUnitIndex + ", length " + fileContent.length()); } if (encoding.equals(UTF_8)) { return Utf8.encodedLength(fileContent.subSequence(0, utf16CodeUnitIndex)); } else if (encoding.equals(ISO_8859_1)) { // ISO-8859-1 is a fixed 8-bit encoding, and it was incorporated as the first 256 code // points for Unicode. UTF-16 encodes code points U+0000 to U+D7FF and U+E000 to U+FFFF // as single 16-bit code units, so it is OK to just return the UTF-16 code unit index. return utf16CodeUnitIndex; } else { throw new IllegalStateException("Unsupported encoding: " + encoding); } }
From source file:com.google.cloud.dataflow.sdk.coders.StringUtf8Coder.java
/**
 * {@inheritDoc}
 *
 * @return the byte size of the UTF-8 encoding of the string or, in a nested
 *     context, the byte size of the encoding plus the encoded length prefix.
 */
@Override
protected long getEncodedElementByteSize(String value, Context context) throws Exception {
    if (value == null) {
        throw new CoderException("cannot encode a null String");
    }
    if (!context.isWholeStream) {
        // Nested values carry a length prefix, so measure the real encoded form
        // by streaming it through a counting sink that discards the bytes.
        CountingOutputStream byteCounter =
            new CountingOutputStream(ByteStreams.nullOutputStream());
        writeString(value, new DataOutputStream(byteCounter));
        return byteCounter.getCount();
    }
    // Whole-stream encoding is exactly the raw UTF-8 bytes.
    return Utf8.encodedLength(value);
}
From source file:com.google.errorprone.bugpatterns.android.IsLoggableTagLength.java
/**
 * Returns whether the tag's UTF-8 encoding fits within 23 bytes — the length
 * limit this check enforces for Log.isLoggable tags.
 */
private boolean isValidTag(String tag) {
    final int maxTagLengthBytes = 23;
    return Utf8.encodedLength(tag) <= maxTagLengthBytes;
}
From source file:org.apache.beam.sdk.coders.StringUtf8Coder.java
/** * {@inheritDoc}/* w w w . j a v a 2s .c om*/ * * @return the byte size of the UTF-8 encoding of the a string or, in a nested context, * the byte size of the encoding plus the encoded length prefix. */ @Override public long getEncodedElementByteSize(String value) throws Exception { if (value == null) { throw new CoderException("cannot encode a null String"); } int size = Utf8.encodedLength(value); return VarInt.getLength(size) + size; }
From source file:com.google.template.soy.jbcsrc.restricted.BytecodeUtils.java
/** Returns an {@link Expression} that can load the given String constant. */
public static Expression constant(final String value) {
    checkNotNull(value);
    // A single constant pool entry has a bounded UTF-8 size; reject anything larger.
    final int utf8Length = Utf8.encodedLength(value);
    checkArgument(utf8Length <= MAX_CONSTANT_STRING_LENGTH,
        "String is too long when encoded in utf8");
    return stringConstant(value);
}
From source file:com.google.template.soy.jbcsrc.restricted.BytecodeUtils.java
/** * Returns an {@link Expression} that can load the given String constant. * * <p>Unlike {@link #constant(String)} this can handle strings larger than 65K bytes. *///from w ww . ja v a 2 s .c o m public static Expression constant(String value, ClassFieldManager manager) { int encodedLength = Utf8.encodedLength(value); if (encodedLength <= MAX_CONSTANT_STRING_LENGTH) { return stringConstant(value); } // else it is too big for a single constant pool entry so split it into a small number of // entries and generate a static final field to hold the cat'ed value. int startIndex = 0; Expression stringExpression = null; int length = value.length(); do { int endIndex = offsetOf65KUtf8Bytes(value, startIndex, length); // N.B. we may end up splitting the string at a surrogate pair, but the class format uses // modified utf8 which is forgiving about such things. Expression substringConstant = stringConstant(value.substring(startIndex, endIndex)); startIndex = endIndex; if (stringExpression == null) { stringExpression = substringConstant; } else { stringExpression = stringExpression.invoke(MethodRef.STRING_CONCAT, substringConstant); } } while (startIndex < length); FieldRef fieldRef = manager.addStaticField(LARGE_STRING_CONSTANT_NAME, stringExpression); return fieldRef.accessor(); }
From source file:org.apache.beam.runners.dataflow.DataflowRunner.java
/**
 * Submits the pipeline to the Dataflow service and returns a handle for the
 * created job.
 *
 * <p>Stages the pipeline's files, translates it into a Job specification,
 * optionally writes the spec to a file or template location, then creates the
 * job via {@code dataflowClient}.
 *
 * @param pipeline the pipeline to execute
 * @return a {@link DataflowPipelineJob} for monitoring the submitted job, or a
 *     {@code DataflowTemplateJob} when only a template was requested
 */
@Override
public DataflowPipelineJob run(Pipeline pipeline) {
    logWarningIfPCollectionViewHasNonDeterministicKeyCoder(pipeline);
    // Any unbounded PCollection forces streaming mode.
    if (containsUnboundedPCollection(pipeline)) {
        options.setStreaming(true);
    }
    replaceTransforms(pipeline);
    LOG.info("Executing pipeline on the Dataflow Service, which will have billing implications "
        + "related to Google Compute Engine usage and other Google Cloud Services.");
    List<DataflowPackage> packages = options.getStager().stageFiles();

    // Set a unique client_request_id in the CreateJob request.
    // This is used to ensure idempotence of job creation across retried
    // attempts to create a job. Specifically, if the service returns a job with
    // a different client_request_id, it means the returned one is a different
    // job previously created with the same job name, and that the job creation
    // has been effectively rejected. The SDK should return
    // Error::Already_Exists to user in that case.
    int randomNum = new Random().nextInt(9000) + 1000;
    String requestId = DateTimeFormat.forPattern("YYYYMMddHHmmssmmm").withZone(DateTimeZone.UTC)
        .print(DateTimeUtils.currentTimeMillis()) + "_" + randomNum;

    // Try to create a debuggee ID. This must happen before the job is translated since it may
    // update the options.
    DataflowPipelineOptions dataflowOptions = options.as(DataflowPipelineOptions.class);
    maybeRegisterDebuggee(dataflowOptions, requestId);

    JobSpecification jobSpecification = translator.translate(pipeline, this, packages);
    Job newJob = jobSpecification.getJob();
    newJob.setClientRequestId(requestId);

    ReleaseInfo releaseInfo = ReleaseInfo.getReleaseInfo();
    String version = releaseInfo.getVersion();
    checkState(!version.equals("${pom.version}"),
        "Unable to submit a job to the Dataflow service with unset version ${pom.version}");
    System.out.println("Dataflow SDK version: " + version);
    newJob.getEnvironment().setUserAgent((Map) releaseInfo.getProperties());

    // The Dataflow Service may write to the temporary directory directly, so
    // must be verified.
    if (!isNullOrEmpty(options.getGcpTempLocation())) {
        newJob.getEnvironment().setTempStoragePrefix(
            dataflowOptions.getPathValidator().verifyPath(options.getGcpTempLocation()));
    }
    newJob.getEnvironment().setDataset(options.getTempDatasetId());
    newJob.getEnvironment().setExperiments(options.getExperiments());

    // Set the Docker container image that executes Dataflow worker harness, residing in Google
    // Container Registry. Translator is guaranteed to create a worker pool prior to this point.
    String workerHarnessContainerImage = getContainerImageForJob(options);
    for (WorkerPool workerPool : newJob.getEnvironment().getWorkerPools()) {
        workerPool.setWorkerHarnessContainerImage(workerHarnessContainerImage);
    }

    newJob.getEnvironment().setVersion(getEnvironmentVersion(options));

    if (hooks != null) {
        hooks.modifyEnvironmentBeforeSubmission(newJob.getEnvironment());
    }

    // Optionally dump the job specification: to a local/GCS file for debugging, or
    // to a template location (which ends the run without creating a job).
    if (!isNullOrEmpty(options.getDataflowJobFile()) || !isNullOrEmpty(options.getTemplateLocation())) {
        boolean isTemplate = !isNullOrEmpty(options.getTemplateLocation());
        if (isTemplate) {
            checkArgument(isNullOrEmpty(options.getDataflowJobFile()),
                "--dataflowJobFile and --templateLocation are mutually exclusive.");
        }
        String fileLocation = firstNonNull(options.getTemplateLocation(), options.getDataflowJobFile());
        checkArgument(fileLocation.startsWith("/") || fileLocation.startsWith("gs://"),
            "Location must be local or on Cloud Storage, got %s.", fileLocation);
        ResourceId fileResource = FileSystems.matchNewResource(fileLocation, false /* isDirectory */);
        String workSpecJson = DataflowPipelineTranslator.jobToString(newJob);
        try (PrintWriter printWriter = new PrintWriter(
                Channels.newOutputStream(FileSystems.create(fileResource, MimeTypes.TEXT)))) {
            printWriter.print(workSpecJson);
            LOG.info("Printed job specification to {}", fileLocation);
        } catch (IOException ex) {
            String error = String.format("Cannot create output file at %s", fileLocation);
            if (isTemplate) {
                // A template run produces nothing else, so a write failure is fatal.
                throw new RuntimeException(error, ex);
            } else {
                LOG.warn(error, ex);
            }
        }
        if (isTemplate) {
            LOG.info("Template successfully created.");
            return new DataflowTemplateJob();
        }
    }

    String jobIdToUpdate = null;
    if (options.isUpdate()) {
        jobIdToUpdate = getJobIdFromName(options.getJobName());
        newJob.setTransformNameMapping(options.getTransformNameMapping());
        newJob.setReplaceJobId(jobIdToUpdate);
    }

    Job jobResult;
    try {
        jobResult = dataflowClient.createJob(newJob);
    } catch (GoogleJsonResponseException e) {
        String errorMessages = "Unexpected errors";
        if (e.getDetails() != null) {
            // An oversized serialized job is a common cause of create failures; check
            // the request size against the service limit before surfacing details.
            if (Utf8.encodedLength(newJob.toString()) >= CREATE_JOB_REQUEST_LIMIT_BYTES) {
                errorMessages = "The size of the serialized JSON representation of the pipeline "
                    + "exceeds the allowable limit. "
                    + "For more information, please check the FAQ link below:\n"
                    + "https://cloud.google.com/dataflow/faq";
            } else {
                errorMessages = e.getDetails().getMessage();
            }
        }
        throw new RuntimeException("Failed to create a workflow job: " + errorMessages, e);
    } catch (IOException e) {
        throw new RuntimeException("Failed to create a workflow job", e);
    }

    // Use a raw client for post-launch monitoring, as status calls may fail
    // regularly and need not be retried automatically.
    DataflowPipelineJob dataflowPipelineJob = new DataflowPipelineJob(DataflowClient.create(options),
        jobResult.getId(), options, jobSpecification.getStepNames());

    // If the service returned client request id, the SDK needs to compare it
    // with the original id generated in the request, if they are not the same
    // (i.e., the returned job is not created by this request), throw
    // DataflowJobAlreadyExistsException or DataflowJobAlreadyUpdatedException
    // depending on whether this is a reload or not.
    if (jobResult.getClientRequestId() != null && !jobResult.getClientRequestId().isEmpty()
            && !jobResult.getClientRequestId().equals(requestId)) {
        // If updating a job.
        if (options.isUpdate()) {
            throw new DataflowJobAlreadyUpdatedException(dataflowPipelineJob, String.format(
                "The job named %s with id: %s has already been updated into job id: %s "
                    + "and cannot be updated again.",
                newJob.getName(), jobIdToUpdate, jobResult.getId()));
        } else {
            throw new DataflowJobAlreadyExistsException(dataflowPipelineJob,
                String.format("There is already an active job named %s with id: %s. If you want "
                    + "to submit a second job, try again by setting a different name using --jobName.",
                    newJob.getName(), jobResult.getId()));
        }
    }

    LOG.info("To access the Dataflow monitoring console, please navigate to {}",
        MonitoringUtil.getJobMonitoringPageURL(options.getProject(), jobResult.getId()));
    System.out.println("Submitted job: " + jobResult.getId());
    LOG.info("To cancel the job using the 'gcloud' tool, run:\n> {}",
        MonitoringUtil.getGcloudCancelCommand(options, jobResult.getId()));

    return dataflowPipelineJob;
}
From source file:org.apache.beam.sdk.runners.DataflowPipelineRunner.java
/**
 * Submits the pipeline to the Dataflow service and returns a handle for the
 * created job.
 *
 * <p>Stages files, translates the pipeline into a Job specification, optionally
 * writes the spec to a debug file, then creates the job via the Dataflow client
 * and wires up aggregator extraction for the returned {@link DataflowPipelineJob}.
 *
 * @param pipeline the pipeline to execute
 * @return a {@link DataflowPipelineJob} for monitoring the submitted job
 */
@Override
public DataflowPipelineJob run(Pipeline pipeline) {
    logWarningIfPCollectionViewHasNonDeterministicKeyCoder(pipeline);
    LOG.info("Executing pipeline on the Dataflow Service, which will have billing implications "
        + "related to Google Compute Engine usage and other Google Cloud Services.");
    List<DataflowPackage> packages = options.getStager().stageFiles();

    // Set a unique client_request_id in the CreateJob request.
    // This is used to ensure idempotence of job creation across retried
    // attempts to create a job. Specifically, if the service returns a job with
    // a different client_request_id, it means the returned one is a different
    // job previously created with the same job name, and that the job creation
    // has been effectively rejected. The SDK should return
    // Error::Already_Exists to user in that case.
    int randomNum = new Random().nextInt(9000) + 1000;
    String requestId = DateTimeFormat.forPattern("YYYYMMddHHmmssmmm").withZone(DateTimeZone.UTC)
        .print(DateTimeUtils.currentTimeMillis()) + "_" + randomNum;

    // Try to create a debuggee ID. This must happen before the job is translated since it may
    // update the options.
    DataflowPipelineOptions dataflowOptions = options.as(DataflowPipelineOptions.class);
    maybeRegisterDebuggee(dataflowOptions, requestId);

    JobSpecification jobSpecification = translator.translate(pipeline, this, packages);
    Job newJob = jobSpecification.getJob();
    newJob.setClientRequestId(requestId);

    String version = ReleaseInfo.getReleaseInfo().getVersion();
    System.out.println("Dataflow SDK version: " + version);
    newJob.getEnvironment().setUserAgent(ReleaseInfo.getReleaseInfo());

    // The Dataflow Service may write to the temporary directory directly, so
    // must be verified.
    if (!Strings.isNullOrEmpty(options.getTempLocation())) {
        newJob.getEnvironment()
            .setTempStoragePrefix(dataflowOptions.getPathValidator().verifyPath(options.getTempLocation()));
    }
    newJob.getEnvironment().setDataset(options.getTempDatasetId());
    newJob.getEnvironment().setExperiments(options.getExperiments());

    // Set the Docker container image that executes Dataflow worker harness, residing in Google
    // Container Registry. Translator is guaranteed to create a worker pool prior to this point.
    String workerHarnessContainerImage = options.as(DataflowPipelineWorkerPoolOptions.class)
        .getWorkerHarnessContainerImage();
    for (WorkerPool workerPool : newJob.getEnvironment().getWorkerPools()) {
        workerPool.setWorkerHarnessContainerImage(workerHarnessContainerImage);
    }

    // Requirements about the service.
    Map<String, Object> environmentVersion = new HashMap<>();
    environmentVersion.put(PropertyNames.ENVIRONMENT_VERSION_MAJOR_KEY, ENVIRONMENT_MAJOR_VERSION);
    newJob.getEnvironment().setVersion(environmentVersion);
    // Default jobType is JAVA_BATCH_AUTOSCALING: A Java job with workers that the job can
    // autoscale if specified.
    String jobType = "JAVA_BATCH_AUTOSCALING";
    if (options.isStreaming()) {
        jobType = "STREAMING";
    }
    environmentVersion.put(PropertyNames.ENVIRONMENT_VERSION_JOB_TYPE_KEY, jobType);

    if (hooks != null) {
        hooks.modifyEnvironmentBeforeSubmission(newJob.getEnvironment());
    }

    // Best-effort dump of the job spec for debugging; failures are only logged.
    if (!Strings.isNullOrEmpty(options.getDataflowJobFile())) {
        try (PrintWriter printWriter = new PrintWriter(new File(options.getDataflowJobFile()))) {
            String workSpecJson = DataflowPipelineTranslator.jobToString(newJob);
            printWriter.print(workSpecJson);
            LOG.info("Printed workflow specification to {}", options.getDataflowJobFile());
        } catch (IllegalStateException ex) {
            LOG.warn("Cannot translate workflow spec to json for debug.");
        } catch (FileNotFoundException ex) {
            LOG.warn("Cannot create workflow spec output file.");
        }
    }

    String jobIdToUpdate = null;
    if (options.isUpdate()) {
        jobIdToUpdate = getJobIdFromName(options.getJobName());
        newJob.setTransformNameMapping(options.getTransformNameMapping());
        newJob.setReplaceJobId(jobIdToUpdate);
    }

    Job jobResult;
    try {
        jobResult = dataflowClient.projects().jobs().create(options.getProject(), newJob).execute();
    } catch (GoogleJsonResponseException e) {
        String errorMessages = "Unexpected errors";
        if (e.getDetails() != null) {
            // An oversized serialized job is a common cause of create failures; check
            // the request size against the service limit before surfacing details.
            if (Utf8.encodedLength(newJob.toString()) >= CREATE_JOB_REQUEST_LIMIT_BYTES) {
                errorMessages = "The size of the serialized JSON representation of the pipeline "
                    + "exceeds the allowable limit. "
                    + "For more information, please check the FAQ link below:\n"
                    + "https://cloud.google.com/dataflow/faq";
            } else {
                errorMessages = e.getDetails().getMessage();
            }
        }
        throw new RuntimeException("Failed to create a workflow job: " + errorMessages, e);
    } catch (IOException e) {
        throw new RuntimeException("Failed to create a workflow job", e);
    }

    // Obtain all of the extractors from the PTransforms used in the pipeline so the
    // DataflowPipelineJob has access to them.
    AggregatorPipelineExtractor aggregatorExtractor = new AggregatorPipelineExtractor(pipeline);
    Map<Aggregator<?, ?>, Collection<PTransform<?, ?>>> aggregatorSteps = aggregatorExtractor
        .getAggregatorSteps();
    DataflowAggregatorTransforms aggregatorTransforms = new DataflowAggregatorTransforms(aggregatorSteps,
        jobSpecification.getStepNames());

    // Use a raw client for post-launch monitoring, as status calls may fail
    // regularly and need not be retried automatically.
    DataflowPipelineJob dataflowPipelineJob = new DataflowPipelineJob(options.getProject(), jobResult.getId(),
        DataflowTransport.newRawDataflowClient(options).build(), aggregatorTransforms);

    // If the service returned client request id, the SDK needs to compare it
    // with the original id generated in the request, if they are not the same
    // (i.e., the returned job is not created by this request), throw
    // DataflowJobAlreadyExistsException or DataflowJobAlreadyUpdatedException
    // depending on whether this is a reload or not.
    if (jobResult.getClientRequestId() != null && !jobResult.getClientRequestId().isEmpty()
            && !jobResult.getClientRequestId().equals(requestId)) {
        // If updating a job.
        if (options.isUpdate()) {
            throw new DataflowJobAlreadyUpdatedException(dataflowPipelineJob, String.format(
                "The job named %s with id: %s has already been updated into job id: %s "
                    + "and cannot be updated again.",
                newJob.getName(), jobIdToUpdate, jobResult.getId()));
        } else {
            throw new DataflowJobAlreadyExistsException(dataflowPipelineJob,
                String.format("There is already an active job named %s with id: %s. If you want "
                    + "to submit a second job, try again by setting a different name using --jobName.",
                    newJob.getName(), jobResult.getId()));
        }
    }

    LOG.info("To access the Dataflow monitoring console, please navigate to {}",
        MonitoringUtil.getJobMonitoringPageURL(options.getProject(), jobResult.getId()));
    System.out.println("Submitted job: " + jobResult.getId());
    LOG.info("To cancel the job using the 'gcloud' tool, run:\n> {}",
        MonitoringUtil.getGcloudCancelCommand(options, jobResult.getId()));

    return dataflowPipelineJob;
}
From source file:com.google.cloud.dataflow.sdk.runners.DataflowPipelineRunner.java
/**
 * Submits the pipeline to the Dataflow service and returns a handle for the
 * created job.
 *
 * <p>Stages files, translates the pipeline into a Job specification, runs any
 * configured job-file hooks, then creates the job via the Dataflow client and
 * wires up aggregator extraction for the returned {@link DataflowPipelineJob}.
 * May return {@code null} if hooks decide the job should not actually run.
 *
 * @param pipeline the pipeline to execute
 * @return a {@link DataflowPipelineJob} for monitoring the submitted job, or
 *     {@code null} if hooks suppressed the run
 */
@Override
public DataflowPipelineJob run(Pipeline pipeline) {
    logWarningIfPCollectionViewHasNonDeterministicKeyCoder(pipeline);
    LOG.info("Executing pipeline on the Dataflow Service, which will have billing implications "
        + "related to Google Compute Engine usage and other Google Cloud Services.");
    List<DataflowPackage> packages = options.getStager().stageFiles();

    // Set a unique client_request_id in the CreateJob request.
    // This is used to ensure idempotence of job creation across retried
    // attempts to create a job. Specifically, if the service returns a job with
    // a different client_request_id, it means the returned one is a different
    // job previously created with the same job name, and that the job creation
    // has been effectively rejected. The SDK should return
    // Error::Already_Exists to user in that case.
    int randomNum = new Random().nextInt(9000) + 1000;
    String requestId = DateTimeFormat.forPattern("YYYYMMddHHmmssmmm").withZone(DateTimeZone.UTC)
        .print(DateTimeUtils.currentTimeMillis()) + "_" + randomNum;

    // Try to create a debuggee ID. This must happen before the job is translated since it may
    // update the options.
    DataflowPipelineOptions dataflowOptions = options.as(DataflowPipelineOptions.class);
    maybeRegisterDebuggee(dataflowOptions, requestId);

    JobSpecification jobSpecification = translator.translate(pipeline, this, packages);
    Job newJob = jobSpecification.getJob();
    newJob.setClientRequestId(requestId);

    String version = DataflowReleaseInfo.getReleaseInfo().getVersion();
    System.out.println("Dataflow SDK version: " + version);
    newJob.getEnvironment().setUserAgent(DataflowReleaseInfo.getReleaseInfo());

    // The Dataflow Service may write to the temporary directory directly, so
    // must be verified.
    if (!Strings.isNullOrEmpty(options.getTempLocation())) {
        newJob.getEnvironment()
            .setTempStoragePrefix(dataflowOptions.getPathValidator().verifyPath(options.getTempLocation()));
    }
    newJob.getEnvironment().setDataset(options.getTempDatasetId());
    newJob.getEnvironment().setExperiments(options.getExperiments());

    // Set the Docker container image that executes Dataflow worker harness, residing in Google
    // Container Registry. Translator is guaranteed to create a worker pool prior to this point.
    String workerHarnessContainerImage = options.as(DataflowPipelineWorkerPoolOptions.class)
        .getWorkerHarnessContainerImage();
    for (WorkerPool workerPool : newJob.getEnvironment().getWorkerPools()) {
        workerPool.setWorkerHarnessContainerImage(workerHarnessContainerImage);
    }

    // Requirements about the service.
    Map<String, Object> environmentVersion = new HashMap<>();
    environmentVersion.put(PropertyNames.ENVIRONMENT_VERSION_MAJOR_KEY, ENVIRONMENT_MAJOR_VERSION);
    newJob.getEnvironment().setVersion(environmentVersion);
    // Default jobType is JAVA_BATCH_AUTOSCALING: A Java job with workers that the job can
    // autoscale if specified.
    String jobType = "JAVA_BATCH_AUTOSCALING";
    if (options.isStreaming()) {
        jobType = "STREAMING";
    }
    environmentVersion.put(PropertyNames.ENVIRONMENT_VERSION_JOB_TYPE_KEY, jobType);

    if (hooks != null) {
        hooks.modifyEnvironmentBeforeSubmission(newJob.getEnvironment());
    }

    if (!Strings.isNullOrEmpty(options.getDataflowJobFile())) {
        runJobFileHooks(newJob);
    }

    // Hooks may veto actually submitting the job.
    if (hooks != null && !hooks.shouldActuallyRunJob()) {
        return null;
    }

    String jobIdToUpdate = null;
    if (options.getUpdate()) {
        jobIdToUpdate = getJobIdFromName(options.getJobName());
        newJob.setTransformNameMapping(options.getTransformNameMapping());
        newJob.setReplaceJobId(jobIdToUpdate);
    }

    Job jobResult;
    try {
        jobResult = dataflowClient.projects().jobs().create(options.getProject(), newJob).execute();
    } catch (GoogleJsonResponseException e) {
        String errorMessages = "Unexpected errors";
        if (e.getDetails() != null) {
            // An oversized serialized job is a common cause of create failures; check
            // the request size against the service limit before surfacing details.
            if (Utf8.encodedLength(newJob.toString()) >= CREATE_JOB_REQUEST_LIMIT_BYTES) {
                errorMessages = "The size of the serialized JSON representation of the pipeline "
                    + "exceeds the allowable limit. "
                    + "For more information, please check the FAQ link below:\n"
                    + "https://cloud.google.com/dataflow/faq";
            } else {
                errorMessages = e.getDetails().getMessage();
            }
        }
        throw new RuntimeException("Failed to create a workflow job: " + errorMessages, e);
    } catch (IOException e) {
        throw new RuntimeException("Failed to create a workflow job", e);
    }

    // Obtain all of the extractors from the PTransforms used in the pipeline so the
    // DataflowPipelineJob has access to them.
    AggregatorPipelineExtractor aggregatorExtractor = new AggregatorPipelineExtractor(pipeline);
    Map<Aggregator<?, ?>, Collection<PTransform<?, ?>>> aggregatorSteps = aggregatorExtractor
        .getAggregatorSteps();
    DataflowAggregatorTransforms aggregatorTransforms = new DataflowAggregatorTransforms(aggregatorSteps,
        jobSpecification.getStepNames());

    // Use a raw client for post-launch monitoring, as status calls may fail
    // regularly and need not be retried automatically.
    DataflowPipelineJob dataflowPipelineJob = new DataflowPipelineJob(options.getProject(), jobResult.getId(),
        Transport.newDataflowClient(options).build(), aggregatorTransforms);

    // If the service returned client request id, the SDK needs to compare it
    // with the original id generated in the request, if they are not the same
    // (i.e., the returned job is not created by this request), throw
    // DataflowJobAlreadyExistsException or DataflowJobAlreadyUpdatedException
    // depending on whether this is a reload or not.
    if (jobResult.getClientRequestId() != null && !jobResult.getClientRequestId().isEmpty()
            && !jobResult.getClientRequestId().equals(requestId)) {
        // If updating a job.
        if (options.getUpdate()) {
            throw new DataflowJobAlreadyUpdatedException(dataflowPipelineJob, String.format(
                "The job named %s with id: %s has already been updated into job id: %s "
                    + "and cannot be updated again.",
                newJob.getName(), jobIdToUpdate, jobResult.getId()));
        } else {
            throw new DataflowJobAlreadyExistsException(dataflowPipelineJob,
                String.format("There is already an active job named %s with id: %s. If you want "
                    + "to submit a second job, try again by setting a different name using --jobName.",
                    newJob.getName(), jobResult.getId()));
        }
    }

    LOG.info("To access the Dataflow monitoring console, please navigate to {}",
        MonitoringUtil.getJobMonitoringPageURL(options.getProject(), jobResult.getId()));
    System.out.println("Submitted job: " + jobResult.getId());
    LOG.info("To cancel the job using the 'gcloud' tool, run:\n> {}",
        MonitoringUtil.getGcloudCancelCommand(options, jobResult.getId()));

    return dataflowPipelineJob;
}