// Java tutorial
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.beam.runners.dataflow.worker;

import static com.google.common.base.Preconditions.checkArgument;
import static org.apache.beam.runners.dataflow.util.Structs.getString;

import com.google.auto.service.AutoService;
import com.google.common.collect.ImmutableMap;
import com.google.common.math.DoubleMath;
import java.math.RoundingMode;
import java.util.Map;
import javax.annotation.Nullable;
import org.apache.beam.runners.dataflow.internal.IsmFormat.IsmRecord;
import org.apache.beam.runners.dataflow.internal.IsmFormat.IsmRecordCoder;
import org.apache.beam.runners.dataflow.options.DataflowWorkerHarnessOptions;
import org.apache.beam.runners.dataflow.util.CloudObject;
import org.apache.beam.runners.dataflow.worker.util.WorkerPropertyNames;
import org.apache.beam.runners.dataflow.worker.util.common.worker.Sink;
import org.apache.beam.sdk.coders.Coder;
import org.apache.beam.sdk.io.FileSystems;
import org.apache.beam.sdk.options.PipelineOptions;
import org.apache.beam.sdk.util.WindowedValue;
import org.apache.beam.sdk.util.WindowedValue.WindowedValueCoder;

/**
 * A {@link SinkFactory} that builds an {@link IsmSink} out of a {@link CloudObject} spec.
 *
 * <p>The coder handed to {@link #create} must be a {@link WindowedValueCoder} whose value coder is
 * an {@link IsmRecordCoder}; any other coder is rejected with an {@link IllegalArgumentException}.
 */
public class IsmSinkFactory implements SinkFactory {

  /** Upper bound for a single Bloom filter, as a fraction (0.1%) of the worker cache size. */
  private static final double BLOOM_FILTER_SIZE_LIMIT_MULTIPLIER = 0.001;

  /** Lower bound, in bytes, applied to the computed Bloom filter size limit. */
  private static final long MIN_BLOOM_FILTER_SIZE_BYTES = 128L;

  /** A {@link SinkFactory.Registrar} that exposes this factory under the ISM sink names. */
  @AutoService(SinkFactory.Registrar.class)
  public static class Registrar implements SinkFactory.Registrar {

    /**
     * Returns a map that binds both the short name and the fully qualified class name of the ISM
     * sink to one shared {@link IsmSinkFactory} instance.
     */
    @Override
    public Map<String, SinkFactory> factories() {
      SinkFactory shared = new IsmSinkFactory();
      return ImmutableMap.<String, SinkFactory>builder()
          .put("IsmSink", shared)
          .put("org.apache.beam.runners.dataflow.worker.IsmSink", shared)
          .build();
    }
  }

  /**
   * Creates an {@link IsmSink} writing to the file named in {@code spec}.
   *
   * @param spec the sink spec; its {@link WorkerPropertyNames#FILENAME} property names the output
   * @param coder must be a {@link WindowedValueCoder} wrapping an {@link IsmRecordCoder}
   * @param options pipeline options, read as {@link DataflowWorkerHarnessOptions} to obtain the
   *     worker cache size used to bound the Bloom filter
   * @param executionContext not used by this factory
   * @param operationContext not used by this factory
   * @return the newly constructed sink
   * @throws IllegalArgumentException if {@code coder} does not have the required shape
   */
  @Override
  public Sink<?> create(
      CloudObject spec,
      Coder<?> coder,
      PipelineOptions options,
      @Nullable DataflowExecutionContext executionContext,
      DataflowOperationContext operationContext)
      throws Exception {
    String outputFile = getString(spec, WorkerPropertyNames.FILENAME);

    // Outer layer: the coder has to be a WindowedValueCoder.
    checkArgument(
        coder instanceof WindowedValueCoder,
        "%s only supports using %s but got %s.",
        IsmSink.class,
        WindowedValueCoder.class,
        coder);
    // The element type cannot be verified at runtime; the value coder check below covers it.
    @SuppressWarnings("unchecked")
    WindowedValueCoder<IsmRecord<Object>> outerCoder =
        (WindowedValueCoder<IsmRecord<Object>>) coder;

    // Inner layer: the wrapped value coder has to be an IsmRecordCoder.
    Coder<IsmRecord<Object>> innerCoder = outerCoder.getValueCoder();
    checkArgument(
        innerCoder instanceof IsmRecordCoder,
        "%s only supports using %s but got %s.",
        IsmSink.class,
        IsmRecordCoder.class,
        innerCoder);
    @SuppressWarnings("unchecked")
    IsmRecordCoder<Object> recordCoder = (IsmRecordCoder<Object>) innerCoder;

    long sizeLimitBytes = bloomFilterSizeLimitBytes(options);

    // The second argument marks the resource as a file rather than a directory.
    return new IsmSink<>(
        FileSystems.matchNewResource(outputFile, false), recordCoder, sizeLimitBytes);
  }

  /**
   * Computes the per-filter Bloom filter size limit in bytes: the configured fraction of the
   * worker cache size, rounded down, but never less than {@link #MIN_BLOOM_FILTER_SIZE_BYTES}.
   */
  private static long bloomFilterSizeLimitBytes(PipelineOptions options) {
    long fractionOfCacheBytes =
        DoubleMath.roundToLong(
            BLOOM_FILTER_SIZE_LIMIT_MULTIPLIER
                * options.as(DataflowWorkerHarnessOptions.class).getWorkerCacheMb()
                // The cache size is expressed in MiB; scale it to bytes.
                * 1024
                * 1024,
            RoundingMode.DOWN);
    return Math.max(MIN_BLOOM_FILTER_SIZE_BYTES, fractionOfCacheBytes);
  }
}