org.apache.beam.runners.dataflow.worker.DataflowRunnerHarness.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.beam.runners.dataflow.worker.DataflowRunnerHarness.java

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.beam.runners.dataflow.worker;

import com.google.common.collect.ImmutableList;
import java.io.IOException;
import java.util.Collections;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;
import javax.annotation.Nullable;
import org.apache.beam.model.pipeline.v1.Endpoints.ApiServiceDescriptor;
import org.apache.beam.model.pipeline.v1.RunnerApi;
import org.apache.beam.runners.dataflow.DataflowRunner;
import org.apache.beam.runners.dataflow.options.DataflowPipelineDebugOptions;
import org.apache.beam.runners.dataflow.options.DataflowWorkerHarnessOptions;
import org.apache.beam.runners.dataflow.worker.fn.BeamFnControlService;
import org.apache.beam.runners.dataflow.worker.fn.data.BeamFnDataGrpcService;
import org.apache.beam.runners.dataflow.worker.fn.logging.BeamFnLoggingService;
import org.apache.beam.runners.dataflow.worker.fn.stream.ServerStreamObserverFactory;
import org.apache.beam.runners.dataflow.worker.logging.DataflowWorkerLoggingInitializer;
import org.apache.beam.runners.fnexecution.GrpcContextHeaderAccessorProvider;
import org.apache.beam.runners.fnexecution.ServerFactory;
import org.apache.beam.runners.fnexecution.control.FnApiControlClient;
import org.apache.beam.runners.fnexecution.state.GrpcStateService;
import org.apache.beam.sdk.io.FileSystems;
import org.apache.beam.vendor.grpc.v1p13p1.io.grpc.Server;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * This is the harness for executing Dataflow jobs that make use of the Beam Fn API, and operates
 * independently of the SDK(s) being used by users.
 *
 * <p>The worker harness is a mediator between Dataflow Service and the SDK, translating
 * instructions (such as map tasks) from the Dataflow Service/DFE into Fn API instructions, and vice
 * versa.
 *
 * <p>{@link #main(String[])} brings up two gRPC servers (one hosting the control, data and state
 * services, one hosting only the logging service) and then hands over to
 * {@link #start(RunnerApi.Pipeline, DataflowWorkerHarnessOptions, BeamFnControlService,
 * BeamFnDataGrpcService, ApiServiceDescriptor, GrpcStateService)}, which runs either the streaming
 * or the batch worker depending on {@code pipelineOptions.isStreaming()}.
 */
public class DataflowRunnerHarness {
    private static final Logger LOG = LoggerFactory.getLogger(DataflowRunnerHarness.class);

    /** How long to wait for a gRPC server to terminate gracefully before forcing it to stop. */
    private static final long SERVER_SHUTDOWN_TIMEOUT_SECONDS = 30;

    /**
     * Fetches and processes work units from the Dataflow service.
     *
     * <p>Sets up logging and pipeline options, creates the Fn API services and the two gRPC servers
     * hosting them, runs the worker via {@code start(...)}, and finally tears the servers down.
     *
     * @param unusedArgs command line arguments; not read by this method
     * @throws Exception if initialization, the worker, or server teardown fails
     */
    public static void main(String[] unusedArgs) throws Exception {
        @Nullable
        RunnerApi.Pipeline pipeline = DataflowWorkerHarnessHelper.getPipelineFromEnv();

        // This descriptor is used for all services except logging. They are isolated to keep
        // critical traffic protected from best effort traffic.
        ApiServiceDescriptor controlApiService = DataflowWorkerHarnessHelper.getControlDescriptor();
        ApiServiceDescriptor loggingApiService = DataflowWorkerHarnessHelper.getLoggingDescriptor();

        LOG.info("{} started, using port {} for control, {} for logging.", DataflowRunnerHarness.class,
                controlApiService, loggingApiService);

        DataflowWorkerHarnessHelper.initializeLogging(DataflowRunnerHarness.class);
        DataflowWorkerHarnessOptions pipelineOptions = DataflowWorkerHarnessHelper
                .initializeGlobalStateAndPipelineOptions(DataflowRunnerHarness.class);
        DataflowWorkerHarnessHelper.configureLogging(pipelineOptions);

        // Initialize registered file systems.
        FileSystems.setDefaultPipelineOptions(pipelineOptions);

        ServerFactory serverFactory = createServerFactory(
                pipelineOptions.as(DataflowPipelineDebugOptions.class));
        ServerStreamObserverFactory streamObserverFactory = ServerStreamObserverFactory
                .fromOptions(pipelineOptions);

        Server servicesServer = null;
        Server loggingServer = null;
        try (BeamFnLoggingService beamFnLoggingService = new BeamFnLoggingService(loggingApiService,
                DataflowWorkerLoggingInitializer.getSdkLoggingHandler()::publish, streamObserverFactory::from,
                GrpcContextHeaderAccessorProvider.getHeaderAccessor());
                BeamFnControlService beamFnControlService = new BeamFnControlService(controlApiService,
                        streamObserverFactory::from, GrpcContextHeaderAccessorProvider.getHeaderAccessor());
                BeamFnDataGrpcService beamFnDataService = new BeamFnDataGrpcService(pipelineOptions,
                        controlApiService, streamObserverFactory::from,
                        GrpcContextHeaderAccessorProvider.getHeaderAccessor());
                GrpcStateService beamFnStateService = GrpcStateService.create()) {

            servicesServer = serverFactory.create(
                    ImmutableList.of(beamFnControlService, beamFnDataService, beamFnStateService),
                    controlApiService);

            loggingServer = serverFactory.create(ImmutableList.of(beamFnLoggingService), loggingApiService);

            start(pipeline, pipelineOptions, beamFnControlService, beamFnDataService, controlApiService,
                    beamFnStateService);

            // On the normal path begin the orderly shutdown while the services are still open;
            // the try-with-resources closes them before the finally block below runs.
            servicesServer.shutdown();
            loggingServer.shutdown();
        } finally {
            // Nested try/finally: the logging server must be torn down even when waiting on the
            // services server is interrupted.
            try {
                stopServer(servicesServer);
            } finally {
                stopServer(loggingServer);
            }
        }
    }

    /**
     * Runs the worker for the given pipeline until it finishes or fails.
     *
     * <p>When {@code pipelineOptions.isStreaming()} is true a {@link StreamingDataflowWorker} is
     * started and a background task keeps registering newly connected control clients; otherwise
     * control clients are served one at a time by a {@link BatchDataflowWorker} in a loop that only
     * ends by throwing.
     *
     * @param pipeline the pipeline to execute, may be {@code null}
     * @param pipelineOptions worker options; also selects streaming versus batch execution
     * @param beamFnControlService source of connected SDK harness control clients
     * @param beamFnDataService data service handed to the SDK harness registry
     * @param stateApiServiceDescriptor descriptor handed to the SDK harness registry
     * @param beamFnStateService state service handed to the SDK harness registry
     * @throws Exception if the worker fails or closing a control client fails
     */
    public static void start(@Nullable RunnerApi.Pipeline pipeline, DataflowWorkerHarnessOptions pipelineOptions,
            BeamFnControlService beamFnControlService, BeamFnDataGrpcService beamFnDataService,
            ApiServiceDescriptor stateApiServiceDescriptor, GrpcStateService beamFnStateService) throws Exception {

        SdkHarnessRegistry sdkHarnessRegistry = SdkHarnessRegistries
                .createFnApiSdkHarnessRegistry(stateApiServiceDescriptor, beamFnStateService, beamFnDataService);
        if (pipelineOptions.isStreaming()) {
            runStreamingWorker(pipeline, pipelineOptions, beamFnControlService, sdkHarnessRegistry);
        } else {
            runBatchWorker(pipeline, pipelineOptions, beamFnControlService, sdkHarnessRegistry);
        }
    }

    /** Picks the gRPC server factory according to the epoll-related experiments in the options. */
    private static ServerFactory createServerFactory(DataflowPipelineDebugOptions dataflowOptions) {
        if (DataflowRunner.hasExperiment(dataflowOptions, "beam_fn_api_epoll_domain_socket")) {
            return ServerFactory.createEpollDomainSocket();
        }
        if (DataflowRunner.hasExperiment(dataflowOptions, "beam_fn_api_epoll")) {
            return ServerFactory.createEpollSocket();
        }
        return ServerFactory.createDefault();
    }

    /**
     * Stops a server: requests an orderly shutdown, waits up to
     * {@link #SERVER_SHUTDOWN_TIMEOUT_SECONDS} seconds, then forces termination. Does nothing for
     * {@code null}, i.e. when the server was never created.
     */
    private static void stopServer(@Nullable Server server) throws InterruptedException {
        if (server == null) {
            return;
        }
        // Always request shutdown here: on the failure path nobody has done so yet, and without
        // it awaitTermination would just block for the whole timeout. Repeating the request on
        // the normal path is harmless.
        server.shutdown();
        try {
            server.awaitTermination(SERVER_SHUTDOWN_TIMEOUT_SECONDS, TimeUnit.SECONDS);
        } finally {
            // Runs even if the wait was interrupted so the server is never left running.
            server.shutdownNow();
        }
    }

    /**
     * Starts the streaming worker and, on a dedicated thread, keeps accepting control clients and
     * registering them with the SDK harness registry until the worker finishes.
     */
    @SuppressWarnings("InfiniteLoopStatement")
    private static void runStreamingWorker(@Nullable RunnerApi.Pipeline pipeline,
            DataflowWorkerHarnessOptions pipelineOptions, BeamFnControlService beamFnControlService,
            SdkHarnessRegistry sdkHarnessRegistry) throws Exception {
        DataflowWorkUnitClient client = new DataflowWorkUnitClient(pipelineOptions, LOG);
        LOG.info("Initializing Streaming Worker.");

        StreamingDataflowWorker worker = StreamingDataflowWorker.forStreamingFnWorkerHarness(
                Collections.emptyList(), client, pipelineOptions, pipeline, sdkHarnessRegistry);
        worker.startStatusPages();
        worker.start();

        ExecutorService executor = Executors.newSingleThreadExecutor();
        try {
            executor.execute(() -> { // Task to get new client connections
                while (true) {
                    LOG.info("Waiting for client.");
                    // Never close controlClient. It will be closed when the client terminates the
                    // connection.
                    FnApiControlClient controlClient = beamFnControlService.get();
                    LOG.info("Control client connected for {}", controlClient.getWorkerId());
                    controlClient.onClose(sdkHarnessRegistry::unregisterWorkerClient);
                    // register the client with worker once connected
                    sdkHarnessRegistry.registerWorkerClient(controlClient);
                }
            });
            worker.waitTillExecutionFinishes();
        } finally {
            // Must also run when waiting fails: the executor's thread is non-daemon and would
            // otherwise stay alive in the accept loop above.
            executor.shutdownNow();
        }
    }

    /**
     * Serves control clients one at a time in batch mode. For every connected client a new work unit
     * client and batch worker are created and work is performed until an {@link IOException}
     * occurs; the failure is logged, the control client is closed, and the next client is awaited.
     * This method never returns normally.
     */
    @SuppressWarnings("InfiniteLoopStatement")
    private static void runBatchWorker(@Nullable RunnerApi.Pipeline pipeline,
            DataflowWorkerHarnessOptions pipelineOptions, BeamFnControlService beamFnControlService,
            SdkHarnessRegistry sdkHarnessRegistry) throws Exception {
        while (true) {
            try (FnApiControlClient controlClient = beamFnControlService.get()) {
                DataflowWorkUnitClient client = new DataflowWorkUnitClient(pipelineOptions, LOG);
                LOG.info("Initializing Batch Worker.");
                BatchDataflowWorker worker = BatchDataflowWorker.forBatchFnWorkerHarness(pipeline,
                        sdkHarnessRegistry, client, pipelineOptions);
                // register the client with worker once connected
                sdkHarnessRegistry.registerWorkerClient(controlClient);
                LOG.info("Client connected.");
                try {
                    while (true) {
                        worker.getAndPerformWork();
                    }
                } catch (IOException e) {
                    LOG.error("Getting and performing work failed.", e);
                }
            }
        }
    }
}