org.apache.tez.tests.TestExternalTezServicesErrors.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.tez.tests.TestExternalTezServicesErrors.java

Source

/*
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.tez.tests;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;

import java.io.IOException;
import java.util.EnumSet;
import java.util.List;

import com.google.common.collect.Lists;
import com.google.common.collect.Sets;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
import org.apache.hadoop.yarn.api.records.ApplicationAttemptReport;
import org.apache.hadoop.yarn.api.records.ApplicationId;
import org.apache.hadoop.yarn.api.records.ApplicationReport;
import org.apache.hadoop.yarn.api.records.FinalApplicationStatus;
import org.apache.hadoop.yarn.api.records.YarnApplicationState;
import org.apache.hadoop.yarn.client.api.YarnClient;
import org.apache.hadoop.yarn.exceptions.YarnException;
import org.apache.tez.client.TezClient;
import org.apache.tez.common.TezUtils;
import org.apache.tez.dag.api.DAG;
import org.apache.tez.dag.api.TezConfiguration;
import org.apache.tez.dag.api.TezException;
import org.apache.tez.dag.api.UserPayload;
import org.apache.tez.dag.api.Vertex;
import org.apache.tez.dag.api.client.DAGClient;
import org.apache.tez.dag.api.client.DAGStatus;
import org.apache.tez.dag.api.client.StatusGetOpts;
import org.apache.tez.dag.app.ErrorPluginConfiguration;
import org.apache.tez.dag.app.dag.event.DAGAppMasterEventType;
import org.apache.tez.dag.app.launcher.TezTestServiceContainerLauncherWithErrors;
import org.apache.tez.dag.app.launcher.TezTestServiceNoOpContainerLauncher;
import org.apache.tez.dag.app.rm.TezTestServiceTaskSchedulerService;
import org.apache.tez.dag.app.rm.TezTestServiceTaskSchedulerServiceWithErrors;
import org.apache.tez.dag.app.taskcomm.TezTestServiceTaskCommunicatorImpl;
import org.apache.tez.dag.app.taskcomm.TezTestServiceTaskCommunicatorWithErrors;
import org.apache.tez.examples.JoinValidateConfigured;
import org.apache.tez.serviceplugins.api.ContainerLauncherDescriptor;
import org.apache.tez.serviceplugins.api.ServicePluginErrorDefaults;
import org.apache.tez.serviceplugins.api.ServicePluginsDescriptor;
import org.apache.tez.serviceplugins.api.TaskCommunicatorDescriptor;
import org.apache.tez.serviceplugins.api.TaskSchedulerDescriptor;
import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.junit.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class TestExternalTezServicesErrors {

    private static final Logger LOG = LoggerFactory.getLogger(TestExternalTezServicesErrors.class);

    private static final String EXT_PUSH_ENTITY_NAME = "ExtServiceTestPush";
    private static final String EXT_THROW_ERROR_ENTITY_NAME = "ExtServiceTestThrowErrors";
    private static final String EXT_REPORT_NON_FATAL_ERROR_ENTITY_NAME = "ExtServiceTestReportNonFatalErrors";
    private static final String EXT_REPORT_FATAL_ERROR_ENTITY_NAME = "ExtServiceTestReportFatalErrors";

    private static final String SUFFIX_LAUNCHER = "ContainerLauncher";
    private static final String SUFFIX_TASKCOMM = "TaskCommunicator";
    private static final String SUFFIX_SCHEDULER = "TaskScheduler";

    private static ExternalTezServiceTestHelper extServiceTestHelper;

    private static ServicePluginsDescriptor servicePluginsDescriptor;

    private static final Path SRC_DATA_DIR = new Path(
            "/tmp/" + TestExternalTezServicesErrors.class.getSimpleName());
    private static final Path HASH_JOIN_EXPECTED_RESULT_PATH = new Path(SRC_DATA_DIR, "expectedOutputPath");
    private static final Path HASH_JOIN_OUTPUT_PATH = new Path(SRC_DATA_DIR, "outPath");

    private static final Vertex.VertexExecutionContext EXECUTION_CONTEXT_EXT_SERVICE_PUSH = Vertex.VertexExecutionContext
            .create(EXT_PUSH_ENTITY_NAME, EXT_PUSH_ENTITY_NAME, EXT_PUSH_ENTITY_NAME);
    // Throw error contexts
    private static final Vertex.VertexExecutionContext EXECUTION_CONTEXT_LAUNCHER_THROW = Vertex.VertexExecutionContext
            .create(EXT_PUSH_ENTITY_NAME, EXT_THROW_ERROR_ENTITY_NAME, EXT_PUSH_ENTITY_NAME);
    private static final Vertex.VertexExecutionContext EXECUTION_CONTEXT_TASKCOMM_THROW = Vertex.VertexExecutionContext
            .create(EXT_PUSH_ENTITY_NAME, EXT_PUSH_ENTITY_NAME, EXT_THROW_ERROR_ENTITY_NAME);
    private static final Vertex.VertexExecutionContext EXECUTION_CONTEXT_SCHEDULER_THROW = Vertex.VertexExecutionContext
            .create(EXT_THROW_ERROR_ENTITY_NAME, EXT_PUSH_ENTITY_NAME, EXT_PUSH_ENTITY_NAME);

    // Report-non-fatal contexts
    private static final Vertex.VertexExecutionContext EXECUTION_CONTEXT_LAUNCHER_REPORT_NON_FATAL = Vertex.VertexExecutionContext
            .create(EXT_PUSH_ENTITY_NAME, EXT_REPORT_NON_FATAL_ERROR_ENTITY_NAME, EXT_PUSH_ENTITY_NAME);
    private static final Vertex.VertexExecutionContext EXECUTION_CONTEXT_TASKCOMM_REPORT_NON_FATAL = Vertex.VertexExecutionContext
            .create(EXT_PUSH_ENTITY_NAME, EXT_PUSH_ENTITY_NAME, EXT_REPORT_NON_FATAL_ERROR_ENTITY_NAME);
    private static final Vertex.VertexExecutionContext EXECUTION_CONTEXT_SCHEDULER_REPORT_NON_FATAL = Vertex.VertexExecutionContext
            .create(EXT_REPORT_NON_FATAL_ERROR_ENTITY_NAME, EXT_PUSH_ENTITY_NAME, EXT_PUSH_ENTITY_NAME);

    // Report fatal contexts
    private static final Vertex.VertexExecutionContext EXECUTION_CONTEXT_LAUNCHER_REPORT_FATAL = Vertex.VertexExecutionContext
            .create(EXT_PUSH_ENTITY_NAME, EXT_REPORT_FATAL_ERROR_ENTITY_NAME, EXT_PUSH_ENTITY_NAME);
    private static final Vertex.VertexExecutionContext EXECUTION_CONTEXT_TASKCOMM_REPORT_FATAL = Vertex.VertexExecutionContext
            .create(EXT_PUSH_ENTITY_NAME, EXT_PUSH_ENTITY_NAME, EXT_REPORT_FATAL_ERROR_ENTITY_NAME);
    private static final Vertex.VertexExecutionContext EXECUTION_CONTEXT_SCHEDULER_REPORT_FATAL = Vertex.VertexExecutionContext
            .create(EXT_REPORT_FATAL_ERROR_ENTITY_NAME, EXT_PUSH_ENTITY_NAME, EXT_PUSH_ENTITY_NAME);

    private static final Vertex.VertexExecutionContext EXECUTION_CONTEXT_DEFAULT = EXECUTION_CONTEXT_EXT_SERVICE_PUSH;

    private static String TEST_ROOT_DIR = "target" + Path.SEPARATOR + TestExternalTezServicesErrors.class.getName()
            + "-tmpDir";

    @BeforeClass
    public static void setup() throws Exception {

        extServiceTestHelper = new ExternalTezServiceTestHelper(TEST_ROOT_DIR);
        UserPayload userPayload = TezUtils.createUserPayloadFromConf(extServiceTestHelper.getConfForJobs());
        UserPayload userPayloadThrowError = ErrorPluginConfiguration
                .toUserPayload(ErrorPluginConfiguration.createThrowErrorConf());

        UserPayload userPayloadReportFatalErrorLauncher = ErrorPluginConfiguration
                .toUserPayload(ErrorPluginConfiguration.createReportFatalErrorConf(SUFFIX_LAUNCHER));
        UserPayload userPayloadReportFatalErrorTaskComm = ErrorPluginConfiguration
                .toUserPayload(ErrorPluginConfiguration.createReportFatalErrorConf(SUFFIX_TASKCOMM));
        UserPayload userPayloadReportFatalErrorScheduler = ErrorPluginConfiguration
                .toUserPayload(ErrorPluginConfiguration.createReportFatalErrorConf(SUFFIX_SCHEDULER));

        UserPayload userPayloadReportNonFatalErrorLauncher = ErrorPluginConfiguration
                .toUserPayload(ErrorPluginConfiguration.createReportNonFatalErrorConf(SUFFIX_LAUNCHER));
        UserPayload userPayloadReportNonFatalErrorTaskComm = ErrorPluginConfiguration
                .toUserPayload(ErrorPluginConfiguration.createReportNonFatalErrorConf(SUFFIX_TASKCOMM));
        UserPayload userPayloadReportNonFatalErrorScheduler = ErrorPluginConfiguration
                .toUserPayload(ErrorPluginConfiguration.createReportNonFatalErrorConf(SUFFIX_SCHEDULER));

        TaskSchedulerDescriptor[] taskSchedulerDescriptors = new TaskSchedulerDescriptor[] {
                TaskSchedulerDescriptor
                        .create(EXT_PUSH_ENTITY_NAME, TezTestServiceTaskSchedulerService.class.getName())
                        .setUserPayload(userPayload),
                TaskSchedulerDescriptor
                        .create(EXT_THROW_ERROR_ENTITY_NAME,
                                TezTestServiceTaskSchedulerServiceWithErrors.class.getName())
                        .setUserPayload(userPayloadThrowError),
                TaskSchedulerDescriptor
                        .create(EXT_REPORT_FATAL_ERROR_ENTITY_NAME,
                                TezTestServiceTaskSchedulerServiceWithErrors.class.getName())
                        .setUserPayload(userPayloadReportFatalErrorScheduler),
                TaskSchedulerDescriptor
                        .create(EXT_REPORT_NON_FATAL_ERROR_ENTITY_NAME,
                                TezTestServiceTaskSchedulerServiceWithErrors.class.getName())
                        .setUserPayload(userPayloadReportNonFatalErrorScheduler), };

        ContainerLauncherDescriptor[] containerLauncherDescriptors = new ContainerLauncherDescriptor[] {
                ContainerLauncherDescriptor
                        .create(EXT_PUSH_ENTITY_NAME, TezTestServiceNoOpContainerLauncher.class.getName())
                        .setUserPayload(userPayload),
                ContainerLauncherDescriptor
                        .create(EXT_THROW_ERROR_ENTITY_NAME,
                                TezTestServiceContainerLauncherWithErrors.class.getName())
                        .setUserPayload(userPayloadThrowError),
                ContainerLauncherDescriptor
                        .create(EXT_REPORT_FATAL_ERROR_ENTITY_NAME,
                                TezTestServiceContainerLauncherWithErrors.class.getName())
                        .setUserPayload(userPayloadReportFatalErrorLauncher),
                ContainerLauncherDescriptor
                        .create(EXT_REPORT_NON_FATAL_ERROR_ENTITY_NAME,
                                TezTestServiceContainerLauncherWithErrors.class.getName())
                        .setUserPayload(userPayloadReportNonFatalErrorLauncher) };

        TaskCommunicatorDescriptor[] taskCommunicatorDescriptors = new TaskCommunicatorDescriptor[] {
                TaskCommunicatorDescriptor
                        .create(EXT_PUSH_ENTITY_NAME, TezTestServiceTaskCommunicatorImpl.class.getName())
                        .setUserPayload(userPayload),
                TaskCommunicatorDescriptor
                        .create(EXT_THROW_ERROR_ENTITY_NAME,
                                TezTestServiceTaskCommunicatorWithErrors.class.getName())
                        .setUserPayload(userPayloadThrowError),
                TaskCommunicatorDescriptor
                        .create(EXT_REPORT_FATAL_ERROR_ENTITY_NAME,
                                TezTestServiceTaskCommunicatorWithErrors.class.getName())
                        .setUserPayload(userPayloadReportFatalErrorTaskComm),
                TaskCommunicatorDescriptor
                        .create(EXT_REPORT_NON_FATAL_ERROR_ENTITY_NAME,
                                TezTestServiceTaskCommunicatorWithErrors.class.getName())
                        .setUserPayload(userPayloadReportNonFatalErrorTaskComm) };

        servicePluginsDescriptor = ServicePluginsDescriptor.create(true, true, taskSchedulerDescriptors,
                containerLauncherDescriptors, taskCommunicatorDescriptors);

        extServiceTestHelper.setupSharedTezClient(servicePluginsDescriptor);

        // Generate the join data set used for each run.
        // Can a timeout be enforced here ?
        Path dataPath1 = new Path(SRC_DATA_DIR, "inPath1");
        Path dataPath2 = new Path(SRC_DATA_DIR, "inPath2");
        extServiceTestHelper.setupHashJoinData(SRC_DATA_DIR, dataPath1, dataPath2, HASH_JOIN_EXPECTED_RESULT_PATH,
                HASH_JOIN_OUTPUT_PATH);

        extServiceTestHelper.shutdownSharedTezClient();
    }

    @AfterClass
    public static void tearDown() throws IOException, TezException {
        extServiceTestHelper.tearDownAll();
    }

    @Test(timeout = 90000)
    public void testContainerLauncherThrowError() throws Exception {
        testFatalError("_testContainerLauncherError_", EXECUTION_CONTEXT_LAUNCHER_THROW, SUFFIX_LAUNCHER,
                Lists.newArrayList("Service Error",
                        DAGAppMasterEventType.CONTAINER_LAUNCHER_SERVICE_FATAL_ERROR.name()));
    }

    @Test(timeout = 90000)
    public void testTaskCommunicatorThrowError() throws Exception {
        testFatalError("_testContainerLauncherError_", EXECUTION_CONTEXT_TASKCOMM_THROW, SUFFIX_TASKCOMM, Lists
                .newArrayList("Service Error", DAGAppMasterEventType.TASK_COMMUNICATOR_SERVICE_FATAL_ERROR.name()));
    }

    @Test(timeout = 90000)
    public void testTaskSchedulerThrowError() throws Exception {
        testFatalError("_testContainerLauncherError_", EXECUTION_CONTEXT_SCHEDULER_THROW, SUFFIX_SCHEDULER, Lists
                .newArrayList("Service Error", DAGAppMasterEventType.TASK_SCHEDULER_SERVICE_FATAL_ERROR.name()));
    }

    @Test(timeout = 150000)
    public void testNonFatalErrors() throws IOException, TezException, InterruptedException {
        String methodName = "testNonFatalErrors";
        TezConfiguration tezClientConf = new TezConfiguration(extServiceTestHelper.getConfForJobs());
        TezClient tezClient = TezClient
                .newBuilder(TestExternalTezServicesErrors.class.getSimpleName() + methodName + "_session",
                        tezClientConf)
                .setIsSession(true).setServicePluginDescriptor(servicePluginsDescriptor).build();
        try {
            tezClient.start();
            LOG.info("TezSessionStarted for " + methodName);
            tezClient.waitTillReady();
            LOG.info("TezSession ready for submission for " + methodName);

            runAndVerifyForNonFatalErrors(tezClient, SUFFIX_LAUNCHER, EXECUTION_CONTEXT_LAUNCHER_REPORT_NON_FATAL);
            runAndVerifyForNonFatalErrors(tezClient, SUFFIX_TASKCOMM, EXECUTION_CONTEXT_TASKCOMM_REPORT_NON_FATAL);
            runAndVerifyForNonFatalErrors(tezClient, SUFFIX_SCHEDULER,
                    EXECUTION_CONTEXT_SCHEDULER_REPORT_NON_FATAL);

        } finally {
            tezClient.stop();
        }
    }

    @Test(timeout = 90000)
    public void testContainerLauncherReportFatalError() throws Exception {
        testFatalError("_testContainerLauncherReportFatalError_", EXECUTION_CONTEXT_LAUNCHER_REPORT_FATAL,
                SUFFIX_LAUNCHER, Lists.newArrayList(ErrorPluginConfiguration.REPORT_FATAL_ERROR_MESSAGE,
                        ServicePluginErrorDefaults.INCONSISTENT_STATE.name()));
    }

    @Test(timeout = 90000)
    public void testTaskCommReportFatalError() throws Exception {
        testFatalError("_testTaskCommReportFatalError_", EXECUTION_CONTEXT_TASKCOMM_REPORT_FATAL, SUFFIX_TASKCOMM,
                Lists.newArrayList(ErrorPluginConfiguration.REPORT_FATAL_ERROR_MESSAGE,
                        ServicePluginErrorDefaults.INCONSISTENT_STATE.name()));
    }

    @Test(timeout = 90000)
    public void testTaskSchedulerReportFatalError() throws Exception {
        testFatalError("_testTaskSchedulerReportFatalError_", EXECUTION_CONTEXT_SCHEDULER_REPORT_FATAL,
                SUFFIX_SCHEDULER, Lists.newArrayList(ErrorPluginConfiguration.REPORT_FATAL_ERROR_MESSAGE,
                        ServicePluginErrorDefaults.INCONSISTENT_STATE.name()));
    }

    private void testFatalError(String methodName, Vertex.VertexExecutionContext lhsExecutionContext,
            String dagNameSuffix, List<String> expectedDiagMessages)
            throws IOException, TezException, YarnException, InterruptedException {
        TezConfiguration tezClientConf = new TezConfiguration(extServiceTestHelper.getConfForJobs());
        TezClient tezClient = TezClient
                .newBuilder(TestExternalTezServicesErrors.class.getSimpleName() + methodName + "_session",
                        tezClientConf)
                .setIsSession(true).setServicePluginDescriptor(servicePluginsDescriptor).build();

        ApplicationId appId = null;
        try {
            tezClient.start();
            LOG.info("TezSessionStarted for " + methodName);
            tezClient.waitTillReady();
            LOG.info("TezSession ready for submission for " + methodName);

            JoinValidateConfigured joinValidate = new JoinValidateConfigured(EXECUTION_CONTEXT_DEFAULT,
                    lhsExecutionContext, EXECUTION_CONTEXT_EXT_SERVICE_PUSH, EXECUTION_CONTEXT_EXT_SERVICE_PUSH,
                    dagNameSuffix);

            DAG dag = joinValidate.createDag(new TezConfiguration(extServiceTestHelper.getConfForJobs()),
                    HASH_JOIN_EXPECTED_RESULT_PATH, HASH_JOIN_OUTPUT_PATH, 3);

            DAGClient dagClient = tezClient.submitDAG(dag);

            DAGStatus dagStatus = dagClient
                    .waitForCompletionWithStatusUpdates(Sets.newHashSet(StatusGetOpts.GET_COUNTERS));
            assertEquals(DAGStatus.State.ERROR, dagStatus.getState());
            boolean foundDiag = false;
            for (String diag : dagStatus.getDiagnostics()) {
                foundDiag = checkDiag(diag, expectedDiagMessages);
                if (foundDiag) {
                    break;
                }
            }
            appId = tezClient.getAppMasterApplicationId();
            assertTrue(foundDiag);
        } catch (InterruptedException e) {
            e.printStackTrace();
        } finally {
            tezClient.stop();
        }
        // Verify the state of the application.
        if (appId != null) {
            YarnClient yarnClient = YarnClient.createYarnClient();
            try {
                yarnClient.init(tezClientConf);
                yarnClient.start();

                ApplicationReport appReport = yarnClient.getApplicationReport(appId);
                YarnApplicationState appState = appReport.getYarnApplicationState();
                while (!EnumSet
                        .of(YarnApplicationState.FINISHED, YarnApplicationState.FAILED, YarnApplicationState.KILLED)
                        .contains(appState)) {
                    Thread.sleep(200L);
                    appReport = yarnClient.getApplicationReport(appId);
                    appState = appReport.getYarnApplicationState();
                }

                // TODO Workaround for YARN-4554. AppReport does not provide diagnostics - need to fetch them from ApplicationAttemptReport
                ApplicationAttemptId appAttemptId = appReport.getCurrentApplicationAttemptId();
                ApplicationAttemptReport appAttemptReport = yarnClient.getApplicationAttemptReport(appAttemptId);
                String diag = appAttemptReport.getDiagnostics();
                assertEquals(FinalApplicationStatus.FAILED, appReport.getFinalApplicationStatus());
                assertEquals(YarnApplicationState.FINISHED, appReport.getYarnApplicationState());
                checkDiag(diag, expectedDiagMessages);
            } finally {
                yarnClient.stop();
            }
        }
    }

    private boolean checkDiag(String diag, List<String> expected) {
        boolean found = true;
        for (String exp : expected) {
            if (diag.contains(exp)) {
                found = true;
                continue;
            } else {
                found = false;
                break;
            }
        }
        return found;
    }

    private void runAndVerifyForNonFatalErrors(TezClient tezClient, String componentName,
            Vertex.VertexExecutionContext lhsContext) throws TezException, InterruptedException, IOException {
        LOG.info("Running JoinValidate with componentName reportNonFatalException");
        JoinValidateConfigured joinValidate = new JoinValidateConfigured(EXECUTION_CONTEXT_DEFAULT, lhsContext,
                EXECUTION_CONTEXT_EXT_SERVICE_PUSH, EXECUTION_CONTEXT_EXT_SERVICE_PUSH, componentName);

        DAG dag = joinValidate.createDag(new TezConfiguration(extServiceTestHelper.getConfForJobs()),
                HASH_JOIN_EXPECTED_RESULT_PATH, HASH_JOIN_OUTPUT_PATH, 3);

        DAGClient dagClient = tezClient.submitDAG(dag);

        DAGStatus dagStatus = dagClient
                .waitForCompletionWithStatusUpdates(Sets.newHashSet(StatusGetOpts.GET_COUNTERS));
        assertEquals(DAGStatus.State.FAILED, dagStatus.getState());

        boolean foundDiag = false;
        for (String diag : dagStatus.getDiagnostics()) {
            if (diag.contains(ErrorPluginConfiguration.REPORT_NONFATAL_ERROR_MESSAGE)
                    && diag.contains(ServicePluginErrorDefaults.SERVICE_UNAVAILABLE.name())) {
                foundDiag = true;
                break;
            }
        }
        assertTrue(foundDiag);
    }

}