Example usage for org.apache.hadoop.yarn.api.records ApplicationReport getYarnApplicationState

List of usage examples for org.apache.hadoop.yarn.api.records ApplicationReport getYarnApplicationState

Introduction

In this page you can find the example usage for org.apache.hadoop.yarn.api.records ApplicationReport getYarnApplicationState.

Prototype

@Public
@Stable
public abstract YarnApplicationState getYarnApplicationState();

Source Link

Document

Get the YarnApplicationState of the application.

Usage

From source file:org.apache.flink.yarn.FlinkYarnClientBase.java

License:Apache License

/**
 * This method will block until the ApplicationMaster/JobManager have been
 * deployed on YARN./*w  w  w . j av  a  2  s  . c o  m*/
 */
protected AbstractFlinkYarnCluster deployInternal() throws Exception {
    isReadyForDeployment();

    LOG.info("Using values:");
    LOG.info("\tTaskManager count = {}", taskManagerCount);
    LOG.info("\tJobManager memory = {}", jobManagerMemoryMb);
    LOG.info("\tTaskManager memory = {}", taskManagerMemoryMb);

    // Create application via yarnClient
    yarnApplication = yarnClient.createApplication();
    GetNewApplicationResponse appResponse = yarnApplication.getNewApplicationResponse();

    // ------------------ Add dynamic properties to local flinkConfiguraton ------

    Map<String, String> dynProperties = CliFrontend.getDynamicProperties(dynamicPropertiesEncoded);
    for (Map.Entry<String, String> dynProperty : dynProperties.entrySet()) {
        flinkConfiguration.setString(dynProperty.getKey(), dynProperty.getValue());
    }

    try {
        org.apache.flink.core.fs.FileSystem.setDefaultScheme(flinkConfiguration);
    } catch (IOException e) {
        throw new IOException("Error while setting the default " + "filesystem scheme from configuration.", e);
    }
    // ------------------ Check if the specified queue exists --------------

    try {
        List<QueueInfo> queues = yarnClient.getAllQueues();
        if (queues.size() > 0 && this.yarnQueue != null) { // check only if there are queues configured in yarn and for this session.
            boolean queueFound = false;
            for (QueueInfo queue : queues) {
                if (queue.getQueueName().equals(this.yarnQueue)) {
                    queueFound = true;
                    break;
                }
            }
            if (!queueFound) {
                String queueNames = "";
                for (QueueInfo queue : queues) {
                    queueNames += queue.getQueueName() + ", ";
                }
                LOG.warn("The specified queue '" + this.yarnQueue + "' does not exist. " + "Available queues: "
                        + queueNames);
            }
        } else {
            LOG.debug("The YARN cluster does not have any queues configured");
        }
    } catch (Throwable e) {
        LOG.warn("Error while getting queue information from YARN: " + e.getMessage());
        if (LOG.isDebugEnabled()) {
            LOG.debug("Error details", e);
        }
    }

    // ------------------ Check if the YARN Cluster has the requested resources --------------

    // the yarnMinAllocationMB specifies the smallest possible container allocation size.
    // all allocations below this value are automatically set to this value.
    final int yarnMinAllocationMB = conf.getInt("yarn.scheduler.minimum-allocation-mb", 0);
    if (jobManagerMemoryMb < yarnMinAllocationMB || taskManagerMemoryMb < yarnMinAllocationMB) {
        LOG.warn("The JobManager or TaskManager memory is below the smallest possible YARN Container size. "
                + "The value of 'yarn.scheduler.minimum-allocation-mb' is '" + yarnMinAllocationMB
                + "'. Please increase the memory size."
                + "YARN will allocate the smaller containers but the scheduler will account for the minimum-allocation-mb, maybe not all instances "
                + "you requested will start.");
    }

    // set the memory to minAllocationMB to do the next checks correctly
    if (jobManagerMemoryMb < yarnMinAllocationMB) {
        jobManagerMemoryMb = yarnMinAllocationMB;
    }
    if (taskManagerMemoryMb < yarnMinAllocationMB) {
        taskManagerMemoryMb = yarnMinAllocationMB;
    }

    Resource maxRes = appResponse.getMaximumResourceCapability();
    final String NOTE = "Please check the 'yarn.scheduler.maximum-allocation-mb' and the 'yarn.nodemanager.resource.memory-mb' configuration values\n";
    if (jobManagerMemoryMb > maxRes.getMemory()) {
        failSessionDuringDeployment();
        throw new YarnDeploymentException(
                "The cluster does not have the requested resources for the JobManager available!\n"
                        + "Maximum Memory: " + maxRes.getMemory() + "MB Requested: " + jobManagerMemoryMb
                        + "MB. " + NOTE);
    }

    if (taskManagerMemoryMb > maxRes.getMemory()) {
        failSessionDuringDeployment();
        throw new YarnDeploymentException(
                "The cluster does not have the requested resources for the TaskManagers available!\n"
                        + "Maximum Memory: " + maxRes.getMemory() + " Requested: " + taskManagerMemoryMb
                        + "MB. " + NOTE);
    }

    final String NOTE_RSC = "\nThe Flink YARN client will try to allocate the YARN session, but maybe not all TaskManagers are "
            + "connecting from the beginning because the resources are currently not available in the cluster. "
            + "The allocation might take more time than usual because the Flink YARN client needs to wait until "
            + "the resources become available.";
    int totalMemoryRequired = jobManagerMemoryMb + taskManagerMemoryMb * taskManagerCount;
    ClusterResourceDescription freeClusterMem = getCurrentFreeClusterResources(yarnClient);
    if (freeClusterMem.totalFreeMemory < totalMemoryRequired) {
        LOG.warn("This YARN session requires " + totalMemoryRequired + "MB of memory in the cluster. "
                + "There are currently only " + freeClusterMem.totalFreeMemory + "MB available." + NOTE_RSC);

    }
    if (taskManagerMemoryMb > freeClusterMem.containerLimit) {
        LOG.warn("The requested amount of memory for the TaskManagers (" + taskManagerMemoryMb
                + "MB) is more than " + "the largest possible YARN container: " + freeClusterMem.containerLimit
                + NOTE_RSC);
    }
    if (jobManagerMemoryMb > freeClusterMem.containerLimit) {
        LOG.warn(
                "The requested amount of memory for the JobManager (" + jobManagerMemoryMb + "MB) is more than "
                        + "the largest possible YARN container: " + freeClusterMem.containerLimit + NOTE_RSC);
    }

    // ----------------- check if the requested containers fit into the cluster.

    int[] nmFree = Arrays.copyOf(freeClusterMem.nodeManagersFree, freeClusterMem.nodeManagersFree.length);
    // first, allocate the jobManager somewhere.
    if (!allocateResource(nmFree, jobManagerMemoryMb)) {
        LOG.warn("Unable to find a NodeManager that can fit the JobManager/Application master. "
                + "The JobManager requires " + jobManagerMemoryMb + "MB. NodeManagers available: "
                + Arrays.toString(freeClusterMem.nodeManagersFree) + NOTE_RSC);
    }
    // allocate TaskManagers
    for (int i = 0; i < taskManagerCount; i++) {
        if (!allocateResource(nmFree, taskManagerMemoryMb)) {
            LOG.warn("There is not enough memory available in the YARN cluster. "
                    + "The TaskManager(s) require " + taskManagerMemoryMb + "MB each. "
                    + "NodeManagers available: " + Arrays.toString(freeClusterMem.nodeManagersFree) + "\n"
                    + "After allocating the JobManager (" + jobManagerMemoryMb + "MB) and (" + i + "/"
                    + taskManagerCount + ") TaskManagers, " + "the following NodeManagers are available: "
                    + Arrays.toString(nmFree) + NOTE_RSC);
        }
    }

    // ------------------ Prepare Application Master Container  ------------------------------

    // respect custom JVM options in the YAML file
    final String javaOpts = flinkConfiguration.getString(ConfigConstants.FLINK_JVM_OPTIONS, "");

    String logbackFile = configurationDirectory + File.separator + FlinkYarnSessionCli.CONFIG_FILE_LOGBACK_NAME;
    boolean hasLogback = new File(logbackFile).exists();
    String log4jFile = configurationDirectory + File.separator + FlinkYarnSessionCli.CONFIG_FILE_LOG4J_NAME;

    boolean hasLog4j = new File(log4jFile).exists();
    if (hasLogback) {
        shipFiles.add(new File(logbackFile));
    }
    if (hasLog4j) {
        shipFiles.add(new File(log4jFile));
    }

    // Set up the container launch context for the application master
    ContainerLaunchContext amContainer = Records.newRecord(ContainerLaunchContext.class);

    String amCommand = "$JAVA_HOME/bin/java" + " -Xmx"
            + Utils.calculateHeapSize(jobManagerMemoryMb, flinkConfiguration) + "M " + javaOpts;

    if (hasLogback || hasLog4j) {
        amCommand += " -Dlog.file=\"" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/jobmanager.log\"";

        if (hasLogback) {
            amCommand += " -Dlogback.configurationFile=file:" + FlinkYarnSessionCli.CONFIG_FILE_LOGBACK_NAME;
        }

        if (hasLog4j) {
            amCommand += " -Dlog4j.configuration=file:" + FlinkYarnSessionCli.CONFIG_FILE_LOG4J_NAME;
        }
    }

    amCommand += " " + getApplicationMasterClass().getName() + " " + " 1>"
            + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/jobmanager.out" + " 2>"
            + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/jobmanager.err";
    amContainer.setCommands(Collections.singletonList(amCommand));

    LOG.debug("Application Master start command: " + amCommand);

    // intialize HDFS
    // Copy the application master jar to the filesystem
    // Create a local resource to point to the destination jar path
    final FileSystem fs = FileSystem.get(conf);

    // hard coded check for the GoogleHDFS client because its not overriding the getScheme() method.
    if (!fs.getClass().getSimpleName().equals("GoogleHadoopFileSystem") && fs.getScheme().startsWith("file")) {
        LOG.warn("The file system scheme is '" + fs.getScheme() + "'. This indicates that the "
                + "specified Hadoop configuration path is wrong and the system is using the default Hadoop configuration values."
                + "The Flink YARN client needs to store its files in a distributed file system");
    }

    // Set-up ApplicationSubmissionContext for the application
    ApplicationSubmissionContext appContext = yarnApplication.getApplicationSubmissionContext();

    if (RecoveryMode.isHighAvailabilityModeActivated(flinkConfiguration)) {
        // activate re-execution of failed applications
        appContext.setMaxAppAttempts(flinkConfiguration.getInteger(ConfigConstants.YARN_APPLICATION_ATTEMPTS,
                YarnConfiguration.DEFAULT_RM_AM_MAX_ATTEMPTS));

        activateHighAvailabilitySupport(appContext);
    } else {
        // set number of application retries to 1 in the default case
        appContext
                .setMaxAppAttempts(flinkConfiguration.getInteger(ConfigConstants.YARN_APPLICATION_ATTEMPTS, 1));
    }

    final ApplicationId appId = appContext.getApplicationId();

    // Setup jar for ApplicationMaster
    LocalResource appMasterJar = Records.newRecord(LocalResource.class);
    LocalResource flinkConf = Records.newRecord(LocalResource.class);
    Path remotePathJar = Utils.setupLocalResource(fs, appId.toString(), flinkJarPath, appMasterJar,
            fs.getHomeDirectory());
    Path remotePathConf = Utils.setupLocalResource(fs, appId.toString(), flinkConfigurationPath, flinkConf,
            fs.getHomeDirectory());
    Map<String, LocalResource> localResources = new HashMap<>(2);
    localResources.put("flink.jar", appMasterJar);
    localResources.put("flink-conf.yaml", flinkConf);

    // setup security tokens (code from apache storm)
    final Path[] paths = new Path[2 + shipFiles.size()];
    StringBuilder envShipFileList = new StringBuilder();
    // upload ship files
    for (int i = 0; i < shipFiles.size(); i++) {
        File shipFile = shipFiles.get(i);
        LocalResource shipResources = Records.newRecord(LocalResource.class);
        Path shipLocalPath = new Path("file://" + shipFile.getAbsolutePath());
        paths[2 + i] = Utils.setupLocalResource(fs, appId.toString(), shipLocalPath, shipResources,
                fs.getHomeDirectory());
        localResources.put(shipFile.getName(), shipResources);

        envShipFileList.append(paths[2 + i]);
        if (i + 1 < shipFiles.size()) {
            envShipFileList.append(',');
        }
    }

    paths[0] = remotePathJar;
    paths[1] = remotePathConf;
    sessionFilesDir = new Path(fs.getHomeDirectory(), ".flink/" + appId.toString() + "/");

    FsPermission permission = new FsPermission(FsAction.ALL, FsAction.NONE, FsAction.NONE);
    fs.setPermission(sessionFilesDir, permission); // set permission for path.

    Utils.setTokensFor(amContainer, paths, conf);

    amContainer.setLocalResources(localResources);
    fs.close();

    // Setup CLASSPATH for ApplicationMaster
    Map<String, String> appMasterEnv = new HashMap<>();
    // set user specified app master environment variables
    appMasterEnv.putAll(Utils.getEnvironmentVariables(ConfigConstants.YARN_APPLICATION_MASTER_ENV_PREFIX,
            flinkConfiguration));
    // set classpath from YARN configuration
    Utils.setupEnv(conf, appMasterEnv);
    // set Flink on YARN internal configuration values
    appMasterEnv.put(YarnConfigKeys.ENV_TM_COUNT, String.valueOf(taskManagerCount));
    appMasterEnv.put(YarnConfigKeys.ENV_TM_MEMORY, String.valueOf(taskManagerMemoryMb));
    appMasterEnv.put(YarnConfigKeys.FLINK_JAR_PATH, remotePathJar.toString());
    appMasterEnv.put(YarnConfigKeys.ENV_APP_ID, appId.toString());
    appMasterEnv.put(YarnConfigKeys.ENV_CLIENT_HOME_DIR, fs.getHomeDirectory().toString());
    appMasterEnv.put(YarnConfigKeys.ENV_CLIENT_SHIP_FILES, envShipFileList.toString());
    appMasterEnv.put(YarnConfigKeys.ENV_CLIENT_USERNAME,
            UserGroupInformation.getCurrentUser().getShortUserName());
    appMasterEnv.put(YarnConfigKeys.ENV_SLOTS, String.valueOf(slots));
    appMasterEnv.put(YarnConfigKeys.ENV_DETACHED, String.valueOf(detached));

    if (dynamicPropertiesEncoded != null) {
        appMasterEnv.put(YarnConfigKeys.ENV_DYNAMIC_PROPERTIES, dynamicPropertiesEncoded);
    }

    amContainer.setEnvironment(appMasterEnv);

    // Set up resource type requirements for ApplicationMaster
    Resource capability = Records.newRecord(Resource.class);
    capability.setMemory(jobManagerMemoryMb);
    capability.setVirtualCores(1);

    String name;
    if (customName == null) {
        name = "Flink session with " + taskManagerCount + " TaskManagers";
        if (detached) {
            name += " (detached)";
        }
    } else {
        name = customName;
    }

    appContext.setApplicationName(name); // application name
    appContext.setApplicationType("Apache Flink");
    appContext.setAMContainerSpec(amContainer);
    appContext.setResource(capability);
    if (yarnQueue != null) {
        appContext.setQueue(yarnQueue);
    }

    // add a hook to clean up in case deployment fails
    Runtime.getRuntime().addShutdownHook(deploymentFailureHook);
    LOG.info("Submitting application master " + appId);
    yarnClient.submitApplication(appContext);

    LOG.info("Waiting for the cluster to be allocated");
    int waittime = 0;
    loop: while (true) {
        ApplicationReport report;
        try {
            report = yarnClient.getApplicationReport(appId);
        } catch (IOException e) {
            throw new YarnDeploymentException("Failed to deploy the cluster: " + e.getMessage());
        }
        YarnApplicationState appState = report.getYarnApplicationState();
        switch (appState) {
        case FAILED:
        case FINISHED:
        case KILLED:
            throw new YarnDeploymentException("The YARN application unexpectedly switched to state " + appState
                    + " during deployment. \n" + "Diagnostics from YARN: " + report.getDiagnostics() + "\n"
                    + "If log aggregation is enabled on your cluster, use this command to further investigate the issue:\n"
                    + "yarn logs -applicationId " + appId);
            //break ..
        case RUNNING:
            LOG.info("YARN application has been deployed successfully.");
            break loop;
        default:
            LOG.info("Deploying cluster, current state " + appState);
            if (waittime > 60000) {
                LOG.info(
                        "Deployment took more than 60 seconds. Please check if the requested resources are available in the YARN cluster");
            }

        }
        waittime += 1000;
        Thread.sleep(1000);
    }
    // print the application id for user to cancel themselves.
    if (isDetached()) {
        LOG.info("The Flink YARN client has been started in detached mode. In order to stop "
                + "Flink on YARN, use the following command or a YARN web interface to stop "
                + "it:\nyarn application -kill " + appId + "\nPlease also note that the "
                + "temporary files of the YARN session in the home directoy will not be removed.");
    }
    // since deployment was successful, remove the hook
    try {
        Runtime.getRuntime().removeShutdownHook(deploymentFailureHook);
    } catch (IllegalStateException e) {
        // we're already in the shut down hook.
    }
    // the Flink cluster is deployed in YARN. Represent cluster
    return new FlinkYarnCluster(yarnClient, appId, conf, flinkConfiguration, sessionFilesDir, detached);
}

From source file:org.apache.flink.yarn.FlinkYarnCluster.java

License:Apache License

/**
 * Connect the FlinkYarnCluster to the ApplicationMaster.
 *
 * Detached YARN sessions don't need to connect to the ApplicationMaster.
 * Detached per job YARN sessions need to connect until the required number of TaskManagers have been started.
 * /*from w w  w  .j  ava  2 s  .c  o  m*/
 * @throws IOException
 */
public void connectToCluster() throws IOException {
    if (isConnected) {
        throw new IllegalStateException("Can not connect to the cluster again");
    }

    // start actor system
    LOG.info("Start actor system.");
    // find name of own public interface, able to connect to the JM
    // try to find address for 2 seconds. log after 400 ms.
    InetAddress ownHostname = ConnectionUtils.findConnectingAddress(jobManagerAddress, 2000, 400);
    actorSystem = AkkaUtils.createActorSystem(flinkConfig, new Some<Tuple2<String, Object>>(
            new Tuple2<String, Object>(ownHostname.getCanonicalHostName(), 0)));

    // Create the leader election service
    flinkConfig.setString(ConfigConstants.JOB_MANAGER_IPC_ADDRESS_KEY, this.jobManagerAddress.getHostName());
    flinkConfig.setInteger(ConfigConstants.JOB_MANAGER_IPC_PORT_KEY, this.jobManagerAddress.getPort());

    LeaderRetrievalService leaderRetrievalService;

    try {
        leaderRetrievalService = LeaderRetrievalUtils.createLeaderRetrievalService(flinkConfig);
    } catch (Exception e) {
        throw new IOException("Could not create the leader retrieval service.", e);
    }

    // start application client
    LOG.info("Start application client.");

    applicationClient = actorSystem.actorOf(
            Props.create(ApplicationClient.class, flinkConfig, leaderRetrievalService), "applicationClient");

    actorRunner = new Thread(new Runnable() {
        @Override
        public void run() {
            // blocks until ApplicationClient has been stopped
            actorSystem.awaitTermination();

            // get final application report
            try {
                ApplicationReport appReport = yarnClient.getApplicationReport(appId);

                LOG.info("Application " + appId + " finished with state " + appReport.getYarnApplicationState()
                        + " and final state " + appReport.getFinalApplicationStatus() + " at "
                        + appReport.getFinishTime());

                if (appReport.getYarnApplicationState() == YarnApplicationState.FAILED
                        || appReport.getYarnApplicationState() == YarnApplicationState.KILLED) {
                    LOG.warn("Application failed. Diagnostics " + appReport.getDiagnostics());
                    LOG.warn("If log aggregation is activated in the Hadoop cluster, we recommend to retrieve "
                            + "the full application log using this command:\n" + "\tyarn logs -applicationId "
                            + appReport.getApplicationId() + "\n"
                            + "(It sometimes takes a few seconds until the logs are aggregated)");
                }
            } catch (Exception e) {
                LOG.warn("Error while getting final application report", e);
            }
        }
    });
    actorRunner.setDaemon(true);
    actorRunner.start();

    pollingRunner = new PollingThread(yarnClient, appId);
    pollingRunner.setDaemon(true);
    pollingRunner.start();

    Runtime.getRuntime().addShutdownHook(clientShutdownHook);

    isConnected = true;
}

From source file:org.apache.flink.yarn.FlinkYarnCluster.java

License:Apache License

@Override
public boolean hasFailed() {
    if (!isConnected) {
        throw new IllegalStateException("The cluster has been connected to the ApplicationMaster.");
    }//from w  w w . j  a  va 2 s  .  c  o  m
    if (pollingRunner == null) {
        LOG.warn("FlinkYarnCluster.hasFailed() has been called on an uninitialized cluster."
                + "The system might be in an erroneous state");
    }
    ApplicationReport lastReport = pollingRunner.getLastReport();
    if (lastReport == null) {
        LOG.warn(
                "FlinkYarnCluster.hasFailed() has been called on a cluster that didn't receive a status so far."
                        + "The system might be in an erroneous state");
        return false;
    } else {
        YarnApplicationState appState = lastReport.getYarnApplicationState();
        boolean status = (appState == YarnApplicationState.FAILED || appState == YarnApplicationState.KILLED);
        if (status) {
            LOG.warn("YARN reported application state {}", appState);
            LOG.warn("Diagnostics: {}", lastReport.getDiagnostics());
        }
        return status;
    }
}

From source file:org.apache.flink.yarn.YarnClusterClient.java

License:Apache License

public ApplicationStatus getApplicationStatus() {
    if (!isConnected) {
        throw new IllegalStateException("The cluster has been connected to the ApplicationMaster.");
    }/*from w w  w .  j  av a 2 s.  co  m*/
    ApplicationReport lastReport = null;
    if (pollingRunner == null) {
        LOG.warn("YarnClusterClient.getApplicationStatus() has been called on an uninitialized cluster."
                + "The system might be in an erroneous state");
    } else {
        lastReport = pollingRunner.getLastReport();
    }
    if (lastReport == null) {
        LOG.warn(
                "YarnClusterClient.getApplicationStatus() has been called on a cluster that didn't receive a status so far."
                        + "The system might be in an erroneous state");
        return ApplicationStatus.UNKNOWN;
    } else {
        YarnApplicationState appState = lastReport.getYarnApplicationState();
        ApplicationStatus status = (appState == YarnApplicationState.FAILED
                || appState == YarnApplicationState.KILLED) ? ApplicationStatus.FAILED
                        : ApplicationStatus.SUCCEEDED;
        if (status != ApplicationStatus.SUCCEEDED) {
            LOG.warn("YARN reported application state {}", appState);
            LOG.warn("Diagnostics: {}", lastReport.getDiagnostics());
        }
        return status;
    }
}

From source file:org.apache.flink.yarn.YarnClusterClient.java

License:Apache License

/**
 * Shuts down the Yarn application/*from   www .j a v a 2s  .c  o m*/
 */
public void shutdownCluster() {

    if (hasBeenShutDown.getAndSet(true)) {
        return;
    }

    if (!isConnected) {
        throw new IllegalStateException("The cluster has been not been connected to the ApplicationMaster.");
    }

    try {
        Runtime.getRuntime().removeShutdownHook(clientShutdownHook);
    } catch (IllegalStateException e) {
        // we are already in the shutdown hook
    }

    LOG.info("Sending shutdown request to the Application Master");
    try {
        Future<Object> response = Patterns.ask(applicationClient.get(), new YarnMessages.LocalStopYarnSession(
                getApplicationStatus(), "Flink YARN Client requested shutdown"), new Timeout(akkaDuration));
        Await.ready(response, akkaDuration);
    } catch (Exception e) {
        LOG.warn("Error while stopping YARN cluster.", e);
    }

    try {
        File propertiesFile = FlinkYarnSessionCli.getYarnPropertiesLocation(flinkConfig);
        if (propertiesFile.isFile()) {
            if (propertiesFile.delete()) {
                LOG.info("Deleted Yarn properties file at {}", propertiesFile.getAbsoluteFile().toString());
            } else {
                LOG.warn("Couldn't delete Yarn properties file at {}",
                        propertiesFile.getAbsoluteFile().toString());
            }
        }
    } catch (Exception e) {
        LOG.warn("Exception while deleting the JobManager address file", e);
    }

    if (sessionFilesDir != null) {
        LOG.info("Deleting files in " + sessionFilesDir);
        try {
            FileSystem shutFS = FileSystem.get(hadoopConfig);
            shutFS.delete(sessionFilesDir, true); // delete conf and jar file.
            shutFS.close();
        } catch (IOException e) {
            LOG.error("Could not delete the Flink jar and configuration files in HDFS..", e);
        }
    } else {
        LOG.warn("Session file directory not set. Not deleting session files");
    }

    try {
        pollingRunner.stopRunner();
        pollingRunner.join(1000);
    } catch (InterruptedException e) {
        LOG.warn("Shutdown of the polling runner was interrupted", e);
        Thread.currentThread().interrupt();
    }

    try {
        ApplicationReport appReport = yarnClient.getApplicationReport(appId);

        LOG.info("Application " + appId + " finished with state " + appReport.getYarnApplicationState()
                + " and final state " + appReport.getFinalApplicationStatus() + " at "
                + appReport.getFinishTime());

        if (appReport.getYarnApplicationState() == YarnApplicationState.FAILED
                || appReport.getYarnApplicationState() == YarnApplicationState.KILLED) {
            LOG.warn("Application failed. Diagnostics " + appReport.getDiagnostics());
            LOG.warn("If log aggregation is activated in the Hadoop cluster, we recommend to retrieve "
                    + "the full application log using this command:" + System.lineSeparator()
                    + "\tyarn logs -applicationId " + appReport.getApplicationId() + System.lineSeparator()
                    + "(It sometimes takes a few seconds until the logs are aggregated)");
        }
    } catch (Exception e) {
        LOG.warn("Couldn't get final report", e);
    }

    LOG.info("YARN Client is shutting down");
    yarnClient.stop(); // actorRunner is using the yarnClient.
    yarnClient = null; // set null to clearly see if somebody wants to access it afterwards.
}

From source file:org.apache.flink.yarn.YarnClusterClientV2.java

License:Apache License

@Override
protected JobSubmissionResult submitJob(JobGraph jobGraph, ClassLoader classLoader)
        throws ProgramInvocationException {
    try {//from ww  w.j  a va  2 s. c  o m
        // Create application via yarnClient
        final YarnClientApplication yarnApplication = yarnClient.createApplication();
        ApplicationReport report = this.clusterDescriptor.startAppMaster(jobGraph, yarnClient, yarnApplication);
        if (report.getYarnApplicationState().equals(YarnApplicationState.RUNNING)) {
            appId = report.getApplicationId();
            trackingURL = report.getTrackingUrl();
            logAndSysout("Please refer to " + getWebInterfaceURL() + " for the running status of job "
                    + jobGraph.getJobID().toString());
            //TODO: not support attach mode now
            return new JobSubmissionResult(jobGraph.getJobID());
        } else {
            throw new ProgramInvocationException("Fail to submit the job.");
        }
    } catch (Exception e) {
        throw new ProgramInvocationException("Fail to submit the job", e.getCause());
    }
}

From source file:org.apache.flink.yarn.YARNSessionCapacitySchedulerITCase.java

License:Apache License

private void testDetachedPerJobYarnClusterInternal(String job) {
    YarnClient yc = YarnClient.createYarnClient();
    yc.init(yarnConfiguration);/* w  w  w  .j  a  v  a 2  s. c  om*/
    yc.start();

    // get temporary folder for writing output of wordcount example
    File tmpOutFolder = null;
    try {
        tmpOutFolder = tmp.newFolder();
    } catch (IOException e) {
        throw new RuntimeException(e);
    }

    // get temporary file for reading input data for wordcount example
    File tmpInFile;
    try {
        tmpInFile = tmp.newFile();
        FileUtils.writeStringToFile(tmpInFile, WordCountData.TEXT);
    } catch (IOException e) {
        throw new RuntimeException(e);
    }

    Runner runner = startWithArgs(
            new String[] { "run", "-m", "yarn-cluster", "-yj", flinkUberjar.getAbsolutePath(), "-yt",
                    flinkLibFolder.getAbsolutePath(), "-yn", "1", "-yjm", "768", "-yD",
                    "yarn.heap-cutoff-ratio=0.5", // test if the cutoff is passed correctly
                    "-ytm", "1024", "-ys", "2", // test requesting slots from YARN.
                    "--yarndetached", job, "--input", tmpInFile.getAbsoluteFile().toString(), "--output",
                    tmpOutFolder.getAbsoluteFile().toString() },
            "Job has been submitted with JobID", RunTypes.CLI_FRONTEND);

    // it should usually be 2, but on slow machines, the number varies
    Assert.assertTrue("There should be at most 2 containers running", getRunningContainers() <= 2);
    // give the runner some time to detach
    for (int attempt = 0; runner.isAlive() && attempt < 5; attempt++) {
        try {
            Thread.sleep(500);
        } catch (InterruptedException e) {
        }
    }
    Assert.assertFalse("The runner should detach.", runner.isAlive());
    LOG.info("CLI Frontend has returned, so the job is running");

    // find out the application id and wait until it has finished.
    try {
        List<ApplicationReport> apps = yc.getApplications(EnumSet.of(YarnApplicationState.RUNNING));

        ApplicationId tmpAppId;
        if (apps.size() == 1) {
            // Better method to find the right appId. But sometimes the app is shutting down very fast
            // Only one running
            tmpAppId = apps.get(0).getApplicationId();

            LOG.info("waiting for the job with appId {} to finish", tmpAppId);
            // wait until the app has finished
            while (yc.getApplications(EnumSet.of(YarnApplicationState.RUNNING)).size() > 0) {
                sleep(500);
            }
        } else {
            // get appId by finding the latest finished appid
            apps = yc.getApplications();
            Collections.sort(apps, new Comparator<ApplicationReport>() {
                @Override
                public int compare(ApplicationReport o1, ApplicationReport o2) {
                    return o1.getApplicationId().compareTo(o2.getApplicationId()) * -1;
                }
            });
            tmpAppId = apps.get(0).getApplicationId();
            LOG.info("Selected {} as the last appId from {}", tmpAppId, Arrays.toString(apps.toArray()));
        }
        final ApplicationId id = tmpAppId;

        // now it has finished.
        // check the output files.
        File[] listOfOutputFiles = tmpOutFolder.listFiles();

        Assert.assertNotNull("Taskmanager output not found", listOfOutputFiles);
        LOG.info("The job has finished. TaskManager output files found in {}", tmpOutFolder);

        // read all output files in output folder to one output string
        String content = "";
        for (File f : listOfOutputFiles) {
            if (f.isFile()) {
                content += FileUtils.readFileToString(f) + "\n";
            }
        }
        //String content = FileUtils.readFileToString(taskmanagerOut);
        // check for some of the wordcount outputs.
        Assert.assertTrue("Expected string 'da 5' or '(all,2)' not found in string '" + content + "'",
                content.contains("da 5") || content.contains("(da,5)") || content.contains("(all,2)"));
        Assert.assertTrue("Expected string 'der 29' or '(mind,1)' not found in string'" + content + "'",
                content.contains("der 29") || content.contains("(der,29)") || content.contains("(mind,1)"));

        // check if the heap size for the TaskManager was set correctly
        File jobmanagerLog = YarnTestBase.findFile("..", new FilenameFilter() {
            @Override
            public boolean accept(File dir, String name) {
                return name.contains("jobmanager.log") && dir.getAbsolutePath().contains(id.toString());
            }
        });
        Assert.assertNotNull("Unable to locate JobManager log", jobmanagerLog);
        content = FileUtils.readFileToString(jobmanagerLog);
        // TM was started with 1024 but we cut off 50% (NOT THE DEFAULT VALUE)
        String expected = "Starting TaskManagers with command: $JAVA_HOME/bin/java -Xms424m -Xmx424m";
        Assert.assertTrue(
                "Expected string '" + expected + "' not found in JobManager log: '" + jobmanagerLog + "'",
                content.contains(expected));
        expected = " (2/2) (attempt #0) to ";
        Assert.assertTrue("Expected string '" + expected + "' not found in JobManager log."
                + "This string checks that the job has been started with a parallelism of 2. Log contents: '"
                + jobmanagerLog + "'", content.contains(expected));

        // make sure the detached app is really finished.
        LOG.info("Checking again that app has finished");
        ApplicationReport rep;
        do {
            sleep(500);
            rep = yc.getApplicationReport(id);
            LOG.info("Got report {}", rep);
        } while (rep.getYarnApplicationState() == YarnApplicationState.RUNNING);

    } catch (Throwable t) {
        LOG.warn("Error while detached yarn session was running", t);
        Assert.fail(t.getMessage());
    }
}

From source file:org.apache.flink.yarn.YarnTestBase.java

License:Apache License

@Before
public void checkClusterEmpty() throws IOException, YarnException {
    if (yarnClient == null) {
        yarnClient = YarnClient.createYarnClient();
        yarnClient.init(yarnConfiguration);
        yarnClient.start();/*from  w w  w  .  ja  va  2  s.  c  o m*/
    }

    List<ApplicationReport> apps = yarnClient.getApplications();
    for (ApplicationReport app : apps) {
        if (app.getYarnApplicationState() != YarnApplicationState.FINISHED
                && app.getYarnApplicationState() != YarnApplicationState.KILLED
                && app.getYarnApplicationState() != YarnApplicationState.FAILED) {
            Assert.fail("There is at least one application on the cluster is not finished." + "App "
                    + app.getApplicationId() + " is in state " + app.getYarnApplicationState());
        }
    }
}

From source file:org.apache.giraph.yarn.GiraphYarnClient.java

License:Apache License

/**
 * Assess whether job is already finished/failed and 'done' flag needs to be
 * set, prints progress display for client if all is going well.
 * @param report the application report to assess.
 * @return true if job report indicates the job run is over.
 */// w  w w .  j  a v  a  2 s.  c  om
private boolean checkProgress(final ApplicationReport report) {
    YarnApplicationState jobState = report.getYarnApplicationState();
    if (jobState == YarnApplicationState.FINISHED || jobState == YarnApplicationState.KILLED) {
        return true;
    } else if (jobState == YarnApplicationState.FAILED) {
        LOG.error(jobName + " reports FAILED state, diagnostics show: " + report.getDiagnostics());
        return true;
    } else {
        if (reportCounter++ % 5 == 0) {
            displayJobReport(report);
        }
    }
    return false;
}

From source file:org.apache.giraph.yarn.GiraphYarnClient.java

License:Apache License

/**
 * Display a formatted summary of the job progress report from the AM.
 * @param report the report to display.//from   w ww .  ja va 2  s.  c om
 */
private void displayJobReport(final ApplicationReport report) {
    if (null == report) {
        throw new IllegalStateException(
                "[*] Latest ApplicationReport for job " + jobName + " was not received by the local client.");
    }
    final float elapsed = (System.currentTimeMillis() - report.getStartTime()) / 1000.0f;
    LOG.info(jobName + ", Elapsed: " + String.format("%.2f secs", elapsed));
    LOG.info(report.getCurrentApplicationAttemptId() + ", State: " + report.getYarnApplicationState().name()
            + ", Containers used: " + report.getApplicationResourceUsageReport().getNumUsedContainers());
}