Example usage for org.apache.hadoop.yarn.api.records ApplicationReport getYarnApplicationState

List of usage examples for org.apache.hadoop.yarn.api.records ApplicationReport getYarnApplicationState

Introduction

In this page you can find the example usage for org.apache.hadoop.yarn.api.records ApplicationReport getYarnApplicationState.

Prototype

@Public
@Stable
public abstract YarnApplicationState getYarnApplicationState();

Source Link

Document

Get the YarnApplicationState of the application.

Usage

From source file:gobblin.yarn.GobblinYarnAppLauncher.java

License:Apache License

@Subscribe
public void handleApplicationReportArrivalEvent(ApplicationReportArrivalEvent applicationReportArrivalEvent) {
    ApplicationReport applicationReport = applicationReportArrivalEvent.getApplicationReport();

    YarnApplicationState appState = applicationReport.getYarnApplicationState();
    LOGGER.info("Gobblin Yarn application state: " + appState.toString());

    // Reset the count on failures to get the ApplicationReport when there's one success
    this.getApplicationReportFailureCount.set(0);

    if (appState == YarnApplicationState.FINISHED || appState == YarnApplicationState.FAILED
            || appState == YarnApplicationState.KILLED) {

        applicationCompleted = true;/*from   ww  w . ja v  a  2  s  .c o m*/

        LOGGER.info("Gobblin Yarn application finished with final status: "
                + applicationReport.getFinalApplicationStatus().toString());
        if (applicationReport.getFinalApplicationStatus() == FinalApplicationStatus.FAILED) {
            LOGGER.error("Gobblin Yarn application failed for the following reason: "
                    + applicationReport.getDiagnostics());
        }

        try {
            GobblinYarnAppLauncher.this.stop();
        } catch (IOException ioe) {
            LOGGER.error("Failed to close the " + GobblinYarnAppLauncher.class.getSimpleName(), ioe);
        } catch (TimeoutException te) {
            LOGGER.error("Timeout in stopping the service manager", te);
        } finally {
            if (this.emailNotificationOnShutdown) {
                sendEmailOnShutdown(Optional.of(applicationReport));
            }
        }
    }
}

From source file:husky.client.HuskyYarnClient.java

License:Apache License

private boolean monitorApp() throws YarnException, IOException {
    while (true) {
        try {// ww  w  .j av a2 s .  c  o m
            Thread.sleep(1000);
        } catch (InterruptedException ignore) {
        }

        ApplicationReport report = mYarnClient.getApplicationReport(mAppId);

        YarnApplicationState yarnState = report.getYarnApplicationState();
        FinalApplicationStatus appState = report.getFinalApplicationStatus();
        LOG.info("YarnState = " + yarnState + ", AppState = " + appState + ", Progress = "
                + report.getProgress());

        if (yarnState == FINISHED) {
            if (appState == SUCCEEDED) {
                LOG.info("Application completed successfully.");
                return true;
            } else {
                LOG.info("Application completed unsuccessfully. YarnState: " + yarnState.toString()
                        + ", ApplicationState: " + appState.toString());
                return false;
            }
        } else if (yarnState == KILLED || yarnState == FAILED) {
            LOG.info("Application did not complete. YarnState: " + yarnState.toString() + ", ApplicationState: "
                    + appState.toString());
            return false;
        }
    }
}

From source file:hws.core.JobClient.java

License:Apache License

public void run(String[] args) throws Exception {
    //final String command = args[0];
    //final int n = Integer.valueOf(args[1]);
    //final Path jarPath = new Path(args[2]);
    Options options = new Options();
    /*options.addOption(OptionBuilder.withLongOpt("jar")
                           .withDescription( "Jar path" )
                           .hasArg()/*from  w ww  .  ja va  2  s  .  c  om*/
                           .withArgName("JarPath")
                           .create());
    options.addOption(OptionBuilder.withLongOpt("scheduler")
                           .withDescription( "Scheduler class name" )
                           .hasArg()
                           .withArgName("ClassName")
                           .create());
    */options.addOption(OptionBuilder.withLongOpt("zk-servers")
            .withDescription("List of the ZooKeeper servers").hasArgs().withArgName("zkAddrs").create("zks"));
    //options.addOption("l", "list", false, "list modules");
    options.addOption(OptionBuilder.withLongOpt("load").withDescription("load new modules").hasArgs()
            .withArgName("XMLFiles").create());
    /*options.addOption(OptionBuilder.withLongOpt( "remove" )
                           .withDescription( "remove modules" )
                           .hasArgs()
                           .withArgName("ModuleNames")
                           .create("rm"));
    */CommandLineParser parser = new BasicParser();
    CommandLine cmd = parser.parse(options, args);

    //Path jarPath = null;
    //String schedulerClassName = null;
    String[] xmlFileNames = null;
    //String []moduleNames = null;
    String zksArgs = "";
    String[] zkServers = null;
    if (cmd.hasOption("zks")) {
        zksArgs = "-zks";
        zkServers = cmd.getOptionValues("zks");
        for (String zks : zkServers) {
            zksArgs += " " + zks;
        }
    }

    //Logger setup
    //FSDataOutputStream writer = FileSystem.get(conf).create(new Path("hdfs:///hws/apps/"+appIdStr+"/logs/jobClient.log"));
    //Logger.addOutputStream(writer);

    /*if(cmd.hasOption("l")){
       LOG.warn("Argument --list (-l) is not supported yet.");
    }
    if(cmd.hasOption("jar")){
       jarPath = new Path(cmd.getOptionValue("jar")); 
    }
    if(cmd.hasOption("scheduler")){
       schedulerClassName = cmd.getOptionValue("scheduler");
    }*/
    if (cmd.hasOption("load")) {
        xmlFileNames = cmd.getOptionValues("load");
    } /*else if(cmd.hasOption("rm")){
        moduleNames = cmd.getOptionValues("rm");
      }*/

    //LOG.info("Jar-Path "+jarPath);
    if (xmlFileNames != null) {
        String paths = "";
        for (String path : xmlFileNames) {
            paths += path + "; ";
        }
        LOG.info("Load XMLs: " + paths);
    }
    /*if(moduleNames!=null){
       String modules = "";
       for(String module: moduleNames){
          modules += module+"; ";
       }
       LOG.info("remove: "+modules);
    }*/
    // Create yarnClient
    YarnConfiguration conf = new YarnConfiguration();
    YarnClient yarnClient = YarnClient.createYarnClient();
    yarnClient.init(conf);
    yarnClient.start();

    // Create application via yarnClient
    YarnClientApplication app = yarnClient.createApplication();

    System.out.println("LOG Path: " + ApplicationConstants.LOG_DIR_EXPANSION_VAR);
    // Set up the container launch context for the application master
    ContainerLaunchContext amContainer = Records.newRecord(ContainerLaunchContext.class);

    ApplicationSubmissionContext appContext = app.getApplicationSubmissionContext();
    ApplicationId appId = appContext.getApplicationId();

    ZkClient zk = new ZkClient(zkServers[0]); //TODO select a ZooKeeper server
    if (!zk.exists("/hadoop-watershed")) {
        zk.createPersistent("/hadoop-watershed", "");
    }
    zk.createPersistent("/hadoop-watershed/" + appId.toString(), "");

    FileSystem fs = FileSystem.get(conf);

    LOG.info("Collecting files to upload");
    fs.mkdirs(new Path("hdfs:///hws/apps/" + appId.toString()));
    fs.mkdirs(new Path("hdfs:///hws/apps/" + appId.toString() + "/logs"));

    ModulePipeline modulePipeline = ModulePipeline.fromXMLFiles(xmlFileNames);
    LOG.info("Uploading files to HDFS");
    for (String path : modulePipeline.files()) {
        uploadFile(fs, new File(path), appId);
    }
    LOG.info("Upload finished");

    String modulePipelineJson = Json.dumps(modulePipeline);
    String modulePipelineBase64 = Base64.encodeBase64String(StringUtils.getBytesUtf8(modulePipelineJson))
            .replaceAll("\\s", "");
    LOG.info("ModulePipeline: " + modulePipelineJson);
    //LOG.info("ModulePipeline: "+modulePipelineBase64);
    amContainer.setCommands(Collections.singletonList("$JAVA_HOME/bin/java" + " -Xmx256M"
            + " hws.core.JobMaster" + " -aid " + appId.toString() + " --load " + modulePipelineBase64 + " "
            + zksArgs + " 1>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/stdout" + " 2>"
            + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/stderr"));

    // Setup jar for ApplicationMaster
    //LocalResource appMasterJar = Records.newRecord(LocalResource.class);
    //setupAppMasterJar(jarPath, appMasterJar);
    //amContainer.setLocalResources(Collections.singletonMap("hws.jar", appMasterJar));

    LOG.info("Listing files for YARN-Watershed");
    RemoteIterator<LocatedFileStatus> filesIterator = fs.listFiles(new Path("hdfs:///hws/bin/"), false);
    Map<String, LocalResource> resources = new HashMap<String, LocalResource>();
    LOG.info("Files setup as resource");
    while (filesIterator.hasNext()) {
        LocatedFileStatus fileStatus = filesIterator.next();
        // Setup jar for ApplicationMaster
        LocalResource containerJar = Records.newRecord(LocalResource.class);
        ContainerUtils.setupContainerJar(fs, fileStatus.getPath(), containerJar);
        resources.put(fileStatus.getPath().getName(), containerJar);
    }
    LOG.info("container resource setup");
    amContainer.setLocalResources(resources);

    fs.close(); //closing FileSystem interface

    // Setup CLASSPATH for ApplicationMaster
    Map<String, String> appMasterEnv = new HashMap<String, String>();
    ContainerUtils.setupContainerEnv(appMasterEnv, conf);
    amContainer.setEnvironment(appMasterEnv);

    // Set up resource type requirements for ApplicationMaster
    Resource capability = Records.newRecord(Resource.class);
    capability.setMemory(256);
    capability.setVirtualCores(1);

    // Finally, set-up ApplicationSubmissionContext for the application
    //ApplicationSubmissionContext appContext = 
    //app.getApplicationSubmissionContext();
    appContext.setApplicationName("Hadoop-Watershed"); // application name
    appContext.setAMContainerSpec(amContainer);
    appContext.setResource(capability);
    appContext.setQueue("default"); // queue 

    // Submit application
    LOG.info("Submitting application " + appId);
    yarnClient.submitApplication(appContext);

    LOG.info("Waiting for containers to finish");
    zk.waitUntilExists("/hadoop-watershed/" + appId.toString() + "/done", TimeUnit.MILLISECONDS, 250);
    ApplicationReport appReport = yarnClient.getApplicationReport(appId);
    YarnApplicationState appState = appReport.getYarnApplicationState();
    while (appState != YarnApplicationState.FINISHED && appState != YarnApplicationState.KILLED
            && appState != YarnApplicationState.FAILED) {
        Thread.sleep(100);
        appReport = yarnClient.getApplicationReport(appId);
        appState = appReport.getYarnApplicationState();
    }

    System.out.println("Application " + appId + " finished with" + " state " + appState + " at "
            + appReport.getFinishTime());

    System.out.println("deleting " + appId.toString() + " znode");
    zk.deleteRecursive("/hadoop-watershed/" + appId.toString()); //TODO remove app folder from ZooKeeper
}

From source file:io.hops.hopsworks.api.zeppelin.rest.InterpreterRestApi.java

License:Apache License

private List<JobAdministration.YarnApplicationReport> fetchJobs() {
    JobAdministration jobAdmin = new JobAdministration();
    List<JobAdministration.YarnApplicationReport> reports = new ArrayList<>();
    YarnClient client = YarnClient.createYarnClient();
    Configuration conf = settings.getConfiguration();
    client.init(conf);//from   ww  w. j av  a2 s  . c om
    client.start();
    try {
        //Create our custom YarnApplicationReport Pojo
        for (ApplicationReport appReport : client.getApplications(PREDICATE)) {
            reports.add(jobAdmin.new YarnApplicationReport(appReport.getApplicationId().toString(),
                    appReport.getName(), appReport.getUser(), appReport.getStartTime(),
                    appReport.getFinishTime(), appReport.getApplicationId().getClusterTimestamp(),
                    appReport.getApplicationId().getId(), appReport.getYarnApplicationState().name()));
        }
    } catch (YarnException | IOException ex) {
        logger.error("", ex);
    }
    return reports;
}

From source file:io.hops.hopsworks.api.zeppelin.rest.InterpreterRestApi.java

License:Apache License

private List<JobAdministration.YarnApplicationReport> fetchJobs(String username) {
    JobAdministration jobAdmin = new JobAdministration();
    List<JobAdministration.YarnApplicationReport> reports = new ArrayList<>();
    YarnClient client = YarnClient.createYarnClient();
    Configuration conf = settings.getConfiguration();
    client.init(conf);/*from ww  w .  jav  a 2  s .co m*/
    client.start();
    try {
        //Create our custom YarnApplicationReport Pojo
        for (ApplicationReport appReport : client.getApplications(PREDICATE)) {
            if (username.equals(appReport.getUser())) {
                reports.add(jobAdmin.new YarnApplicationReport(appReport.getApplicationId().toString(),
                        appReport.getName(), appReport.getUser(), appReport.getStartTime(),
                        appReport.getFinishTime(), appReport.getApplicationId().getClusterTimestamp(),
                        appReport.getApplicationId().getId(), appReport.getYarnApplicationState().name()));
            }
        }
    } catch (YarnException | IOException ex) {
        logger.error("", ex);
    }
    return reports;
}

From source file:io.hops.hopsworks.common.admin.llap.LlapClusterFacade.java

License:Open Source License

public boolean isClusterUp() {
    String llapAppID = variablesFacade.getVariableValue(Settings.VARIABLE_LLAP_APP_ID);
    if (llapAppID == null || llapAppID.isEmpty()) {
        return false;
    }// ww w. java  2s  .  c  o m

    ApplicationId appId = ApplicationId.fromString(llapAppID);
    YarnClient yarnClient = yarnClientService.getYarnClientSuper(settings.getConfiguration()).getYarnClient();
    ApplicationReport applicationReport = null;
    try {
        applicationReport = yarnClient.getApplicationReport(appId);
    } catch (IOException | YarnException e) {
        logger.log(Level.SEVERE,
                "Could not retrieve application state for llap cluster with appId: " + appId.toString(), e);
        return false;
    } finally {
        try {
            yarnClient.close();
        } catch (IOException ex) {
        }
    }

    YarnApplicationState appState = applicationReport.getYarnApplicationState();
    return appState == YarnApplicationState.RUNNING || appState == YarnApplicationState.SUBMITTED
            || appState == YarnApplicationState.ACCEPTED || appState == YarnApplicationState.NEW
            || appState == YarnApplicationState.NEW_SAVING;
}

From source file:io.hops.hopsworks.common.jobs.flink.AbstractYarnClusterDescriptor.java

License:Apache License

/**
 * This method will block until the ApplicationMaster/JobManager have been
 * deployed on YARN.//  w w  w . ja v  a2 s  .  co  m
 */
protected YarnClusterClient deployInternal() throws Exception {
    isReadyForDeployment();
    LOG.info("Using values:");
    LOG.info("\tTaskManager count = {}", taskManagerCount);
    LOG.info("\tJobManager memory = {}", jobManagerMemoryMb);
    LOG.info("\tTaskManager memory = {}", taskManagerMemoryMb);

    final YarnClient yarnClient = getYarnClient();

    // ------------------ Check if the specified queue exists --------------------
    try {
        List<QueueInfo> queues = yarnClient.getAllQueues();
        // check only if there are queues configured in yarn and for this session.
        if (queues.size() > 0 && this.yarnQueue != null) {
            boolean queueFound = false;
            for (QueueInfo queue : queues) {
                if (queue.getQueueName().equals(this.yarnQueue)) {
                    queueFound = true;
                    break;
                }
            }
            if (!queueFound) {
                String queueNames = "";
                for (QueueInfo queue : queues) {
                    queueNames += queue.getQueueName() + ", ";
                }
                LOG.warn("The specified queue '" + this.yarnQueue + "' does not exist. " + "Available queues: "
                        + queueNames);
            }
        } else {
            LOG.debug("The YARN cluster does not have any queues configured");
        }
    } catch (Throwable e) {
        LOG.warn("Error while getting queue information from YARN: " + e.getMessage());
        if (LOG.isDebugEnabled()) {
            LOG.debug("Error details", e);
        }
    }

    // Create application via yarnClient
    final YarnClientApplication yarnApplication = yarnClient.createApplication();
    GetNewApplicationResponse appResponse = yarnApplication.getNewApplicationResponse();

    Map<String, String> jobSystemProperties = new HashMap<>(2);

    // Certificates are materialized locally so DFSClient can be set to null
    // LocalResources are not used by Flink, so set it null
    HopsUtils.copyUserKafkaCerts(services.getUserCerts(), project, username,
            services.getSettings().getHopsworksTmpCertDir(), services.getSettings().getHdfsTmpCertDir(),
            JobType.FLINK, null, null, jobSystemProperties, services.getSettings().getFlinkKafkaCertDir(),
            appResponse.getApplicationId().toString());

    StringBuilder tmpBuilder = new StringBuilder();
    for (Map.Entry<String, String> prop : jobSystemProperties.entrySet()) {
        String option = YarnRunner.escapeForShell("-D" + prop.getKey() + "=" + prop.getValue());
        javaOptions.add(option);
        addHopsworksParam(option);
        tmpBuilder.append(prop.getKey()).append("=").append(prop.getValue()).append("@@");
    }

    dynamicPropertiesEncoded += tmpBuilder.toString();

    // ------------------ Add dynamic properties to local flinkConfiguraton ------
    Map<String, String> dynProperties = getDynamicProperties(dynamicPropertiesEncoded);
    for (Map.Entry<String, String> dynProperty : dynProperties.entrySet()) {
        flinkConfiguration.setString(dynProperty.getKey(), dynProperty.getValue());
    }

    // ------------------ Set default file system scheme -------------------------
    try {
        org.apache.flink.core.fs.FileSystem.setDefaultScheme(flinkConfiguration);
    } catch (IOException e) {
        throw new IOException("Error while setting the default " + "filesystem scheme from configuration.", e);
    }

    // initialize file system
    // Copy the application master jar to the filesystem
    // Create a local resource to point to the destination jar path
    final FileSystem fs = FileSystem.get(conf);

    // hard coded check for the GoogleHDFS client because its not overriding the 
    // getScheme() method.
    if (!fs.getClass().getSimpleName().equals("GoogleHadoopFileSystem") && fs.getScheme().startsWith("file")) {
        LOG.warn("The file system scheme is '" + fs.getScheme() + "'. This indicates that the "
                + "specified Hadoop configuration path is wrong and the system is "
                + "using the default Hadoop configuration values. The Flink YARN "
                + "client needs to store its files in a distributed file system");
    }

    // ------ Check if the YARN ClusterClient has the requested resources ---
    // the yarnMinAllocationMB specifies the smallest possible container 
    // allocation size. all allocations below this value are automatically 
    // set to this value.
    final int yarnMinAllocationMB = conf.getInt("yarn.scheduler.minimum-allocation-mb", 0);
    if (jobManagerMemoryMb < yarnMinAllocationMB || taskManagerMemoryMb < yarnMinAllocationMB) {
        LOG.warn("The JobManager or TaskManager memory is below the smallest possible "
                + "YARN Container size. The value of 'yarn.scheduler.minimum-allocation-mb'" + " is "
                + yarnMinAllocationMB + "'. Please increase the memory size."
                + "YARN will allocate the smaller containers but the scheduler will"
                + " account for the minimum-allocation-mb, maybe not all instances "
                + "you requested will start.");
    }

    // set the memory to minAllocationMB to do the next checks correctly
    if (jobManagerMemoryMb < yarnMinAllocationMB) {
        jobManagerMemoryMb = yarnMinAllocationMB;
    }
    if (taskManagerMemoryMb < yarnMinAllocationMB) {
        taskManagerMemoryMb = yarnMinAllocationMB;
    }

    Resource maxRes = appResponse.getMaximumResourceCapability();
    final String NOTE = "Please check the 'yarn.scheduler.maximum-allocation-mb' and the "
            + "'yarn.nodemanager.resource.memory-mb' configuration values\n";
    if (jobManagerMemoryMb > maxRes.getMemory()) {
        failSessionDuringDeployment(yarnClient, yarnApplication);
        throw new YarnDeploymentException("The cluster does not have the requested resources for the JobManager"
                + " available!\n" + "Maximum Memory: " + maxRes.getMemory() + "MB Requested: "
                + jobManagerMemoryMb + "MB. " + NOTE);
    }

    if (taskManagerMemoryMb > maxRes.getMemory()) {
        failSessionDuringDeployment(yarnClient, yarnApplication);
        throw new YarnDeploymentException(
                "The cluster does not have the requested resources for the TaskManagers available!\n"
                        + "Maximum Memory: " + maxRes.getMemory() + " Requested: " + taskManagerMemoryMb
                        + "MB. " + NOTE);
    }

    final String NOTE_RSC = "\nThe Flink YARN client will try to allocate the YARN session, "
            + "but maybe not all TaskManagers are connecting from the beginning "
            + "because the resources are currently not available in the cluster. "
            + "The allocation might take more time than usual because the Flink "
            + "YARN client needs to wait until the resources become available.";
    int totalMemoryRequired = jobManagerMemoryMb + taskManagerMemoryMb * taskManagerCount;
    ClusterResourceDescription freeClusterMem = getCurrentFreeClusterResources(yarnClient);
    if (freeClusterMem.totalFreeMemory < totalMemoryRequired) {
        LOG.warn("This YARN session requires " + totalMemoryRequired + "MB of memory in the cluster. "
                + "There are currently only " + freeClusterMem.totalFreeMemory + "MB available." + NOTE_RSC);

    }
    if (taskManagerMemoryMb > freeClusterMem.containerLimit) {
        LOG.warn("The requested amount of memory for the TaskManagers (" + taskManagerMemoryMb
                + "MB) is more than " + "the largest possible YARN container: " + freeClusterMem.containerLimit
                + NOTE_RSC);
    }
    if (jobManagerMemoryMb > freeClusterMem.containerLimit) {
        LOG.warn(
                "The requested amount of memory for the JobManager (" + jobManagerMemoryMb + "MB) is more than "
                        + "the largest possible YARN container: " + freeClusterMem.containerLimit + NOTE_RSC);
    }

    // ----------------- check if the requested containers fit into the cluster.
    int[] nmFree = Arrays.copyOf(freeClusterMem.nodeManagersFree, freeClusterMem.nodeManagersFree.length);
    // first, allocate the jobManager somewhere.
    if (!allocateResource(nmFree, jobManagerMemoryMb)) {
        LOG.warn("Unable to find a NodeManager that can fit the JobManager/Application master. "
                + "The JobManager requires " + jobManagerMemoryMb + "MB. NodeManagers available: "
                + Arrays.toString(freeClusterMem.nodeManagersFree) + NOTE_RSC);
    }
    // allocate TaskManagers
    for (int i = 0; i < taskManagerCount; i++) {
        if (!allocateResource(nmFree, taskManagerMemoryMb)) {
            LOG.warn("There is not enough memory available in the YARN cluster. "
                    + "The TaskManager(s) require " + taskManagerMemoryMb + "MB each. "
                    + "NodeManagers available: " + Arrays.toString(freeClusterMem.nodeManagersFree) + "\n"
                    + "After allocating the JobManager (" + jobManagerMemoryMb + "MB) and (" + i + "/"
                    + taskManagerCount + ") TaskManagers, " + "the following NodeManagers are available: "
                    + Arrays.toString(nmFree) + NOTE_RSC);
        }
    }

    Set<File> effectiveShipFiles = new HashSet<>(shipFiles.size());
    for (File file : shipFiles) {
        effectiveShipFiles.add(file.getAbsoluteFile());
    }

    //check if there is a logback or log4j file
    File logbackFile = new File(configurationDirectory + File.separator + CONFIG_FILE_LOGBACK_NAME);
    final boolean hasLogback = logbackFile.exists();
    if (hasLogback) {
        effectiveShipFiles.add(logbackFile);
    }

    File log4jFile = new File(configurationDirectory + File.separator + CONFIG_FILE_LOG4J_NAME);
    final boolean hasLog4j = log4jFile.exists();
    if (hasLog4j) {
        effectiveShipFiles.add(log4jFile);
        if (hasLogback) {
            // this means there is already a logback configuration file --> fail
            LOG.warn("The configuration directory ('" + configurationDirectory + "') contains both LOG4J and "
                    + "Logback configuration files. Please delete or rename one of them.");
        }
    }

    addLibFolderToShipFiles(effectiveShipFiles);

    final ContainerLaunchContext amContainer = setupApplicationMasterContainer(hasLogback, hasLog4j);

    // Set-up ApplicationSubmissionContext for the application
    ApplicationSubmissionContext appContext = yarnApplication.getApplicationSubmissionContext();

    final ApplicationId appId = appContext.getApplicationId();

    // ------------------ Add Zookeeper namespace to local flinkConfiguraton ------
    String zkNamespace = getZookeeperNamespace();
    // no user specified cli argument for namespace?
    if (zkNamespace == null || zkNamespace.isEmpty()) {
        // namespace defined in config? else use applicationId as default.
        zkNamespace = flinkConfiguration.getString(ConfigConstants.ZOOKEEPER_NAMESPACE_KEY,
                String.valueOf(appId));
        setZookeeperNamespace(zkNamespace);
    }

    flinkConfiguration.setString(ConfigConstants.ZOOKEEPER_NAMESPACE_KEY, zkNamespace);

    if (RecoveryMode.isHighAvailabilityModeActivated(flinkConfiguration)) {
        // activate re-execution of failed applications
        appContext.setMaxAppAttempts(flinkConfiguration.getInteger(ConfigConstants.YARN_APPLICATION_ATTEMPTS,
                YarnConfiguration.DEFAULT_RM_AM_MAX_ATTEMPTS));

        activateHighAvailabilitySupport(appContext);
    } else {
        // set number of application retries to 1 in the default case
        appContext
                .setMaxAppAttempts(flinkConfiguration.getInteger(ConfigConstants.YARN_APPLICATION_ATTEMPTS, 1));
    }

    // local resource map for Yarn
    final Map<String, LocalResource> localResources = new HashMap<>(2 + effectiveShipFiles.size());
    // list of remote paths (after upload)
    final List<Path> paths = new ArrayList<>(2 + effectiveShipFiles.size());
    // classpath assembler
    final StringBuilder classPathBuilder = new StringBuilder();
    // ship list that enables reuse of resources for task manager containers
    StringBuilder envShipFileList = new StringBuilder();

    // upload and register ship files
    for (File shipFile : effectiveShipFiles) {
        LocalResource shipResources = Records.newRecord(LocalResource.class);

        Path shipLocalPath = new Path("file://" + shipFile.getAbsolutePath());
        Path remotePath = Utils.setupLocalResource(fs, appId.toString(), shipLocalPath, shipResources,
                fs.getHomeDirectory());

        paths.add(remotePath);

        localResources.put(shipFile.getName(), shipResources);

        classPathBuilder.append(shipFile.getName());
        if (shipFile.isDirectory()) {
            // add directories to the classpath
            classPathBuilder.append(File.separator).append("*");
        }
        classPathBuilder.append(File.pathSeparator);

        envShipFileList.append(remotePath).append(",");
    }
    ////////////////////////////////////////////////////////////////////////////
    /*
     * Add Hops LocalResources paths here
     *
     */
    //Add it to localResources
    for (Entry<String, LocalResource> entry : hopsworksResources.entrySet()) {
        localResources.put(entry.getKey(), entry.getValue());
        //Append name to classPathBuilder
        classPathBuilder.append(entry.getKey());
        classPathBuilder.append(File.pathSeparator);
    }

    ////////////////////////////////////////////////////////////////////////////
    // Setup jar for ApplicationMaster
    LocalResource appMasterJar = Records.newRecord(LocalResource.class);
    LocalResource flinkConf = Records.newRecord(LocalResource.class);
    Path remotePathJar = Utils.setupLocalResource(fs, appId.toString(), flinkJarPath, appMasterJar,
            fs.getHomeDirectory());
    Path remotePathConf = Utils.setupLocalResource(fs, appId.toString(), flinkConfigurationPath, flinkConf,
            fs.getHomeDirectory());
    localResources.put("flink.jar", appMasterJar);
    localResources.put("flink-conf.yaml", flinkConf);

    paths.add(remotePathJar);
    classPathBuilder.append("flink.jar").append(File.pathSeparator);
    paths.add(remotePathConf);
    classPathBuilder.append("flink-conf.yaml").append(File.pathSeparator);

    sessionFilesDir = new Path(fs.getHomeDirectory(), ".flink/" + appId.toString() + "/");

    FsPermission permission = new FsPermission(FsAction.ALL, FsAction.NONE, FsAction.NONE);
    fs.setPermission(sessionFilesDir, permission); // set permission for path.

    // setup security tokens
    Utils.setTokensFor(amContainer, paths, conf);

    amContainer.setLocalResources(localResources);
    fs.close();

    // Setup CLASSPATH and environment variables for ApplicationMaster
    final Map<String, String> appMasterEnv = new HashMap<>();
    // set user specified app master environment variables
    appMasterEnv.putAll(Utils.getEnvironmentVariables(ConfigConstants.YARN_APPLICATION_MASTER_ENV_PREFIX,
            flinkConfiguration));
    // set Flink app class path
    appMasterEnv.put(YarnConfigKeys.ENV_FLINK_CLASSPATH, classPathBuilder.toString());

    // set Flink on YARN internal configuration values
    appMasterEnv.put(YarnConfigKeys.ENV_TM_COUNT, String.valueOf(taskManagerCount));
    appMasterEnv.put(YarnConfigKeys.ENV_TM_MEMORY, String.valueOf(taskManagerMemoryMb));
    appMasterEnv.put(YarnConfigKeys.FLINK_JAR_PATH, remotePathJar.toString());
    appMasterEnv.put(YarnConfigKeys.ENV_APP_ID, appId.toString());
    appMasterEnv.put(YarnConfigKeys.ENV_CLIENT_HOME_DIR, fs.getHomeDirectory().toString());
    appMasterEnv.put(YarnConfigKeys.ENV_CLIENT_SHIP_FILES, envShipFileList.toString());
    appMasterEnv.put(YarnConfigKeys.ENV_CLIENT_USERNAME,
            UserGroupInformation.getCurrentUser().getShortUserName());
    appMasterEnv.put(YarnConfigKeys.ENV_SLOTS, String.valueOf(slots));
    appMasterEnv.put(YarnConfigKeys.ENV_DETACHED, String.valueOf(detached));
    appMasterEnv.put(YarnConfigKeys.ENV_ZOOKEEPER_NAMESPACE, getZookeeperNamespace());

    if (dynamicPropertiesEncoded != null) {
        appMasterEnv.put(YarnConfigKeys.ENV_DYNAMIC_PROPERTIES, dynamicPropertiesEncoded);
    }

    // set classpath from YARN configuration
    Utils.setupYarnClassPath(conf, appMasterEnv);

    amContainer.setEnvironment(appMasterEnv);

    // Set up resource type requirements for ApplicationMaster
    Resource capability = Records.newRecord(Resource.class);
    capability.setMemory(jobManagerMemoryMb);
    capability.setVirtualCores(1);

    String name;
    if (customName == null) {
        name = "Flink session with " + taskManagerCount + " TaskManagers";
        if (detached) {
            name += " (detached)";
        }
    } else {
        name = customName;
    }

    appContext.setApplicationName(name); // application name
    appContext.setApplicationType("Apache Flink");
    appContext.setAMContainerSpec(amContainer);
    appContext.setResource(capability);
    if (yarnQueue != null) {
        appContext.setQueue(yarnQueue);
    }

    // add a hook to clean up in case deployment fails
    Thread deploymentFailureHook = new DeploymentFailureHook(yarnClient, yarnApplication);
    Runtime.getRuntime().addShutdownHook(deploymentFailureHook);
    LOG.info("Submitting application master " + appId);
    yarnClient.submitApplication(appContext);

    LOG.info("Waiting for the cluster to be allocated");
    final long startTime = System.currentTimeMillis();
    ApplicationReport report;
    YarnApplicationState lastAppState = YarnApplicationState.NEW;
    loop: while (true) {
        try {
            report = yarnClient.getApplicationReport(appId);
        } catch (IOException e) {
            throw new YarnDeploymentException("Failed to deploy the cluster: " + e.getMessage());
        }
        YarnApplicationState appState = report.getYarnApplicationState();
        switch (appState) {
        case FAILED:
        case FINISHED:
        case KILLED:
            throw new YarnDeploymentException("The YARN application unexpectedly switched to state " + appState
                    + " during deployment. \n" + "Diagnostics from YARN: " + report.getDiagnostics() + "\n"
                    + "If log aggregation is enabled on your cluster, use this "
                    + "command to further investigate the issue:\n" + "yarn logs -applicationId " + appId);
            //break ..
        case RUNNING:
            LOG.info("YARN application has been deployed successfully.");
            break loop;
        default:
            if (appState != lastAppState) {
                LOG.info("Deploying cluster, current state " + appState);
            }
            if (System.currentTimeMillis() - startTime > 60000) {
                LOG.info("Deployment took more than 60 seconds. Please check if the "
                        + "requested resources are available in the YARN cluster");
            }

        }
        lastAppState = appState;
        Thread.sleep(250);
    }
    // print the application id for user to cancel themselves.
    if (isDetachedMode()) {
        LOG.info("The Flink YARN client has been started in detached mode. In order to stop "
                + "Flink on YARN, use the following command or a YARN web interface to stop "
                + "it:\nyarn application -kill " + appId + "\nPlease also note that the "
                + "temporary files of the YARN session in the home directoy will not be removed.");
    }
    // since deployment was successful, remove the hook
    try {
        Runtime.getRuntime().removeShutdownHook(deploymentFailureHook);
    } catch (IllegalStateException e) {
        // we're already in the shut down hook.
    }

    String host = report.getHost();
    int port = report.getRpcPort();

    // Correctly initialize the Flink config
    flinkConfiguration.setString(ConfigConstants.JOB_MANAGER_IPC_ADDRESS_KEY, host);
    flinkConfiguration.setInteger(ConfigConstants.JOB_MANAGER_IPC_PORT_KEY, port);

    // the Flink cluster is deployed in YARN. Represent cluster
    return createYarnClusterClient(this, yarnClient, report, flinkConfiguration, sessionFilesDir, true);
}

From source file:io.hops.hopsworks.common.jobs.flink.YarnClusterClient.java

License:Apache License

public ApplicationStatus getApplicationStatus() {
    if (!isConnected) {
        throw new IllegalStateException("The cluster has been connected to the ApplicationMaster.");
    }//from w  ww.j av a2s.co  m
    ApplicationReport lastReport = null;
    if (pollingRunner == null) {
        LOG.warn("YarnClusterClient.getApplicationStatus() has been called on an uninitialized cluster."
                + "The system might be in an erroneous state");
    } else {
        lastReport = pollingRunner.getLastReport();
    }
    if (lastReport == null) {
        LOG.warn("YarnClusterClient.getApplicationStatus() has been called on a "
                + "cluster that didn't receive a status so far." + "The system might be in an erroneous state");
        return ApplicationStatus.UNKNOWN;
    } else {
        YarnApplicationState appState = lastReport.getYarnApplicationState();
        ApplicationStatus status = (appState == YarnApplicationState.FAILED
                || appState == YarnApplicationState.KILLED) ? ApplicationStatus.FAILED
                        : ApplicationStatus.SUCCEEDED;
        if (status != ApplicationStatus.SUCCEEDED) {
            LOG.warn("YARN reported application state {}", appState);
            LOG.warn("Diagnostics: {}", lastReport.getDiagnostics());
        }
        return status;
    }
}

From source file:io.hops.hopsworks.common.jobs.flink.YarnClusterClient.java

License:Apache License

/**
 * Shuts down the Yarn application//from  ww w . j ava 2s  .co  m
 */
public void shutdownCluster() {

    if (hasBeenShutDown.getAndSet(true)) {
        return;
    }

    if (!isConnected) {
        throw new IllegalStateException("The cluster has been not been connected to the ApplicationMaster.");
    }

    try {
        Runtime.getRuntime().removeShutdownHook(clientShutdownHook);
    } catch (IllegalStateException e) {
        // we are already in the shutdown hook
    }

    LOG.info("Sending shutdown request to the Application Master");
    try {
        Future<Object> response = Patterns.ask(applicationClient.get(), new YarnMessages.LocalStopYarnSession(
                getApplicationStatus(), "Flink YARN Client requested shutdown"), new Timeout(akkaDuration));
        Await.ready(response, akkaDuration);
    } catch (Exception e) {
        LOG.warn("Error while stopping YARN cluster.", e);
    }

    try {
        File propertiesFile = FlinkYarnSessionCli.getYarnPropertiesLocation(flinkConfig);
        if (propertiesFile.isFile()) {
            if (propertiesFile.delete()) {
                LOG.info("Deleted Yarn properties file at {}", propertiesFile.getAbsoluteFile().toString());
            } else {
                LOG.warn("Couldn't delete Yarn properties file at {}",
                        propertiesFile.getAbsoluteFile().toString());
            }
        }
    } catch (Exception e) {
        LOG.warn("Exception while deleting the JobManager address file", e);
    }

    if (sessionFilesDir != null) {
        LOG.info("Deleting files in " + sessionFilesDir);
        try {
            FileSystem shutFS = FileSystem.get(hadoopConfig);
            shutFS.delete(sessionFilesDir, true); // delete conf and jar file.
            shutFS.close();
        } catch (IOException e) {
            LOG.error("Could not delete the Flink jar and configuration files in HDFS..", e);
        }
    } else {
        LOG.warn("Session file directory not set. Not deleting session files");
    }

    try {
        pollingRunner.stopRunner();
        pollingRunner.join(1000);
    } catch (InterruptedException e) {
        LOG.warn("Shutdown of the polling runner was interrupted", e);
        Thread.currentThread().interrupt();
    }

    try {
        ApplicationReport appReport = yarnClient.getApplicationReport(appId);

        LOG.info("Application " + appId + " finished with state " + appReport.getYarnApplicationState()
                + " and final state " + appReport.getFinalApplicationStatus() + " at "
                + appReport.getFinishTime());

        if (appReport.getYarnApplicationState() == YarnApplicationState.FAILED
                || appReport.getYarnApplicationState() == YarnApplicationState.KILLED) {
            LOG.warn("Application failed. Diagnostics " + appReport.getDiagnostics());
            LOG.warn("If log aggregation is activated in the Hadoop cluster, we recommend to retrieve "
                    + "the full application log using this command:\n" + "\tyarn logs -appReport "
                    + appReport.getApplicationId() + "\n"
                    + "(It sometimes takes a few seconds until the logs are aggregated)");
        }
    } catch (Exception e) {
        LOG.warn("Couldn't get final report", e);
    }

    LOG.info("YARN Client is shutting down");
    yarnClient.stop(); // actorRunner is using the yarnClient.
    yarnClient = null; // set null to clearly see if somebody wants to access it afterwards.
}

From source file:io.hops.tensorflow.Client.java

License:Apache License

/**
 * Monitor the submitted application for completion.
 * Kill application if time expires./*from ww  w. j a va 2 s.  c  o m*/
 *
 * @param appId
 *     Application Id of application to be monitored
 * @return true if application completed successfully
 * @throws YarnException
 * @throws IOException
 */
public boolean monitorApplication(ApplicationId appId) throws YarnException, IOException {

    YarnApplicationState oldState = null;

    while (true) {

        // Check app status every 1 second.
        try {
            Thread.sleep(1000);
        } catch (InterruptedException e) {
            LOG.debug("Thread sleep in monitoring loop interrupted");
        }

        ApplicationReport report = yarnClient.getApplicationReport(appId);
        YarnApplicationState state = report.getYarnApplicationState();
        FinalApplicationStatus dsStatus = report.getFinalApplicationStatus();

        if (oldState != state) {
            LOG.info("Got application report from ASM for" + "\n\t appId=" + appId.getId()
                    + "\n\t clientToAMToken=" + report.getClientToAMToken() + "\n\t appDiagnostics="
                    + report.getDiagnostics() + "\n\t appMasterHost=" + report.getHost() + "\n\t appQueue="
                    + report.getQueue() + "\n\t appMasterRpcPort=" + report.getRpcPort() + "\n\t appStartTime="
                    + report.getStartTime() + "\n\t yarnAppState=" + report.getYarnApplicationState().toString()
                    + "\n\t distributedFinalState=" + report.getFinalApplicationStatus().toString()
                    + "\n\t appTrackingUrl=" + report.getTrackingUrl() + "\n\t appUser=" + report.getUser());
            oldState = state;
        } else {
            LOG.info("Got application report from ASM for " + appId + " (state: " + state + ")");
        }

        if (YarnApplicationState.FINISHED == state) {
            if (FinalApplicationStatus.SUCCEEDED == dsStatus) {
                LOG.info("Application has completed successfully. Breaking monitoring loop");
                return true;
            } else {
                LOG.info("Application did finished unsuccessfully." + " YarnState=" + state.toString()
                        + ", DSFinalStatus=" + dsStatus.toString() + ". Breaking monitoring loop");
                return false;
            }
        } else if (YarnApplicationState.KILLED == state || YarnApplicationState.FAILED == state) {
            LOG.info("Application did not finish." + " YarnState=" + state.toString() + ", DSFinalStatus="
                    + dsStatus.toString() + ". Breaking monitoring loop");
            return false;
        }

        if (System.currentTimeMillis() > (clientStartTime + clientTimeout)) {
            LOG.info("Reached client specified timeout for application. Killing application");
            forceKillApplication(appId);
            return false;
        }
    }
}