Example usage for org.apache.hadoop.yarn.api.records.ApplicationId.toString()

List of usage examples for org.apache.hadoop.yarn.api.records.ApplicationId.toString()

Introduction

On this page you can find example usages of org.apache.hadoop.yarn.api.records.ApplicationId.toString().

Prototype

@Override
public String toString()


Usage
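
Before the excerpts from real projects below, here is a minimal, self-contained sketch of the typical pattern (not taken from any of the listed sources; the class name ListApplicationIds is illustrative). It assumes a reachable YARN cluster, a yarn-site.xml on the classpath, and the hadoop-yarn-client dependency. ApplicationId.toString() renders the familiar application_<clusterTimestamp>_<sequenceNumber> identifier, which the excerpts below use for logging and for building per-application staging directories.

import java.io.IOException;
import java.util.List;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.yarn.api.records.ApplicationId;
import org.apache.hadoop.yarn.api.records.ApplicationReport;
import org.apache.hadoop.yarn.client.api.YarnClient;
import org.apache.hadoop.yarn.conf.YarnConfiguration;
import org.apache.hadoop.yarn.exceptions.YarnException;

public class ListApplicationIds {

    public static void main(String[] args) throws IOException, YarnException {
        // Assumes yarn-site.xml on the classpath points at the ResourceManager.
        Configuration conf = new YarnConfiguration();

        YarnClient yarnClient = YarnClient.createYarnClient();
        yarnClient.init(conf);
        yarnClient.start();
        try {
            // Each report carries the ApplicationId of a submitted application.
            List<ApplicationReport> reports = yarnClient.getApplications();
            for (ApplicationReport report : reports) {
                ApplicationId appId = report.getApplicationId();
                // toString() yields "application_<clusterTimestamp>_<sequenceNumber>".
                System.out.println(appId.toString() + " is in state " + report.getYarnApplicationState());
            }
        } finally {
            yarnClient.stop();
        }
    }
}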

From source file: org.apache.ambari.view.slider.SliderAppsViewControllerImpl.java

License: Apache License

@Override
public void thawApp(final String appId) throws YarnException, IOException, InterruptedException {
    ApplicationId applicationId = invokeSliderClientRunnable(new SliderClientContextRunnable<ApplicationId>() {
        @Override
        public ApplicationId run(SliderClient sliderClient)
                throws YarnException, IOException, InterruptedException {
            Set<String> properties = new HashSet<String>();
            properties.add("id");
            properties.add("name");
            final SliderApp sliderApp = getSliderApp(appId, properties);
            if (sliderApp == null)
                throw new ApplicationNotFoundException(appId);
            ActionThawArgs thawArgs = new ActionThawArgs();
            sliderClient.actionThaw(sliderApp.getName(), thawArgs);
            return sliderClient.applicationId;
        }
    });
    logger.info("Thawed Slider App [" + appId + "] with response: " + applicationId.toString());
}

From source file: org.apache.drill.yarn.client.KillCommand.java

License: Apache License

@Override
public void run() throws ClientException {
    ApplicationId appId = checkAppId();
    if (appId == null) {
        System.exit(-1);
    }
    YarnRMClient client = new YarnRMClient(appId);
    try {
        client.killApplication();
    } catch (YarnClientException e) {
        throw new ClientException(e);
    }
    System.out.println("Kill request sent, waiting for shut-down.");
    try {
        client.waitForCompletion();
    } catch (YarnClientException e) {
        throw new ClientException("Wait for completion failed for app id: " + appId.toString(), e);
    }
    System.out.println("Application completed: " + appId.toString());
}

From source file: org.apache.drill.yarn.client.StartCommand.java

License: Apache License

/**
 * Check if an application ID file exists. If it does, check if an application
 * is running. If an app is running, then we can't start a new one. If the app
 * is not running, then clean up the "orphan" app id file.
 *
 * @throws ClientException
 */

private void checkExistingApp() throws ClientException {
    File appIdFile = getAppIdFile();
    if (!appIdFile.exists()) {
        return;
    }

    // File exists. Ask YARN about status.

    Reporter reporter;
    ApplicationId appId;
    try {
        System.out.println("Found app ID file: " + appIdFile.getAbsolutePath());
        appId = checkAppId();
        System.out.print("Checking application ID: " + appId.toString() + "...");
        YarnRMClient client = new YarnRMClient(appId);
        reporter = new Reporter(client);
        reporter.getReport();
    } catch (ClientException e) {
        // This exception occurs when we ask for a report about an application
        // that YARN does not know about. (YARN has likely been restarted.)

        System.out.println(" Not running.");
        appIdFile.delete();
        return;
    }

    // YARN knows about the application. But, was it stopped, perhaps from the
    // web UI?

    if (reporter.isStopped()) {
        System.out.println(" Completed with state " + reporter.getState());
        appIdFile.delete();
        return;
    }

    // The app (or another one with the same App ID) is running.

    System.out.println(" Still running!");
    throw new ClientException("Error: AM already running as Application ID: " + appId);
}

From source file: org.apache.flink.yarn.AbstractYarnClusterDescriptor.java

License: Apache License

public ApplicationReport startAppMaster(JobGraph jobGraph, YarnClient yarnClient,
        YarnClientApplication yarnApplication) throws Exception {

    // ------------------ Set default file system scheme -------------------------

    try {
        org.apache.flink.core.fs.FileSystem.setDefaultScheme(flinkConfiguration);
    } catch (IOException e) {
        throw new IOException("Error while setting the default " + "filesystem scheme from configuration.", e);
    }

    // initialize file system
    // Copy the application master jar to the filesystem
    // Create a local resource to point to the destination jar path
    final FileSystem fs = FileSystem.get(conf);

    // hard-coded check for the GoogleHDFS client because it's not overriding the getScheme() method.
    if (!fs.getClass().getSimpleName().equals("GoogleHadoopFileSystem") && fs.getScheme().startsWith("file")) {
        LOG.warn("The file system scheme is '" + fs.getScheme() + "'. This indicates that the "
                + "specified Hadoop configuration path is wrong and the system is using the default Hadoop configuration values."
                + "The Flink YARN client needs to store its files in a distributed file system");
    }

    ApplicationSubmissionContext appContext = yarnApplication.getApplicationSubmissionContext();
    Set<File> effectiveShipFiles = new HashSet<>(shipFiles.size());
    for (File file : shipFiles) {
        effectiveShipFiles.add(file.getAbsoluteFile());
    }

    //check if there is a logback or log4j file
    File logbackFile = new File(configurationDirectory + File.separator + CONFIG_FILE_LOGBACK_NAME);
    final boolean hasLogback = logbackFile.exists();
    if (hasLogback) {
        effectiveShipFiles.add(logbackFile);
    }

    File log4jFile = new File(configurationDirectory + File.separator + CONFIG_FILE_LOG4J_NAME);
    final boolean hasLog4j = log4jFile.exists();
    if (hasLog4j) {
        effectiveShipFiles.add(log4jFile);
        if (hasLogback) {
            // this means there is already a logback configuration file --> fail
            LOG.warn("The configuration directory ('" + configurationDirectory + "') contains both LOG4J and "
                    + "Logback configuration files. Please delete or rename one of them.");
        }
    }

    addLibFolderToShipFiles(effectiveShipFiles);

    // add the user jar to the classpath of the to-be-created cluster
    if (userJarFiles != null) {
        effectiveShipFiles.addAll(userJarFiles);
    }

    // Set-up ApplicationSubmissionContext for the application

    final ApplicationId appId = appContext.getApplicationId();

    // ------------------ Add Zookeeper namespace to local flinkConfiguraton ------
    String zkNamespace = getZookeeperNamespace();
    // no user specified cli argument for namespace?
    if (zkNamespace == null || zkNamespace.isEmpty()) {
        // namespace defined in config? else use applicationId as default.
        zkNamespace = flinkConfiguration.getString(HighAvailabilityOptions.HA_CLUSTER_ID,
                String.valueOf(appId));
        setZookeeperNamespace(zkNamespace);
    }

    flinkConfiguration.setString(HighAvailabilityOptions.HA_CLUSTER_ID, zkNamespace);

    if (HighAvailabilityMode.isHighAvailabilityModeActivated(flinkConfiguration)) {
        // activate re-execution of failed applications
        appContext.setMaxAppAttempts(flinkConfiguration.getInteger(ConfigConstants.YARN_APPLICATION_ATTEMPTS,
                YarnConfiguration.DEFAULT_RM_AM_MAX_ATTEMPTS));

        activateHighAvailabilitySupport(appContext);
    } else {
        // set number of application retries to 1 in the default case
        appContext
                .setMaxAppAttempts(flinkConfiguration.getInteger(ConfigConstants.YARN_APPLICATION_ATTEMPTS, 1));
    }

    // local resource map for Yarn
    final Map<String, LocalResource> localResources = new HashMap<>(2 + effectiveShipFiles.size());
    // list of remote paths (after upload)
    final List<Path> paths = new ArrayList<>(2 + effectiveShipFiles.size());
    // classpath assembler
    final StringBuilder classPathBuilder = new StringBuilder();
    // ship list that enables reuse of resources for task manager containers
    StringBuilder envShipFileList = new StringBuilder();

    // upload and register ship files
    for (File shipFile : effectiveShipFiles) {
        LocalResource shipResources = Records.newRecord(LocalResource.class);

        Path shipLocalPath = new Path("file://" + shipFile.getAbsolutePath());
        Path remotePath = Utils.setupLocalResource(fs, appId.toString(), shipLocalPath, shipResources,
                fs.getHomeDirectory());

        paths.add(remotePath);

        localResources.put(shipFile.getName(), shipResources);

        if (shipFile.isDirectory()) {
            // add directories to the classpath
            java.nio.file.Path shipPath = shipFile.toPath();
            final java.nio.file.Path parentPath = shipPath.getParent();

            Files.walkFileTree(shipPath, new SimpleFileVisitor<java.nio.file.Path>() {
                @Override
                public FileVisitResult preVisitDirectory(java.nio.file.Path dir, BasicFileAttributes attrs)
                        throws IOException {
                    super.preVisitDirectory(dir, attrs);

                    java.nio.file.Path relativePath = parentPath.relativize(dir);

                    classPathBuilder.append(relativePath).append(File.separator).append("*")
                            .append(File.pathSeparator);

                    return FileVisitResult.CONTINUE;
                }
            });
        } else {
            // add files to the classpath
            classPathBuilder.append(shipFile.getName()).append(File.pathSeparator);
        }

        envShipFileList.append(remotePath).append(",");
    }

    // Setup jar for ApplicationMaster
    LocalResource appMasterJar = Records.newRecord(LocalResource.class);
    LocalResource flinkConf = Records.newRecord(LocalResource.class);
    Path remotePathJar = Utils.setupLocalResource(fs, appId.toString(), flinkJarPath, appMasterJar,
            fs.getHomeDirectory());
    Path remotePathConf = Utils.setupLocalResource(fs, appId.toString(), flinkConfigurationPath, flinkConf,
            fs.getHomeDirectory());
    localResources.put("flink.jar", appMasterJar);
    localResources.put("flink-conf.yaml", flinkConf);

    paths.add(remotePathJar);
    classPathBuilder.append("flink.jar").append(File.pathSeparator);
    paths.add(remotePathConf);
    classPathBuilder.append("flink-conf.yaml").append(File.pathSeparator);

    // write job graph to tmp file and add it to local resource
    // TODO: server use user main method to generate job graph
    if (jobGraph != null) {
        try {
            File fp = File.createTempFile(appId.toString(), null);
            fp.deleteOnExit();
            try (FileOutputStream output = new FileOutputStream(fp);
                    ObjectOutputStream obOutput = new ObjectOutputStream(output);) {
                obOutput.writeObject(jobGraph);
            }
            LocalResource jobgraph = Records.newRecord(LocalResource.class);
            Path remoteJobGraph = Utils.setupLocalResource(fs, appId.toString(), new Path(fp.toURI()), jobgraph,
                    fs.getHomeDirectory());
            localResources.put("job.graph", jobgraph);
            paths.add(remoteJobGraph);
            classPathBuilder.append("job.graph").append(File.pathSeparator);
        } catch (Exception e) {
            LOG.warn("Add job graph to local resource fail");
            throw e;
        }
    }

    sessionFilesDir = new Path(fs.getHomeDirectory(), ".flink/" + appId.toString() + "/");

    FsPermission permission = new FsPermission(FsAction.ALL, FsAction.NONE, FsAction.NONE);
    fs.setPermission(sessionFilesDir, permission); // set permission for path.

    //To support Yarn Secure Integration Test Scenario
    //In Integration test setup, the Yarn containers created by YarnMiniCluster does not have the Yarn site XML
    //and KRB5 configuration files. We are adding these files as container local resources for the container
    //applications (JM/TMs) to have proper secure cluster setup
    Path remoteKrb5Path = null;
    Path remoteYarnSiteXmlPath = null;
    boolean hasKrb5 = false;
    if (System.getenv("IN_TESTS") != null) {
        String krb5Config = System.getProperty("java.security.krb5.conf");
        if (krb5Config != null && krb5Config.length() != 0) {
            File krb5 = new File(krb5Config);
            LOG.info("Adding KRB5 configuration {} to the AM container local resource bucket",
                    krb5.getAbsolutePath());
            LocalResource krb5ConfResource = Records.newRecord(LocalResource.class);
            Path krb5ConfPath = new Path(krb5.getAbsolutePath());
            remoteKrb5Path = Utils.setupLocalResource(fs, appId.toString(), krb5ConfPath, krb5ConfResource,
                    fs.getHomeDirectory());
            localResources.put(Utils.KRB5_FILE_NAME, krb5ConfResource);

            File f = new File(System.getenv("YARN_CONF_DIR"), Utils.YARN_SITE_FILE_NAME);
            LOG.info("Adding Yarn configuration {} to the AM container local resource bucket",
                    f.getAbsolutePath());
            LocalResource yarnConfResource = Records.newRecord(LocalResource.class);
            Path yarnSitePath = new Path(f.getAbsolutePath());
            remoteYarnSiteXmlPath = Utils.setupLocalResource(fs, appId.toString(), yarnSitePath,
                    yarnConfResource, fs.getHomeDirectory());
            localResources.put(Utils.YARN_SITE_FILE_NAME, yarnConfResource);

            hasKrb5 = true;
        }
    }

    // setup security tokens
    LocalResource keytabResource = null;
    Path remotePathKeytab = null;
    String keytab = flinkConfiguration.getString(SecurityOptions.KERBEROS_LOGIN_KEYTAB);
    if (keytab != null) {
        LOG.info("Adding keytab {} to the AM container local resource bucket", keytab);
        keytabResource = Records.newRecord(LocalResource.class);
        Path keytabPath = new Path(keytab);
        remotePathKeytab = Utils.setupLocalResource(fs, appId.toString(), keytabPath, keytabResource,
                fs.getHomeDirectory());
        localResources.put(Utils.KEYTAB_FILE_NAME, keytabResource);
    }

    final ContainerLaunchContext amContainer = setupApplicationMasterContainer(hasLogback, hasLog4j, hasKrb5);

    if (UserGroupInformation.isSecurityEnabled() && keytab == null) {
        //set tokens only when keytab is not provided
        LOG.info("Adding delegation token to the AM container..");
        Utils.setTokensFor(amContainer, paths, conf);
    }

    amContainer.setLocalResources(localResources);
    fs.close();

    // Setup CLASSPATH and environment variables for ApplicationMaster
    final Map<String, String> appMasterEnv = new HashMap<>();
    // set user specified app master environment variables
    appMasterEnv.putAll(Utils.getEnvironmentVariables(ConfigConstants.YARN_APPLICATION_MASTER_ENV_PREFIX,
            flinkConfiguration));
    // set Flink app class path
    appMasterEnv.put(YarnConfigKeys.ENV_FLINK_CLASSPATH, classPathBuilder.toString());

    // set Flink on YARN internal configuration values
    appMasterEnv.put(YarnConfigKeys.ENV_TM_COUNT, String.valueOf(taskManagerCount));
    appMasterEnv.put(YarnConfigKeys.ENV_TM_MEMORY, String.valueOf(taskManagerMemoryMb));
    appMasterEnv.put(YarnConfigKeys.FLINK_JAR_PATH, remotePathJar.toString());
    appMasterEnv.put(YarnConfigKeys.ENV_APP_ID, appId.toString());
    appMasterEnv.put(YarnConfigKeys.ENV_CLIENT_HOME_DIR, fs.getHomeDirectory().toString());
    appMasterEnv.put(YarnConfigKeys.ENV_CLIENT_SHIP_FILES, envShipFileList.toString());
    appMasterEnv.put(YarnConfigKeys.ENV_SLOTS, String.valueOf(slots));
    appMasterEnv.put(YarnConfigKeys.ENV_DETACHED, String.valueOf(detached));
    appMasterEnv.put(YarnConfigKeys.ENV_ZOOKEEPER_NAMESPACE, getZookeeperNamespace());

    // https://github.com/apache/hadoop/blob/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/YarnApplicationSecurity.md#identity-on-an-insecure-cluster-hadoop_user_name
    appMasterEnv.put(YarnConfigKeys.ENV_HADOOP_USER_NAME, UserGroupInformation.getCurrentUser().getUserName());

    if (keytabResource != null) {
        appMasterEnv.put(YarnConfigKeys.KEYTAB_PATH, remotePathKeytab.toString());
        String principal = flinkConfiguration.getString(SecurityOptions.KERBEROS_LOGIN_PRINCIPAL);
        appMasterEnv.put(YarnConfigKeys.KEYTAB_PRINCIPAL, principal);
    }

    //To support Yarn Secure Integration Test Scenario
    if (remoteYarnSiteXmlPath != null && remoteKrb5Path != null) {
        appMasterEnv.put(YarnConfigKeys.ENV_YARN_SITE_XML_PATH, remoteYarnSiteXmlPath.toString());
        appMasterEnv.put(YarnConfigKeys.ENV_KRB5_PATH, remoteKrb5Path.toString());
    }

    if (dynamicPropertiesEncoded != null) {
        appMasterEnv.put(YarnConfigKeys.ENV_DYNAMIC_PROPERTIES, dynamicPropertiesEncoded);
    }

    // set classpath from YARN configuration
    Utils.setupYarnClassPath(conf, appMasterEnv);

    amContainer.setEnvironment(appMasterEnv);

    // Set up resource type requirements for ApplicationMaster
    Resource capability = Records.newRecord(Resource.class);
    capability.setMemory(jobManagerMemoryMb);
    capability.setVirtualCores(1);

    String name;
    if (customName == null) {
        name = "Flink session with " + taskManagerCount + " TaskManagers";
        if (detached) {
            name += " (detached)";
        }
    } else {
        name = customName;
    }

    appContext.setApplicationName(name);
    appContext.setApplicationType("Apache Flink");
    appContext.setAMContainerSpec(amContainer);
    appContext.setResource(capability);
    if (yarnQueue != null) {
        appContext.setQueue(yarnQueue);
    }

    setApplicationTags(appContext);

    // add a hook to clean up in case deployment fails
    Thread deploymentFailureHook = new DeploymentFailureHook(yarnClient, yarnApplication);
    Runtime.getRuntime().addShutdownHook(deploymentFailureHook);
    LOG.info("Submitting application master " + appId);
    yarnClient.submitApplication(appContext);

    LOG.info("Waiting for the cluster to be allocated");
    final long startTime = System.currentTimeMillis();
    ApplicationReport report;
    YarnApplicationState lastAppState = YarnApplicationState.NEW;
    loop: while (true) {
        try {
            report = yarnClient.getApplicationReport(appId);
        } catch (IOException e) {
            throw new YarnDeploymentException("Failed to deploy the cluster.", e);
        }
        YarnApplicationState appState = report.getYarnApplicationState();
        LOG.debug("Application State: {}", appState);
        switch (appState) {
        case FAILED:
        case FINISHED: //TODO: the finished state may be valid in flip-6
        case KILLED:
            throw new YarnDeploymentException("The YARN application unexpectedly switched to state " + appState
                    + " during deployment. \n" + "Diagnostics from YARN: " + report.getDiagnostics() + "\n"
                    + "If log aggregation is enabled on your cluster, use this command to further investigate the issue:\n"
                    + "yarn logs -applicationId " + appId);
            //break ..
        case RUNNING:
            LOG.info("YARN application has been deployed successfully.");
            break loop;
        default:
            if (appState != lastAppState) {
                LOG.info("Deploying cluster, current state " + appState);
            }
            if (System.currentTimeMillis() - startTime > 60000) {
                LOG.info(
                        "Deployment took more than 60 seconds. Please check if the requested resources are available in the YARN cluster");
            }

        }
        lastAppState = appState;
        Thread.sleep(250);
    }
    // print the application id for user to cancel themselves.
    if (isDetachedMode()) {
        LOG.info("The Flink YARN client has been started in detached mode. In order to stop "
                + "Flink on YARN, use the following command or a YARN web interface to stop "
                + "it:\nyarn application -kill " + appId + "\nPlease also note that the "
                + "temporary files of the YARN session in the home directoy will not be removed.");
    }
    // since deployment was successful, remove the hook
    try {
        Runtime.getRuntime().removeShutdownHook(deploymentFailureHook);
    } catch (IllegalStateException e) {
        // we're already in the shut down hook.
    }
    return report;
}

From source file: org.apache.flink.yarn.Client.java

License: Apache License

public void run(String[] args) throws Exception {

    if (UserGroupInformation.isSecurityEnabled()) {
        throw new RuntimeException("Flink YARN client does not have security support right now."
                + "File a bug, we will fix it asap");
    }
    //Utils.logFilesInCurrentDirectory(LOG);
    //
    //   Command Line Options
    //
    Options options = new Options();
    options.addOption(VERBOSE);
    options.addOption(FLINK_CONF_DIR);
    options.addOption(FLINK_JAR);
    options.addOption(JM_MEMORY);
    options.addOption(TM_MEMORY);
    options.addOption(TM_CORES);
    options.addOption(CONTAINER);
    options.addOption(GEN_CONF);
    options.addOption(QUEUE);
    options.addOption(QUERY);
    options.addOption(SHIP_PATH);

    CommandLineParser parser = new PosixParser();
    CommandLine cmd = null;
    try {
        cmd = parser.parse(options, args);
    } catch (MissingOptionException moe) {
        System.out.println(moe.getMessage());
        printUsage();
        System.exit(1);
    }

    if (System.getProperty("log4j.configuration") == null) {
        Logger root = Logger.getRootLogger();
        root.removeAllAppenders();
        PatternLayout layout = new PatternLayout("%d{HH:mm:ss,SSS} %-5p %-60c %x - %m%n");
        ConsoleAppender appender = new ConsoleAppender(layout, "System.err");
        root.addAppender(appender);
        if (cmd.hasOption(VERBOSE.getOpt())) {
            root.setLevel(Level.DEBUG);
            LOG.debug("CLASSPATH: " + System.getProperty("java.class.path"));
        } else {
            root.setLevel(Level.INFO);
        }
    }

    // Jar Path
    Path localJarPath;
    if (cmd.hasOption(FLINK_JAR.getOpt())) {
        String userPath = cmd.getOptionValue(FLINK_JAR.getOpt());
        if (!userPath.startsWith("file://")) {
            userPath = "file://" + userPath;
        }
        localJarPath = new Path(userPath);
    } else {
        localJarPath = new Path(
                "file://" + Client.class.getProtectionDomain().getCodeSource().getLocation().getPath());
    }

    if (cmd.hasOption(GEN_CONF.getOpt())) {
        LOG.info("Placing default configuration in current directory");
        File outFile = generateDefaultConf(localJarPath);
        LOG.info("File written to " + outFile.getAbsolutePath());
        System.exit(0);
    }

    // Conf Path 
    Path confPath = null;
    String confDirPath = "";
    if (cmd.hasOption(FLINK_CONF_DIR.getOpt())) {
        confDirPath = cmd.getOptionValue(FLINK_CONF_DIR.getOpt()) + "/";
        File confFile = new File(confDirPath + CONFIG_FILE_NAME);
        if (!confFile.exists()) {
            LOG.fatal("Unable to locate configuration file in " + confFile);
            System.exit(1);
        }
        confPath = new Path(confFile.getAbsolutePath());
    } else {
        System.out.println("No configuration file has been specified");

        // no configuration path given.
        // -> see if there is one in the current directory
        File currDir = new File(".");
        File[] candidates = currDir.listFiles(new FilenameFilter() {
            @Override
            public boolean accept(final File dir, final String name) {
                return name != null && name.endsWith(".yaml");
            }
        });
        if (candidates == null || candidates.length == 0) {
            System.out.println(
                    "No configuration file has been found in current directory.\n" + "Copying default.");
            File outFile = generateDefaultConf(localJarPath);
            confPath = new Path(outFile.toURI());
        } else {
            if (candidates.length > 1) {
                System.out.println("Multiple .yaml configuration files were found in the current directory\n"
                        + "Please specify one explicitly");
                System.exit(1);
            } else if (candidates.length == 1) {
                confPath = new Path(candidates[0].toURI());
            }
        }
    }
    List<File> shipFiles = new ArrayList<File>();
    // path to directory to ship
    if (cmd.hasOption(SHIP_PATH.getOpt())) {
        String shipPath = cmd.getOptionValue(SHIP_PATH.getOpt());
        File shipDir = new File(shipPath);
        if (shipDir.isDirectory()) {
            shipFiles = new ArrayList<File>(Arrays.asList(shipDir.listFiles(new FilenameFilter() {
                @Override
                public boolean accept(File dir, String name) {
                    return !(name.equals(".") || name.equals(".."));
                }
            })));
        } else {
            LOG.warn("Ship directory is not a directory!");
        }
    }
    boolean hasLog4j = false;
    //check if there is a log4j file
    if (confDirPath.length() > 0) {
        File l4j = new File(confDirPath + "/log4j.properties");
        if (l4j.exists()) {
            shipFiles.add(l4j);
            hasLog4j = true;
        }
    }

    // queue
    String queue = "default";
    if (cmd.hasOption(QUEUE.getOpt())) {
        queue = cmd.getOptionValue(QUEUE.getOpt());
    }

    // JobManager Memory
    int jmMemory = 512;
    if (cmd.hasOption(JM_MEMORY.getOpt())) {
        jmMemory = Integer.valueOf(cmd.getOptionValue(JM_MEMORY.getOpt()));
    }

    // Task Managers memory
    int tmMemory = 1024;
    if (cmd.hasOption(TM_MEMORY.getOpt())) {
        tmMemory = Integer.valueOf(cmd.getOptionValue(TM_MEMORY.getOpt()));
    }

    // Task Managers vcores
    int tmCores = 1;
    if (cmd.hasOption(TM_CORES.getOpt())) {
        tmCores = Integer.valueOf(cmd.getOptionValue(TM_CORES.getOpt()));
    }
    Utils.getFlinkConfiguration(confPath.toUri().getPath());
    int jmPort = GlobalConfiguration.getInteger(ConfigConstants.JOB_MANAGER_IPC_PORT_KEY, 0);
    if (jmPort == 0) {
        LOG.warn("Unable to find job manager port in configuration!");
        jmPort = ConfigConstants.DEFAULT_JOB_MANAGER_IPC_PORT;
    }
    conf = Utils.initializeYarnConfiguration();

    // initialize HDFS
    LOG.info("Copy App Master jar from local filesystem and add to local environment");
    // Copy the application master jar to the filesystem 
    // Create a local resource to point to the destination jar path 
    final FileSystem fs = FileSystem.get(conf);

    if (fs.getScheme().startsWith("file")) {
        LOG.warn("The file system scheme is '" + fs.getScheme() + "'. This indicates that the "
                + "specified Hadoop configuration path is wrong and the sytem is using the default Hadoop configuration values."
                + "The Flink YARN client needs to store its files in a distributed file system");
    }

    // Create yarnClient
    final YarnClient yarnClient = YarnClient.createYarnClient();
    yarnClient.init(conf);
    yarnClient.start();

    // Query cluster for metrics
    if (cmd.hasOption(QUERY.getOpt())) {
        showClusterMetrics(yarnClient);
    }
    if (!cmd.hasOption(CONTAINER.getOpt())) {
        LOG.fatal("Missing required argument " + CONTAINER.getOpt());
        printUsage();
        yarnClient.stop();
        System.exit(1);
    }

    // TM Count
    final int taskManagerCount = Integer.valueOf(cmd.getOptionValue(CONTAINER.getOpt()));

    System.out.println("Using values:");
    System.out.println("\tContainer Count = " + taskManagerCount);
    System.out.println("\tJar Path = " + localJarPath.toUri().getPath());
    System.out.println("\tConfiguration file = " + confPath.toUri().getPath());
    System.out.println("\tJobManager memory = " + jmMemory);
    System.out.println("\tTaskManager memory = " + tmMemory);
    System.out.println("\tTaskManager cores = " + tmCores);

    // Create application via yarnClient
    YarnClientApplication app = yarnClient.createApplication();
    GetNewApplicationResponse appResponse = app.getNewApplicationResponse();
    Resource maxRes = appResponse.getMaximumResourceCapability();
    if (tmMemory > maxRes.getMemory() || tmCores > maxRes.getVirtualCores()) {
        LOG.fatal("The cluster does not have the requested resources for the TaskManagers available!\n"
                + "Maximum Memory: " + maxRes.getMemory() + ", Maximum Cores: " + tmCores);
        yarnClient.stop();
        System.exit(1);
    }
    if (jmMemory > maxRes.getMemory()) {
        LOG.fatal("The cluster does not have the requested resources for the JobManager available!\n"
                + "Maximum Memory: " + maxRes.getMemory());
        yarnClient.stop();
        System.exit(1);
    }
    int totalMemoryRequired = jmMemory + tmMemory * taskManagerCount;
    ClusterResourceDescription freeClusterMem = getCurrentFreeClusterResources(yarnClient);
    if (freeClusterMem.totalFreeMemory < totalMemoryRequired) {
        LOG.fatal("This YARN session requires " + totalMemoryRequired + "MB of memory in the cluster. "
                + "There are currently only " + freeClusterMem.totalFreeMemory + "MB available.");
        yarnClient.stop();
        System.exit(1);
    }
    if (tmMemory > freeClusterMem.containerLimit) {
        LOG.fatal("The requested amount of memory for the TaskManagers (" + tmMemory + "MB) is more than "
                + "the largest possible YARN container: " + freeClusterMem.containerLimit);
        yarnClient.stop();
        System.exit(1);
    }
    if (jmMemory > freeClusterMem.containerLimit) {
        LOG.fatal("The requested amount of memory for the JobManager (" + jmMemory + "MB) is more than "
                + "the largest possible YARN container: " + freeClusterMem.containerLimit);
        yarnClient.stop();
        System.exit(1);
    }

    // respect custom JVM options in the YAML file
    final String javaOpts = GlobalConfiguration.getString(ConfigConstants.FLINK_JVM_OPTIONS, "");

    // Set up the container launch context for the application master
    ContainerLaunchContext amContainer = Records.newRecord(ContainerLaunchContext.class);

    String amCommand = "$JAVA_HOME/bin/java" + " -Xmx" + Utils.calculateHeapSize(jmMemory) + "M " + javaOpts;
    if (hasLog4j) {
        amCommand += " -Dlog.file=\"" + ApplicationConstants.LOG_DIR_EXPANSION_VAR
                + "/jobmanager-log4j.log\" -Dlog4j.configuration=file:log4j.properties";
    }
    amCommand += " org.apache.flink.yarn.ApplicationMaster" + " " + " 1>"
            + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/jobmanager-stdout.log" + " 2>"
            + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/jobmanager-stderr.log";
    amContainer.setCommands(Collections.singletonList(amCommand));

    System.err.println("amCommand=" + amCommand);

    // Set-up ApplicationSubmissionContext for the application
    ApplicationSubmissionContext appContext = app.getApplicationSubmissionContext();
    final ApplicationId appId = appContext.getApplicationId();

    // Setup jar for ApplicationMaster
    LocalResource appMasterJar = Records.newRecord(LocalResource.class);
    LocalResource flinkConf = Records.newRecord(LocalResource.class);
    Path remotePathJar = Utils.setupLocalResource(conf, fs, appId.toString(), localJarPath, appMasterJar,
            fs.getHomeDirectory());
    Path remotePathConf = Utils.setupLocalResource(conf, fs, appId.toString(), confPath, flinkConf,
            fs.getHomeDirectory());
    Map<String, LocalResource> localResources = new HashMap<String, LocalResource>(2);
    localResources.put("flink.jar", appMasterJar);
    localResources.put("flink-conf.yaml", flinkConf);

    // setup security tokens (code from apache storm)
    final Path[] paths = new Path[3 + shipFiles.size()];
    StringBuffer envShipFileList = new StringBuffer();
    // upload ship files
    for (int i = 0; i < shipFiles.size(); i++) {
        File shipFile = shipFiles.get(i);
        LocalResource shipResources = Records.newRecord(LocalResource.class);
        Path shipLocalPath = new Path("file://" + shipFile.getAbsolutePath());
        paths[3 + i] = Utils.setupLocalResource(conf, fs, appId.toString(), shipLocalPath, shipResources,
                fs.getHomeDirectory());
        localResources.put(shipFile.getName(), shipResources);

        envShipFileList.append(paths[3 + i]);
        if (i + 1 < shipFiles.size()) {
            envShipFileList.append(',');
        }
    }

    paths[0] = remotePathJar;
    paths[1] = remotePathConf;
    paths[2] = new Path(fs.getHomeDirectory(), ".flink/" + appId.toString() + "/");
    FsPermission permission = new FsPermission(FsAction.ALL, FsAction.ALL, FsAction.ALL);
    fs.setPermission(paths[2], permission); // set permission for path.
    Utils.setTokensFor(amContainer, paths, this.conf);

    amContainer.setLocalResources(localResources);
    fs.close();

    // Setup CLASSPATH for ApplicationMaster
    Map<String, String> appMasterEnv = new HashMap<String, String>();
    Utils.setupEnv(conf, appMasterEnv);
    // set configuration values
    appMasterEnv.put(Client.ENV_TM_COUNT, String.valueOf(taskManagerCount));
    appMasterEnv.put(Client.ENV_TM_CORES, String.valueOf(tmCores));
    appMasterEnv.put(Client.ENV_TM_MEMORY, String.valueOf(tmMemory));
    appMasterEnv.put(Client.FLINK_JAR_PATH, remotePathJar.toString());
    appMasterEnv.put(Client.ENV_APP_ID, appId.toString());
    appMasterEnv.put(Client.ENV_CLIENT_HOME_DIR, fs.getHomeDirectory().toString());
    appMasterEnv.put(Client.ENV_CLIENT_SHIP_FILES, envShipFileList.toString());
    appMasterEnv.put(Client.ENV_CLIENT_USERNAME, UserGroupInformation.getCurrentUser().getShortUserName());

    amContainer.setEnvironment(appMasterEnv);

    // Set up resource type requirements for ApplicationMaster
    Resource capability = Records.newRecord(Resource.class);
    capability.setMemory(jmMemory);
    capability.setVirtualCores(1);

    appContext.setApplicationName("Flink"); // application name
    appContext.setAMContainerSpec(amContainer);
    appContext.setResource(capability);
    appContext.setQueue(queue);

    // file that we write into the conf/ dir containing the jobManager address.
    final File addrFile = new File(confDirPath + CliFrontend.JOBMANAGER_ADDRESS_FILE);

    Runtime.getRuntime().addShutdownHook(new Thread() {
        @Override
        public void run() {
            try {
                LOG.info("Killing the Flink-YARN application.");
                yarnClient.killApplication(appId);
                LOG.info("Deleting files in " + paths[2]);
                FileSystem shutFS = FileSystem.get(conf);
                shutFS.delete(paths[2], true); // delete conf and jar file.
                shutFS.close();
            } catch (Exception e) {
                LOG.warn("Exception while killing the YARN application", e);
            }
            try {
                addrFile.delete();
            } catch (Exception e) {
                LOG.warn("Exception while deleting the jobmanager address file", e);
            }
            LOG.info("YARN Client is shutting down");
            yarnClient.stop();
        }
    });

    LOG.info("Submitting application master " + appId);
    yarnClient.submitApplication(appContext);
    ApplicationReport appReport = yarnClient.getApplicationReport(appId);
    YarnApplicationState appState = appReport.getYarnApplicationState();
    boolean told = false;
    char[] el = { '/', '|', '\\', '-' };
    int i = 0;
    while (appState != YarnApplicationState.FINISHED && appState != YarnApplicationState.KILLED
            && appState != YarnApplicationState.FAILED) {
        if (!told && appState == YarnApplicationState.RUNNING) {
            System.err.println("Flink JobManager is now running on " + appReport.getHost() + ":" + jmPort);
            System.err.println("JobManager Web Interface: " + appReport.getTrackingUrl());
            // write jobmanager connect information

            PrintWriter out = new PrintWriter(addrFile);
            out.println(appReport.getHost() + ":" + jmPort);
            out.close();
            addrFile.setReadable(true, false); // readable for all.
            told = true;
        }
        if (!told) {
            System.err.print(el[i++] + "\r");
            if (i == el.length) {
                i = 0;
            }
            Thread.sleep(500); // wait for the application to switch to RUNNING
        } else {
            Thread.sleep(5000);
        }

        appReport = yarnClient.getApplicationReport(appId);
        appState = appReport.getYarnApplicationState();
    }

    LOG.info("Application " + appId + " finished with" + " state " + appState + " at "
            + appReport.getFinishTime());
    if (appState == YarnApplicationState.FAILED || appState == YarnApplicationState.KILLED) {
        LOG.warn("Application failed. Diagnostics " + appReport.getDiagnostics());
    }

}

From source file: org.apache.flink.yarn.FlinkYarnClient.java

License: Apache License

/**
 * This method will block until the ApplicationMaster/JobManager have been
 * deployed on YARN.
 */
protected AbstractFlinkYarnCluster deployInternal() throws Exception {
    isReadyForDepoyment();

    LOG.info("Using values:");
    LOG.info("\tTaskManager count = {}", taskManagerCount);
    LOG.info("\tJobManager memory = {}", jobManagerMemoryMb);
    LOG.info("\tTaskManager memory = {}", taskManagerMemoryMb);

    // Create application via yarnClient
    yarnApplication = yarnClient.createApplication();
    GetNewApplicationResponse appResponse = yarnApplication.getNewApplicationResponse();

    // ------------------ Add dynamic properties to local flinkConfiguraton ------

    List<Tuple2<String, String>> dynProperties = CliFrontend.getDynamicProperties(dynamicPropertiesEncoded);
    for (Tuple2<String, String> dynProperty : dynProperties) {
        flinkConfiguration.setString(dynProperty.f0, dynProperty.f1);
    }

    // ------------------ Check if the specified queue exists --------------

    try {
        List<QueueInfo> queues = yarnClient.getAllQueues();
        if (queues.size() > 0 && this.yarnQueue != null) { // check only if there are queues configured in yarn and for this session.
            boolean queueFound = false;
            for (QueueInfo queue : queues) {
                if (queue.getQueueName().equals(this.yarnQueue)) {
                    queueFound = true;
                    break;
                }
            }
            if (!queueFound) {
                String queueNames = "";
                for (QueueInfo queue : queues) {
                    queueNames += queue.getQueueName() + ", ";
                }
                LOG.warn("The specified queue '" + this.yarnQueue + "' does not exist. " + "Available queues: "
                        + queueNames);
            }
        } else {
            LOG.debug("The YARN cluster does not have any queues configured");
        }
    } catch (Throwable e) {
        LOG.warn("Error while getting queue information from YARN: " + e.getMessage());
        if (LOG.isDebugEnabled()) {
            LOG.debug("Error details", e);
        }
    }

    // ------------------ Check if the YARN Cluster has the requested resources --------------

    // the yarnMinAllocationMB specifies the smallest possible container allocation size.
    // all allocations below this value are automatically set to this value.
    final int yarnMinAllocationMB = conf.getInt("yarn.scheduler.minimum-allocation-mb", 0);
    if (jobManagerMemoryMb < yarnMinAllocationMB || taskManagerMemoryMb < yarnMinAllocationMB) {
        LOG.warn("The JobManager or TaskManager memory is below the smallest possible YARN Container size. "
                + "The value of 'yarn.scheduler.minimum-allocation-mb' is '" + yarnMinAllocationMB
                + "'. Please increase the memory size."
                + "YARN will allocate the smaller containers but the scheduler will account for the minimum-allocation-mb, maybe not all instances "
                + "you requested will start.");
    }

    // set the memory to minAllocationMB to do the next checks correctly
    if (jobManagerMemoryMb < yarnMinAllocationMB) {
        jobManagerMemoryMb = yarnMinAllocationMB;
    }
    if (taskManagerMemoryMb < yarnMinAllocationMB) {
        taskManagerMemoryMb = yarnMinAllocationMB;
    }

    Resource maxRes = appResponse.getMaximumResourceCapability();
    final String NOTE = "Please check the 'yarn.scheduler.maximum-allocation-mb' and the 'yarn.nodemanager.resource.memory-mb' configuration values\n";
    if (jobManagerMemoryMb > maxRes.getMemory()) {
        failSessionDuringDeployment();
        throw new YarnDeploymentException(
                "The cluster does not have the requested resources for the JobManager available!\n"
                        + "Maximum Memory: " + maxRes.getMemory() + "MB Requested: " + jobManagerMemoryMb
                        + "MB. " + NOTE);
    }

    if (taskManagerMemoryMb > maxRes.getMemory()) {
        failSessionDuringDeployment();
        throw new YarnDeploymentException(
                "The cluster does not have the requested resources for the TaskManagers available!\n"
                        + "Maximum Memory: " + maxRes.getMemory() + " Requested: " + taskManagerMemoryMb
                        + "MB. " + NOTE);
    }

    final String NOTE_RSC = "\nThe Flink YARN client will try to allocate the YARN session, but maybe not all TaskManagers are "
            + "connecting from the beginning because the resources are currently not available in the cluster. "
            + "The allocation might take more time than usual because the Flink YARN client needs to wait until "
            + "the resources become available.";
    int totalMemoryRequired = jobManagerMemoryMb + taskManagerMemoryMb * taskManagerCount;
    ClusterResourceDescription freeClusterMem = getCurrentFreeClusterResources(yarnClient);
    if (freeClusterMem.totalFreeMemory < totalMemoryRequired) {
        LOG.warn("This YARN session requires " + totalMemoryRequired + "MB of memory in the cluster. "
                + "There are currently only " + freeClusterMem.totalFreeMemory + "MB available." + NOTE_RSC);

    }
    if (taskManagerMemoryMb > freeClusterMem.containerLimit) {
        LOG.warn("The requested amount of memory for the TaskManagers (" + taskManagerMemoryMb
                + "MB) is more than " + "the largest possible YARN container: " + freeClusterMem.containerLimit
                + NOTE_RSC);
    }
    if (jobManagerMemoryMb > freeClusterMem.containerLimit) {
        LOG.warn(
                "The requested amount of memory for the JobManager (" + jobManagerMemoryMb + "MB) is more than "
                        + "the largest possible YARN container: " + freeClusterMem.containerLimit + NOTE_RSC);
    }

    // ----------------- check if the requested containers fit into the cluster.

    int[] nmFree = Arrays.copyOf(freeClusterMem.nodeManagersFree, freeClusterMem.nodeManagersFree.length);
    // first, allocate the jobManager somewhere.
    if (!allocateResource(nmFree, jobManagerMemoryMb)) {
        LOG.warn("Unable to find a NodeManager that can fit the JobManager/Application master. "
                + "The JobManager requires " + jobManagerMemoryMb + "MB. NodeManagers available: "
                + Arrays.toString(freeClusterMem.nodeManagersFree) + NOTE_RSC);
    }
    // allocate TaskManagers
    for (int i = 0; i < taskManagerCount; i++) {
        if (!allocateResource(nmFree, taskManagerMemoryMb)) {
            LOG.warn("There is not enough memory available in the YARN cluster. "
                    + "The TaskManager(s) require " + taskManagerMemoryMb + "MB each. "
                    + "NodeManagers available: " + Arrays.toString(freeClusterMem.nodeManagersFree) + "\n"
                    + "After allocating the JobManager (" + jobManagerMemoryMb + "MB) and (" + i + "/"
                    + taskManagerCount + ") TaskManagers, " + "the following NodeManagers are available: "
                    + Arrays.toString(nmFree) + NOTE_RSC);
        }
    }

    // ------------------ Prepare Application Master Container  ------------------------------

    // respect custom JVM options in the YAML file
    final String javaOpts = flinkConfiguration.getString(ConfigConstants.FLINK_JVM_OPTIONS, "");

    String logbackFile = configurationDirectory + File.separator + FlinkYarnSessionCli.CONFIG_FILE_LOGBACK_NAME;
    boolean hasLogback = new File(logbackFile).exists();
    String log4jFile = configurationDirectory + File.separator + FlinkYarnSessionCli.CONFIG_FILE_LOG4J_NAME;

    boolean hasLog4j = new File(log4jFile).exists();
    if (hasLogback) {
        shipFiles.add(new File(logbackFile));
    }
    if (hasLog4j) {
        shipFiles.add(new File(log4jFile));
    }

    // Set up the container launch context for the application master
    ContainerLaunchContext amContainer = Records.newRecord(ContainerLaunchContext.class);

    String amCommand = "$JAVA_HOME/bin/java" + " -Xmx"
            + Utils.calculateHeapSize(jobManagerMemoryMb, flinkConfiguration) + "M " + javaOpts;

    if (hasLogback || hasLog4j) {
        amCommand += " -Dlog.file=\"" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/jobmanager-main.log\"";
    }

    if (hasLogback) {
        amCommand += " -Dlogback.configurationFile=file:" + FlinkYarnSessionCli.CONFIG_FILE_LOGBACK_NAME;
    }
    if (hasLog4j) {
        amCommand += " -Dlog4j.configuration=file:" + FlinkYarnSessionCli.CONFIG_FILE_LOG4J_NAME;
    }

    amCommand += " " + ApplicationMaster.class.getName() + " " + " 1>"
            + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/jobmanager-stdout.log" + " 2>"
            + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/jobmanager-stderr.log";
    amContainer.setCommands(Collections.singletonList(amCommand));

    LOG.debug("Application Master start command: " + amCommand);

    // initialize HDFS
    // Copy the application master jar to the filesystem
    // Create a local resource to point to the destination jar path
    final FileSystem fs = FileSystem.get(conf);

    // hard-coded check for the GoogleHDFS client because it's not overriding the getScheme() method.
    if (!fs.getClass().getSimpleName().equals("GoogleHadoopFileSystem") && fs.getScheme().startsWith("file")) {
        LOG.warn("The file system scheme is '" + fs.getScheme() + "'. This indicates that the "
                + "specified Hadoop configuration path is wrong and the sytem is using the default Hadoop configuration values."
                + "The Flink YARN client needs to store its files in a distributed file system");
    }

    // Set-up ApplicationSubmissionContext for the application
    ApplicationSubmissionContext appContext = yarnApplication.getApplicationSubmissionContext();
    appContext.setMaxAppAttempts(flinkConfiguration.getInteger(ConfigConstants.YARN_APPLICATION_ATTEMPTS, 1));

    final ApplicationId appId = appContext.getApplicationId();

    // Setup jar for ApplicationMaster
    LocalResource appMasterJar = Records.newRecord(LocalResource.class);
    LocalResource flinkConf = Records.newRecord(LocalResource.class);
    Path remotePathJar = Utils.setupLocalResource(conf, fs, appId.toString(), flinkJarPath, appMasterJar,
            fs.getHomeDirectory());
    Path remotePathConf = Utils.setupLocalResource(conf, fs, appId.toString(), flinkConfigurationPath,
            flinkConf, fs.getHomeDirectory());
    Map<String, LocalResource> localResources = new HashMap<String, LocalResource>(2);
    localResources.put("flink.jar", appMasterJar);
    localResources.put("flink-conf.yaml", flinkConf);

    // setup security tokens (code from apache storm)
    final Path[] paths = new Path[2 + shipFiles.size()];
    StringBuilder envShipFileList = new StringBuilder();
    // upload ship files
    for (int i = 0; i < shipFiles.size(); i++) {
        File shipFile = shipFiles.get(i);
        LocalResource shipResources = Records.newRecord(LocalResource.class);
        Path shipLocalPath = new Path("file://" + shipFile.getAbsolutePath());
        paths[2 + i] = Utils.setupLocalResource(conf, fs, appId.toString(), shipLocalPath, shipResources,
                fs.getHomeDirectory());
        localResources.put(shipFile.getName(), shipResources);

        envShipFileList.append(paths[2 + i]);
        if (i + 1 < shipFiles.size()) {
            envShipFileList.append(',');
        }
    }

    paths[0] = remotePathJar;
    paths[1] = remotePathConf;
    sessionFilesDir = new Path(fs.getHomeDirectory(), ".flink/" + appId.toString() + "/");

    FsPermission permission = new FsPermission(FsAction.ALL, FsAction.NONE, FsAction.NONE);
    fs.setPermission(sessionFilesDir, permission); // set permission for path.

    Utils.setTokensFor(amContainer, paths, conf);

    amContainer.setLocalResources(localResources);
    fs.close();

    // Setup CLASSPATH for ApplicationMaster
    Map<String, String> appMasterEnv = new HashMap<String, String>();
    Utils.setupEnv(conf, appMasterEnv);
    // set configuration values
    appMasterEnv.put(FlinkYarnClient.ENV_TM_COUNT, String.valueOf(taskManagerCount));
    appMasterEnv.put(FlinkYarnClient.ENV_TM_MEMORY, String.valueOf(taskManagerMemoryMb));
    appMasterEnv.put(FlinkYarnClient.FLINK_JAR_PATH, remotePathJar.toString());
    appMasterEnv.put(FlinkYarnClient.ENV_APP_ID, appId.toString());
    appMasterEnv.put(FlinkYarnClient.ENV_CLIENT_HOME_DIR, fs.getHomeDirectory().toString());
    appMasterEnv.put(FlinkYarnClient.ENV_CLIENT_SHIP_FILES, envShipFileList.toString());
    appMasterEnv.put(FlinkYarnClient.ENV_CLIENT_USERNAME,
            UserGroupInformation.getCurrentUser().getShortUserName());
    appMasterEnv.put(FlinkYarnClient.ENV_SLOTS, String.valueOf(slots));
    appMasterEnv.put(FlinkYarnClient.ENV_DETACHED, String.valueOf(detached));
    appMasterEnv.put(FlinkYarnClient.ENV_STREAMING_MODE, String.valueOf(streamingMode));

    if (dynamicPropertiesEncoded != null) {
        appMasterEnv.put(FlinkYarnClient.ENV_DYNAMIC_PROPERTIES, dynamicPropertiesEncoded);
    }

    amContainer.setEnvironment(appMasterEnv);

    // Set up resource type requirements for ApplicationMaster
    Resource capability = Records.newRecord(Resource.class);
    capability.setMemory(jobManagerMemoryMb);
    capability.setVirtualCores(1);

    String name;
    if (customName == null) {
        name = "Flink session with " + taskManagerCount + " TaskManagers";
        if (detached) {
            name += " (detached)";
        }
    } else {
        name = customName;
    }

    appContext.setApplicationName(name); // application name
    appContext.setApplicationType("Apache Flink");
    appContext.setAMContainerSpec(amContainer);
    appContext.setResource(capability);
    if (yarnQueue != null) {
        appContext.setQueue(yarnQueue);
    }

    LOG.info("Submitting application master " + appId);
    yarnClient.submitApplication(appContext);

    LOG.info("Waiting for the cluster to be allocated");
    int waittime = 0;
    loop: while (true) {
        ApplicationReport report = yarnClient.getApplicationReport(appId);
        YarnApplicationState appState = report.getYarnApplicationState();
        switch (appState) {
        case FAILED:
        case FINISHED:
        case KILLED:
            throw new YarnDeploymentException("The YARN application unexpectedly switched to state " + appState
                    + " during deployment. \n" + "Diagnostics from YARN: " + report.getDiagnostics() + "\n"
                    + "If log aggregation is enabled on your cluster, use this command to further invesitage the issue:\n"
                    + "yarn logs -applicationId " + appId);
            //break ..
        case RUNNING:
            LOG.info("YARN application has been deployed successfully.");
            break loop;
        default:
            LOG.info("Deploying cluster, current state " + appState);
            if (waittime > 60000) {
                LOG.info(
                        "Deployment took more than 60 seconds. Please check if the requested resources are available in the YARN cluster");
            }

        }
        waittime += 1000;
        Thread.sleep(1000);
    }
    // the Flink cluster is deployed in YARN. Represent cluster
    return new FlinkYarnCluster(yarnClient, appId, conf, flinkConfiguration, sessionFilesDir, detached);
}

From source file: org.apache.flink.yarn.FlinkYarnClientBase.java

License: Apache License

/**
 * This method will block until the ApplicationMaster/JobManager have been
 * deployed on YARN.
 */
protected AbstractFlinkYarnCluster deployInternal() throws Exception {
    isReadyForDeployment();

    LOG.info("Using values:");
    LOG.info("\tTaskManager count = {}", taskManagerCount);
    LOG.info("\tJobManager memory = {}", jobManagerMemoryMb);
    LOG.info("\tTaskManager memory = {}", taskManagerMemoryMb);

    // Create application via yarnClient
    yarnApplication = yarnClient.createApplication();
    GetNewApplicationResponse appResponse = yarnApplication.getNewApplicationResponse();

    // ------------------ Add dynamic properties to local flinkConfiguraton ------

    Map<String, String> dynProperties = CliFrontend.getDynamicProperties(dynamicPropertiesEncoded);
    for (Map.Entry<String, String> dynProperty : dynProperties.entrySet()) {
        flinkConfiguration.setString(dynProperty.getKey(), dynProperty.getValue());
    }

    try {
        org.apache.flink.core.fs.FileSystem.setDefaultScheme(flinkConfiguration);
    } catch (IOException e) {
        throw new IOException("Error while setting the default " + "filesystem scheme from configuration.", e);
    }
    // ------------------ Check if the specified queue exists --------------

    try {
        List<QueueInfo> queues = yarnClient.getAllQueues();
        if (queues.size() > 0 && this.yarnQueue != null) { // check only if there are queues configured in yarn and for this session.
            boolean queueFound = false;
            for (QueueInfo queue : queues) {
                if (queue.getQueueName().equals(this.yarnQueue)) {
                    queueFound = true;
                    break;
                }
            }
            if (!queueFound) {
                String queueNames = "";
                for (QueueInfo queue : queues) {
                    queueNames += queue.getQueueName() + ", ";
                }
                LOG.warn("The specified queue '" + this.yarnQueue + "' does not exist. " + "Available queues: "
                        + queueNames);
            }
        } else {
            LOG.debug("The YARN cluster does not have any queues configured");
        }
    } catch (Throwable e) {
        LOG.warn("Error while getting queue information from YARN: " + e.getMessage());
        if (LOG.isDebugEnabled()) {
            LOG.debug("Error details", e);
        }
    }

    // ------------------ Check if the YARN Cluster has the requested resources --------------

    // the yarnMinAllocationMB specifies the smallest possible container allocation size.
    // all allocations below this value are automatically set to this value.
    final int yarnMinAllocationMB = conf.getInt("yarn.scheduler.minimum-allocation-mb", 0);
    if (jobManagerMemoryMb < yarnMinAllocationMB || taskManagerMemoryMb < yarnMinAllocationMB) {
        LOG.warn("The JobManager or TaskManager memory is below the smallest possible YARN Container size. "
                + "The value of 'yarn.scheduler.minimum-allocation-mb' is '" + yarnMinAllocationMB
                + "'. Please increase the memory size."
                + "YARN will allocate the smaller containers but the scheduler will account for the minimum-allocation-mb, maybe not all instances "
                + "you requested will start.");
    }

    // set the memory to minAllocationMB to do the next checks correctly
    if (jobManagerMemoryMb < yarnMinAllocationMB) {
        jobManagerMemoryMb = yarnMinAllocationMB;
    }
    if (taskManagerMemoryMb < yarnMinAllocationMB) {
        taskManagerMemoryMb = yarnMinAllocationMB;
    }

    Resource maxRes = appResponse.getMaximumResourceCapability();
    final String NOTE = "Please check the 'yarn.scheduler.maximum-allocation-mb' and the 'yarn.nodemanager.resource.memory-mb' configuration values\n";
    if (jobManagerMemoryMb > maxRes.getMemory()) {
        failSessionDuringDeployment();
        throw new YarnDeploymentException(
                "The cluster does not have the requested resources for the JobManager available!\n"
                        + "Maximum Memory: " + maxRes.getMemory() + "MB Requested: " + jobManagerMemoryMb
                        + "MB. " + NOTE);
    }

    if (taskManagerMemoryMb > maxRes.getMemory()) {
        failSessionDuringDeployment();
        throw new YarnDeploymentException(
                "The cluster does not have the requested resources for the TaskManagers available!\n"
                        + "Maximum Memory: " + maxRes.getMemory() + " Requested: " + taskManagerMemoryMb
                        + "MB. " + NOTE);
    }

    final String NOTE_RSC = "\nThe Flink YARN client will try to allocate the YARN session, but not all TaskManagers may "
            + "connect from the beginning because the resources are currently not available in the cluster. "
            + "The allocation might take more time than usual because the Flink YARN client needs to wait until "
            + "the resources become available.";
    int totalMemoryRequired = jobManagerMemoryMb + taskManagerMemoryMb * taskManagerCount;
    ClusterResourceDescription freeClusterMem = getCurrentFreeClusterResources(yarnClient);
    if (freeClusterMem.totalFreeMemory < totalMemoryRequired) {
        LOG.warn("This YARN session requires " + totalMemoryRequired + "MB of memory in the cluster. "
                + "There are currently only " + freeClusterMem.totalFreeMemory + "MB available." + NOTE_RSC);

    }
    if (taskManagerMemoryMb > freeClusterMem.containerLimit) {
        LOG.warn("The requested amount of memory for the TaskManagers (" + taskManagerMemoryMb
                + "MB) is more than " + "the largest possible YARN container: " + freeClusterMem.containerLimit
                + NOTE_RSC);
    }
    if (jobManagerMemoryMb > freeClusterMem.containerLimit) {
        LOG.warn(
                "The requested amount of memory for the JobManager (" + jobManagerMemoryMb + "MB) is more than "
                        + "the largest possible YARN container: " + freeClusterMem.containerLimit + NOTE_RSC);
    }

    // ----------------- check if the requested containers fit into the cluster.

    int[] nmFree = Arrays.copyOf(freeClusterMem.nodeManagersFree, freeClusterMem.nodeManagersFree.length);
    // first, allocate the jobManager somewhere.
    if (!allocateResource(nmFree, jobManagerMemoryMb)) {
        LOG.warn("Unable to find a NodeManager that can fit the JobManager/Application master. "
                + "The JobManager requires " + jobManagerMemoryMb + "MB. NodeManagers available: "
                + Arrays.toString(freeClusterMem.nodeManagersFree) + NOTE_RSC);
    }
    // allocate TaskManagers
    for (int i = 0; i < taskManagerCount; i++) {
        if (!allocateResource(nmFree, taskManagerMemoryMb)) {
            LOG.warn("There is not enough memory available in the YARN cluster. "
                    + "The TaskManager(s) require " + taskManagerMemoryMb + "MB each. "
                    + "NodeManagers available: " + Arrays.toString(freeClusterMem.nodeManagersFree) + "\n"
                    + "After allocating the JobManager (" + jobManagerMemoryMb + "MB) and (" + i + "/"
                    + taskManagerCount + ") TaskManagers, " + "the following NodeManagers are available: "
                    + Arrays.toString(nmFree) + NOTE_RSC);
        }
    }

    // ------------------ Prepare Application Master Container  ------------------------------

    // respect custom JVM options in the YAML file
    final String javaOpts = flinkConfiguration.getString(ConfigConstants.FLINK_JVM_OPTIONS, "");

    String logbackFile = configurationDirectory + File.separator + FlinkYarnSessionCli.CONFIG_FILE_LOGBACK_NAME;
    boolean hasLogback = new File(logbackFile).exists();
    String log4jFile = configurationDirectory + File.separator + FlinkYarnSessionCli.CONFIG_FILE_LOG4J_NAME;

    boolean hasLog4j = new File(log4jFile).exists();
    if (hasLogback) {
        shipFiles.add(new File(logbackFile));
    }
    if (hasLog4j) {
        shipFiles.add(new File(log4jFile));
    }

    // Set up the container launch context for the application master
    ContainerLaunchContext amContainer = Records.newRecord(ContainerLaunchContext.class);

    String amCommand = "$JAVA_HOME/bin/java" + " -Xmx"
            + Utils.calculateHeapSize(jobManagerMemoryMb, flinkConfiguration) + "M " + javaOpts;

    if (hasLogback || hasLog4j) {
        amCommand += " -Dlog.file=\"" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/jobmanager.log\"";

        if (hasLogback) {
            amCommand += " -Dlogback.configurationFile=file:" + FlinkYarnSessionCli.CONFIG_FILE_LOGBACK_NAME;
        }

        if (hasLog4j) {
            amCommand += " -Dlog4j.configuration=file:" + FlinkYarnSessionCli.CONFIG_FILE_LOG4J_NAME;
        }
    }

    amCommand += " " + getApplicationMasterClass().getName() + " " + " 1>"
            + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/jobmanager.out" + " 2>"
            + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/jobmanager.err";
    amContainer.setCommands(Collections.singletonList(amCommand));

    LOG.debug("Application Master start command: " + amCommand);

    // initialize HDFS
    // Copy the application master jar to the filesystem
    // Create a local resource to point to the destination jar path
    final FileSystem fs = FileSystem.get(conf);

    // hard-coded check for the GoogleHadoopFileSystem client because it does not override the getScheme() method.
    if (!fs.getClass().getSimpleName().equals("GoogleHadoopFileSystem") && fs.getScheme().startsWith("file")) {
        LOG.warn("The file system scheme is '" + fs.getScheme() + "'. This indicates that the "
                + "specified Hadoop configuration path is wrong and the system is using the default Hadoop configuration values. "
                + "The Flink YARN client needs to store its files in a distributed file system.");
    }

    // Set-up ApplicationSubmissionContext for the application
    ApplicationSubmissionContext appContext = yarnApplication.getApplicationSubmissionContext();

    if (RecoveryMode.isHighAvailabilityModeActivated(flinkConfiguration)) {
        // activate re-execution of failed applications
        appContext.setMaxAppAttempts(flinkConfiguration.getInteger(ConfigConstants.YARN_APPLICATION_ATTEMPTS,
                YarnConfiguration.DEFAULT_RM_AM_MAX_ATTEMPTS));

        activateHighAvailabilitySupport(appContext);
    } else {
        // set number of application retries to 1 in the default case
        appContext
                .setMaxAppAttempts(flinkConfiguration.getInteger(ConfigConstants.YARN_APPLICATION_ATTEMPTS, 1));
    }

    final ApplicationId appId = appContext.getApplicationId();

    // Setup jar for ApplicationMaster
    LocalResource appMasterJar = Records.newRecord(LocalResource.class);
    LocalResource flinkConf = Records.newRecord(LocalResource.class);
    Path remotePathJar = Utils.setupLocalResource(fs, appId.toString(), flinkJarPath, appMasterJar,
            fs.getHomeDirectory());
    Path remotePathConf = Utils.setupLocalResource(fs, appId.toString(), flinkConfigurationPath, flinkConf,
            fs.getHomeDirectory());
    Map<String, LocalResource> localResources = new HashMap<>(2);
    localResources.put("flink.jar", appMasterJar);
    localResources.put("flink-conf.yaml", flinkConf);

    // setup security tokens (code from apache storm)
    final Path[] paths = new Path[2 + shipFiles.size()];
    StringBuilder envShipFileList = new StringBuilder();
    // upload ship files
    for (int i = 0; i < shipFiles.size(); i++) {
        File shipFile = shipFiles.get(i);
        LocalResource shipResources = Records.newRecord(LocalResource.class);
        Path shipLocalPath = new Path("file://" + shipFile.getAbsolutePath());
        paths[2 + i] = Utils.setupLocalResource(fs, appId.toString(), shipLocalPath, shipResources,
                fs.getHomeDirectory());
        localResources.put(shipFile.getName(), shipResources);

        envShipFileList.append(paths[2 + i]);
        if (i + 1 < shipFiles.size()) {
            envShipFileList.append(',');
        }
    }

    paths[0] = remotePathJar;
    paths[1] = remotePathConf;
    sessionFilesDir = new Path(fs.getHomeDirectory(), ".flink/" + appId.toString() + "/");

    FsPermission permission = new FsPermission(FsAction.ALL, FsAction.NONE, FsAction.NONE);
    fs.setPermission(sessionFilesDir, permission); // set permission for path.

    Utils.setTokensFor(amContainer, paths, conf);

    amContainer.setLocalResources(localResources);
    fs.close();

    // Setup CLASSPATH for ApplicationMaster
    Map<String, String> appMasterEnv = new HashMap<>();
    // set user specified app master environment variables
    appMasterEnv.putAll(Utils.getEnvironmentVariables(ConfigConstants.YARN_APPLICATION_MASTER_ENV_PREFIX,
            flinkConfiguration));
    // set classpath from YARN configuration
    Utils.setupEnv(conf, appMasterEnv);
    // set Flink on YARN internal configuration values
    appMasterEnv.put(YarnConfigKeys.ENV_TM_COUNT, String.valueOf(taskManagerCount));
    appMasterEnv.put(YarnConfigKeys.ENV_TM_MEMORY, String.valueOf(taskManagerMemoryMb));
    appMasterEnv.put(YarnConfigKeys.FLINK_JAR_PATH, remotePathJar.toString());
    appMasterEnv.put(YarnConfigKeys.ENV_APP_ID, appId.toString());
    appMasterEnv.put(YarnConfigKeys.ENV_CLIENT_HOME_DIR, fs.getHomeDirectory().toString());
    appMasterEnv.put(YarnConfigKeys.ENV_CLIENT_SHIP_FILES, envShipFileList.toString());
    appMasterEnv.put(YarnConfigKeys.ENV_CLIENT_USERNAME,
            UserGroupInformation.getCurrentUser().getShortUserName());
    appMasterEnv.put(YarnConfigKeys.ENV_SLOTS, String.valueOf(slots));
    appMasterEnv.put(YarnConfigKeys.ENV_DETACHED, String.valueOf(detached));

    if (dynamicPropertiesEncoded != null) {
        appMasterEnv.put(YarnConfigKeys.ENV_DYNAMIC_PROPERTIES, dynamicPropertiesEncoded);
    }

    amContainer.setEnvironment(appMasterEnv);

    // Set up resource type requirements for ApplicationMaster
    Resource capability = Records.newRecord(Resource.class);
    capability.setMemory(jobManagerMemoryMb);
    capability.setVirtualCores(1);

    String name;
    if (customName == null) {
        name = "Flink session with " + taskManagerCount + " TaskManagers";
        if (detached) {
            name += " (detached)";
        }
    } else {
        name = customName;
    }

    appContext.setApplicationName(name); // application name
    appContext.setApplicationType("Apache Flink");
    appContext.setAMContainerSpec(amContainer);
    appContext.setResource(capability);
    if (yarnQueue != null) {
        appContext.setQueue(yarnQueue);
    }

    // add a hook to clean up in case deployment fails
    Runtime.getRuntime().addShutdownHook(deploymentFailureHook);
    LOG.info("Submitting application master " + appId);
    yarnClient.submitApplication(appContext);

    LOG.info("Waiting for the cluster to be allocated");
    int waittime = 0;
    loop: while (true) {
        ApplicationReport report;
        try {
            report = yarnClient.getApplicationReport(appId);
        } catch (IOException e) {
            throw new YarnDeploymentException("Failed to deploy the cluster: " + e.getMessage());
        }
        YarnApplicationState appState = report.getYarnApplicationState();
        switch (appState) {
        case FAILED:
        case FINISHED:
        case KILLED:
            throw new YarnDeploymentException("The YARN application unexpectedly switched to state " + appState
                    + " during deployment. \n" + "Diagnostics from YARN: " + report.getDiagnostics() + "\n"
                    + "If log aggregation is enabled on your cluster, use this command to further investigate the issue:\n"
                    + "yarn logs -applicationId " + appId);
            // no break needed; the exception above exits the method
        case RUNNING:
            LOG.info("YARN application has been deployed successfully.");
            break loop;
        default:
            LOG.info("Deploying cluster, current state " + appState);
            if (waittime > 60000) {
                LOG.info(
                        "Deployment took more than 60 seconds. Please check if the requested resources are available in the YARN cluster");
            }

        }
        waittime += 1000;
        Thread.sleep(1000);
    }
    // print the application id so the user can cancel it themselves.
    if (isDetached()) {
        LOG.info("The Flink YARN client has been started in detached mode. In order to stop "
                + "Flink on YARN, use the following command or a YARN web interface to stop "
                + "it:\nyarn application -kill " + appId + "\nPlease also note that the "
                + "temporary files of the YARN session in the home directory will not be removed.");
    }
    // since deployment was successful, remove the hook
    try {
        Runtime.getRuntime().removeShutdownHook(deploymentFailureHook);
    } catch (IllegalStateException e) {
        // ignore; we are already inside the shutdown hook.
    }
    // the Flink cluster is deployed in YARN. Represent cluster
    return new FlinkYarnCluster(yarnClient, appId, conf, flinkConfiguration, sessionFilesDir, detached);
}
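
The method above derives the per-application staging directory (".flink/<appId>/") from ApplicationId.toString(). A minimal, self-contained sketch of that pattern, assuming only a reachable ResourceManager and the standard YarnClient API (the class name StagingDirExample is illustrative, not part of Flink), might look like this:

import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.yarn.api.records.ApplicationId;
import org.apache.hadoop.yarn.client.api.YarnClient;
import org.apache.hadoop.yarn.client.api.YarnClientApplication;
import org.apache.hadoop.yarn.conf.YarnConfiguration;

public class StagingDirExample {
    public static void main(String[] args) throws Exception {
        YarnConfiguration conf = new YarnConfiguration();
        YarnClient yarnClient = YarnClient.createYarnClient();
        yarnClient.init(conf);
        yarnClient.start();
        try {
            // ask the ResourceManager for a new application id
            YarnClientApplication app = yarnClient.createApplication();
            ApplicationId appId = app.getApplicationSubmissionContext().getApplicationId();

            // derive a per-application staging directory from ApplicationId.toString(),
            // e.g. ".flink/application_1400000000000_0001/"
            FileSystem fs = FileSystem.get(conf);
            Path sessionFilesDir = new Path(fs.getHomeDirectory(), ".flink/" + appId.toString() + "/");
            System.out.println("Staging directory would be: " + sessionFilesDir);
        } finally {
            yarnClient.stop();
        }
    }
}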

From source file:org.apache.flink.yarn.YARNSessionCapacitySchedulerITCase.java

License:Apache License

private void testDetachedPerJobYarnClusterInternal(String job) {
    YarnClient yc = YarnClient.createYarnClient();
    yc.init(yarnConfiguration);
    yc.start();

    // get temporary folder for writing output of wordcount example
    File tmpOutFolder = null;
    try {
        tmpOutFolder = tmp.newFolder();
    } catch (IOException e) {
        throw new RuntimeException(e);
    }

    // get temporary file for reading input data for wordcount example
    File tmpInFile;
    try {
        tmpInFile = tmp.newFile();
        FileUtils.writeStringToFile(tmpInFile, WordCountData.TEXT);
    } catch (IOException e) {
        throw new RuntimeException(e);
    }

    Runner runner = startWithArgs(
            new String[] { "run", "-m", "yarn-cluster", "-yj", flinkUberjar.getAbsolutePath(), "-yt",
                    flinkLibFolder.getAbsolutePath(), "-yn", "1", "-yjm", "768", "-yD",
                    "yarn.heap-cutoff-ratio=0.5", // test if the cutoff is passed correctly
                    "-ytm", "1024", "-ys", "2", // test requesting slots from YARN.
                    "--yarndetached", job, "--input", tmpInFile.getAbsoluteFile().toString(), "--output",
                    tmpOutFolder.getAbsoluteFile().toString() },
            "Job has been submitted with JobID", RunTypes.CLI_FRONTEND);

    // it should usually be 2, but on slow machines, the number varies
    Assert.assertTrue("There should be at most 2 containers running", getRunningContainers() <= 2);
    // give the runner some time to detach
    for (int attempt = 0; runner.isAlive() && attempt < 5; attempt++) {
        try {
            Thread.sleep(500);
        } catch (InterruptedException e) {
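            // ignore the interruption and simply re-check whether the runner is still alive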
        }
    }
    Assert.assertFalse("The runner should detach.", runner.isAlive());
    LOG.info("CLI Frontend has returned, so the job is running");

    // find out the application id and wait until it has finished.
    try {
        List<ApplicationReport> apps = yc.getApplications(EnumSet.of(YarnApplicationState.RUNNING));

        ApplicationId tmpAppId;
        if (apps.size() == 1) {
            // Preferred way to find the right appId, but sometimes the app is already shutting down very fast.
            // Only one running
            tmpAppId = apps.get(0).getApplicationId();

            LOG.info("waiting for the job with appId {} to finish", tmpAppId);
            // wait until the app has finished
            while (yc.getApplications(EnumSet.of(YarnApplicationState.RUNNING)).size() > 0) {
                sleep(500);
            }
        } else {
            // get appId by finding the latest finished appid
            apps = yc.getApplications();
            Collections.sort(apps, new Comparator<ApplicationReport>() {
                @Override
                public int compare(ApplicationReport o1, ApplicationReport o2) {
                    return o1.getApplicationId().compareTo(o2.getApplicationId()) * -1;
                }
            });
            tmpAppId = apps.get(0).getApplicationId();
            LOG.info("Selected {} as the last appId from {}", tmpAppId, Arrays.toString(apps.toArray()));
        }
        final ApplicationId id = tmpAppId;

        // now it has finished.
        // check the output files.
        File[] listOfOutputFiles = tmpOutFolder.listFiles();

        Assert.assertNotNull("Taskmanager output not found", listOfOutputFiles);
        LOG.info("The job has finished. TaskManager output files found in {}", tmpOutFolder);

        // read all output files in output folder to one output string
        String content = "";
        for (File f : listOfOutputFiles) {
            if (f.isFile()) {
                content += FileUtils.readFileToString(f) + "\n";
            }
        }
        // check for some of the wordcount outputs.
        Assert.assertTrue("Expected string 'da 5' or '(all,2)' not found in string '" + content + "'",
                content.contains("da 5") || content.contains("(da,5)") || content.contains("(all,2)"));
        Assert.assertTrue("Expected string 'der 29' or '(mind,1)' not found in string'" + content + "'",
                content.contains("der 29") || content.contains("(der,29)") || content.contains("(mind,1)"));

        // check if the heap size for the TaskManager was set correctly
        File jobmanagerLog = YarnTestBase.findFile("..", new FilenameFilter() {
            @Override
            public boolean accept(File dir, String name) {
                return name.contains("jobmanager.log") && dir.getAbsolutePath().contains(id.toString());
            }
        });
        Assert.assertNotNull("Unable to locate JobManager log", jobmanagerLog);
        content = FileUtils.readFileToString(jobmanagerLog);
        // TM was started with 1024 but we cut off 50% (NOT THE DEFAULT VALUE)
        String expected = "Starting TaskManagers with command: $JAVA_HOME/bin/java -Xms424m -Xmx424m";
        Assert.assertTrue(
                "Expected string '" + expected + "' not found in JobManager log: '" + jobmanagerLog + "'",
                content.contains(expected));
        expected = " (2/2) (attempt #0) to ";
        Assert.assertTrue("Expected string '" + expected + "' not found in JobManager log."
                + "This string checks that the job has been started with a parallelism of 2. Log contents: '"
                + jobmanagerLog + "'", content.contains(expected));

        // make sure the detached app is really finished.
        LOG.info("Checking again that app has finished");
        ApplicationReport rep;
        do {
            sleep(500);
            rep = yc.getApplicationReport(id);
            LOG.info("Got report {}", rep);
        } while (rep.getYarnApplicationState() == YarnApplicationState.RUNNING);

    } catch (Throwable t) {
        LOG.warn("Error while detached yarn session was running", t);
        Assert.fail(t.getMessage());
    }
}
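
The test above hand-rolls a polling loop over yc.getApplications(...) to wait for the detached job to finish. Below is a small sketch of the same idea against a known ApplicationId, assuming a started YarnClient; the class name WaitForFinish is illustrative only:

import java.util.EnumSet;
import org.apache.hadoop.yarn.api.records.ApplicationId;
import org.apache.hadoop.yarn.api.records.ApplicationReport;
import org.apache.hadoop.yarn.api.records.YarnApplicationState;
import org.apache.hadoop.yarn.client.api.YarnClient;

public final class WaitForFinish {
    private static final EnumSet<YarnApplicationState> TERMINAL =
            EnumSet.of(YarnApplicationState.FINISHED,
                       YarnApplicationState.FAILED,
                       YarnApplicationState.KILLED);

    /** Blocks until the given application reaches a terminal state and returns its last report. */
    public static ApplicationReport awaitTermination(YarnClient yarnClient, ApplicationId appId)
            throws Exception {
        while (true) {
            ApplicationReport report = yarnClient.getApplicationReport(appId);
            if (TERMINAL.contains(report.getYarnApplicationState())) {
                return report;
            }
            System.out.println("Application " + appId.toString() + " is in state "
                    + report.getYarnApplicationState() + ", waiting...");
            Thread.sleep(500);
        }
    }
}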

From source file:org.apache.hoya.yarn.appmaster.HoyaAppMaster.java

License:Apache License

/**
 * Create and run the cluster.
 * @return exit code
 * @throws Throwable on a failure
 */
private int createAndRunCluster(String clustername) throws Throwable {
    HoyaVersionInfo.loadAndPrintVersionInfo(log);

    //load the cluster description from the cd argument
    String hoyaClusterDir = serviceArgs.getHoyaClusterURI();
    URI hoyaClusterURI = new URI(hoyaClusterDir);
    Path clusterDirPath = new Path(hoyaClusterURI);
    HoyaFileSystem fs = getClusterFS();

    // build up information about the running application -this
    // will be passed down to the cluster status
    MapOperations appInformation = new MapOperations();

    AggregateConf instanceDefinition = InstanceIO.loadInstanceDefinitionUnresolved(fs, clusterDirPath);

    log.info("Deploying cluster {}:", instanceDefinition);

    //REVISIT: why is this done?
    appState.updateInstanceDefinition(instanceDefinition);
    File confDir = getLocalConfDir();
    if (!confDir.exists() || !confDir.isDirectory()) {
        log.error("Bad conf dir {}", confDir);
        File parentFile = confDir.getParentFile();
        log.error("Parent dir {}:\n{}", parentFile, HoyaUtils.listDir(parentFile));
        throw new BadCommandArgumentsException("Configuration directory %s doesn't exist", confDir);
    }

    Configuration serviceConf = getConfig();
    // Try to get the proper filtering of static resources through the yarn proxy working
    serviceConf.set("hadoop.http.filter.initializers",
            "org.apache.hadoop.yarn.server.webproxy.amfilter.AmFilterInitializer");

    conf = new YarnConfiguration(serviceConf);
    //get our provider
    MapOperations globalOptions = instanceDefinition.getInternalOperations().getGlobalOptions();
    String providerType = globalOptions.getMandatoryOption(OptionKeys.INTERNAL_PROVIDER_NAME);
    log.info("Cluster provider type is {}", providerType);
    HoyaProviderFactory factory = HoyaProviderFactory.createHoyaProviderFactory(providerType);
    providerService = factory.createServerProvider();
    // init the provider BUT DO NOT START IT YET
    providerService.init(getConfig());
    addService(providerService);

    InetSocketAddress address = HoyaUtils.getRmSchedulerAddress(conf);
    log.info("RM is at {}", address);
    yarnRPC = YarnRPC.create(conf);

    /*
     * Extract the container ID. This is then
     * turned into an (incomplete) container
     */
    appMasterContainerID = ConverterUtils.toContainerId(
            HoyaUtils.mandatoryEnvVariable(ApplicationConstants.Environment.CONTAINER_ID.name()));
    appAttemptID = appMasterContainerID.getApplicationAttemptId();

    ApplicationId appid = appAttemptID.getApplicationId();
    log.info("Hoya AM for ID {}", appid.getId());

    appInformation.put(StatusKeys.INFO_AM_CONTAINER_ID, appMasterContainerID.toString());
    appInformation.put(StatusKeys.INFO_AM_APP_ID, appid.toString());
    appInformation.put(StatusKeys.INFO_AM_ATTEMPT_ID, appAttemptID.toString());

    UserGroupInformation currentUser = UserGroupInformation.getCurrentUser();
    Credentials credentials = currentUser.getCredentials();
    DataOutputBuffer dob = new DataOutputBuffer();
    credentials.writeTokenStorageToStream(dob);
    dob.close();
    // Now remove the AM->RM token so that containers cannot access it.
    Iterator<Token<?>> iter = credentials.getAllTokens().iterator();
    while (iter.hasNext()) {
        Token<?> token = iter.next();
        log.info("Token {}", token.getKind());
        if (token.getKind().equals(AMRMTokenIdentifier.KIND_NAME)) {
            iter.remove();
        }
    }
    allTokens = ByteBuffer.wrap(dob.getData(), 0, dob.getLength());

    // set up secret manager
    secretManager = new ClientToAMTokenSecretManager(appAttemptID, null);

    // if not a secure cluster, extract the username -it will be
    // propagated to workers
    if (!UserGroupInformation.isSecurityEnabled()) {
        hoyaUsername = System.getenv(HADOOP_USER_NAME);
        log.info(HADOOP_USER_NAME + "='{}'", hoyaUsername);
    }

    Map<String, String> envVars;

    /**
     * It is critical this section is synchronized, to stop async AM events
     * arriving while registering a restarting AM.
     */
    synchronized (appState) {
        int heartbeatInterval = HEARTBEAT_INTERVAL;

        //add the RM client -this brings the callbacks in
        asyncRMClient = AMRMClientAsync.createAMRMClientAsync(heartbeatInterval, this);
        addService(asyncRMClient);
        //wrap it for the app state model
        rmOperationHandler = new AsyncRMOperationHandler(asyncRMClient);
        //now bring it up
        runChildService(asyncRMClient);

        //nmclient relays callbacks back to this class
        nmClientAsync = new NMClientAsyncImpl("nmclient", this);
        runChildService(nmClientAsync);

        //bring up the Hoya RPC service
        startHoyaRPCServer();

        InetSocketAddress rpcServiceAddr = rpcService.getConnectAddress();
        appMasterHostname = rpcServiceAddr.getHostName();
        appMasterRpcPort = rpcServiceAddr.getPort();
        appMasterTrackingUrl = null;
        log.info("AM Server is listening at {}:{}", appMasterHostname, appMasterRpcPort);
        appInformation.put(StatusKeys.INFO_AM_HOSTNAME, appMasterHostname);
        appInformation.set(StatusKeys.INFO_AM_RPC_PORT, appMasterRpcPort);

        //build the role map
        List<ProviderRole> providerRoles = new ArrayList<ProviderRole>(providerService.getRoles());
        providerRoles.addAll(HoyaAMClientProvider.ROLES);

        // Start up the WebApp and track the URL for it
        webApp = new HoyaAMWebApp();
        WebApps.$for("hoyaam", WebAppApi.class, new WebAppApiImpl(this, appState, providerService), "ws")
                .with(serviceConf).start(webApp);
        appMasterTrackingUrl = "http://" + appMasterHostname + ":" + webApp.port();
        WebAppService<HoyaAMWebApp> webAppService = new WebAppService<HoyaAMWebApp>("hoya", webApp);

        webAppService.init(conf);
        webAppService.start();
        addService(webAppService);

        appInformation.put(StatusKeys.INFO_AM_WEB_URL, appMasterTrackingUrl + "/");
        appInformation.set(StatusKeys.INFO_AM_WEB_PORT, webApp.port());

        // Register self with ResourceManager
        // This will start heartbeating to the RM
        // address = HoyaUtils.getRmSchedulerAddress(asyncRMClient.getConfig());
        log.info("Connecting to RM at {},address tracking URL={}", appMasterRpcPort, appMasterTrackingUrl);
        RegisterApplicationMasterResponse response = asyncRMClient.registerApplicationMaster(appMasterHostname,
                appMasterRpcPort, appMasterTrackingUrl);
        Resource maxResources = response.getMaximumResourceCapability();
        containerMaxMemory = maxResources.getMemory();
        containerMaxCores = maxResources.getVirtualCores();
        appState.setContainerLimits(maxResources.getMemory(), maxResources.getVirtualCores());
        // set the RM-defined maximum cluster values
        appInformation.put(ResourceKeys.YARN_CORES, Integer.toString(containerMaxCores));
        appInformation.put(ResourceKeys.YARN_MEMORY, Integer.toString(containerMaxMemory));

        boolean securityEnabled = UserGroupInformation.isSecurityEnabled();
        if (securityEnabled) {
            secretManager.setMasterKey(response.getClientToAMTokenMasterKey().array());
            applicationACLs = response.getApplicationACLs();

            //tell the server what the ACLs are 
            rpcService.getServer().refreshServiceAcl(conf, new HoyaAMPolicyProvider());
        }

        // extract container list
        List<Container> liveContainers = AMRestartSupport.retrieveContainersFromPreviousAttempt(response);
        String amRestartSupported = Boolean.toString(liveContainers != null);
        appInformation.put(StatusKeys.INFO_AM_RESTART_SUPPORTED, amRestartSupported);

        //now validate the installation
        Configuration providerConf = providerService.loadProviderConfigurationInformation(confDir);

        providerService.validateApplicationConfiguration(instanceDefinition, confDir, securityEnabled);

        //determine the location for the role history data
        Path historyDir = new Path(clusterDirPath, HISTORY_DIR_NAME);

        //build the instance
        appState.buildInstance(instanceDefinition, providerConf, providerRoles, fs.getFileSystem(), historyDir,
                liveContainers, appInformation);

        // add the AM to the list of nodes in the cluster

        appState.buildAppMasterNode(appMasterContainerID, appMasterHostname, webApp.port(),
                appMasterHostname + ":" + webApp.port());

        // build up environment variables that the AM wants set in every container
        // irrespective of provider and role.
        envVars = new HashMap<String, String>();
        if (hoyaUsername != null) {
            envVars.put(HADOOP_USER_NAME, hoyaUsername);
        }
    }
    String rolesTmpSubdir = appMasterContainerID.toString() + "/roles";

    String amTmpDir = globalOptions.getMandatoryOption(OptionKeys.INTERNAL_AM_TMP_DIR);

    Path tmpDirPath = new Path(amTmpDir);
    Path launcherTmpDirPath = new Path(tmpDirPath, rolesTmpSubdir);
    fs.getFileSystem().mkdirs(launcherTmpDirPath);

    //launcher service
    launchService = new RoleLaunchService(this, providerService, fs, new Path(getGeneratedConfDir()), envVars,
            launcherTmpDirPath);

    runChildService(launchService);

    appState.noteAMLaunched();

    //Give the provider restricted access to the state
    providerService.bind(appState);

    // launch the provider; this is expected to trigger a callback that
    // brings up the service
    launchProviderService(instanceDefinition, confDir);

    try {
        //now block waiting to be told to exit the process
        waitForAMCompletionSignal();
        //shutdown time
    } finally {
        finish();
    }

    return amExitCode;
}
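
The ApplicationMaster above reconstructs its own ApplicationId by parsing the CONTAINER_ID environment variable that YARN sets for every container. A stripped-down sketch of just that extraction, independent of the Hoya classes (the class name AmIdentity is illustrative):

import org.apache.hadoop.yarn.api.ApplicationConstants;
import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
import org.apache.hadoop.yarn.api.records.ApplicationId;
import org.apache.hadoop.yarn.api.records.ContainerId;
import org.apache.hadoop.yarn.util.ConverterUtils;

public class AmIdentity {
    public static void main(String[] args) {
        // YARN exports the container id to every launched container
        String containerIdEnv = System.getenv(ApplicationConstants.Environment.CONTAINER_ID.name());
        if (containerIdEnv == null) {
            throw new IllegalStateException("Not running inside a YARN container: "
                    + ApplicationConstants.Environment.CONTAINER_ID.name() + " is not set");
        }
        ContainerId containerId = ConverterUtils.toContainerId(containerIdEnv);
        ApplicationAttemptId attemptId = containerId.getApplicationAttemptId();
        ApplicationId appId = attemptId.getApplicationId();

        // the string forms are what typically end up in status maps and log messages
        System.out.println("container   = " + containerId.toString());
        System.out.println("attempt     = " + attemptId.toString());
        System.out.println("application = " + appId.toString());
    }
}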

From source file:org.apache.hoya.yarn.client.HoyaClient.java

License:Apache License

/**
 *
 * @param clustername/*from ww  w .  java 2 s. co  m*/
 * @param clusterDirectory
 * @param instanceDefinition
 * @param debugAM
 * @return the launched application
 * @throws YarnException
 * @throws IOException
 */
public LaunchedApplication launchApplication(String clustername, Path clusterDirectory,
        AggregateConf instanceDefinition, boolean debugAM) throws YarnException, IOException {

    deployedClusterName = clustername;
    HoyaUtils.validateClusterName(clustername);
    verifyNoLiveClusters(clustername);
    Configuration config = getConfig();
    boolean clusterSecure = HoyaUtils.isClusterSecure(config);
    //create the Hoya AM provider -this helps set up the AM
    HoyaAMClientProvider hoyaAM = new HoyaAMClientProvider(config);

    instanceDefinition.resolve();
    launchedInstanceDefinition = instanceDefinition;

    ConfTreeOperations internalOperations = instanceDefinition.getInternalOperations();
    MapOperations internalOptions = internalOperations.getGlobalOptions();
    ConfTreeOperations resourceOperations = instanceDefinition.getResourceOperations();
    ConfTreeOperations appOperations = instanceDefinition.getAppConfOperations();
    Path generatedConfDirPath = createPathThatMustExist(
            internalOptions.getMandatoryOption(OptionKeys.INTERNAL_GENERATED_CONF_PATH));
    Path snapshotConfPath = createPathThatMustExist(
            internalOptions.getMandatoryOption(OptionKeys.INTERNAL_SNAPSHOT_CONF_PATH));

    // cluster Provider
    AbstractClientProvider provider = createClientProvider(
            internalOptions.getMandatoryOption(OptionKeys.INTERNAL_PROVIDER_NAME));
    // make sure the conf dir is valid;

    // now build up the image path
    // TODO: consider supporting apps that don't have an image path
    Path imagePath = HoyaUtils.extractImagePath(hoyaFileSystem, internalOptions);
    if (log.isDebugEnabled()) {
        log.debug(instanceDefinition.toString());
    }
    MapOperations hoyaAMResourceComponent = resourceOperations.getOrAddComponent(HoyaKeys.COMPONENT_AM);
    AppMasterLauncher amLauncher = new AppMasterLauncher(clustername, HoyaKeys.APP_TYPE, config, hoyaFileSystem,
            yarnClient, clusterSecure, hoyaAMResourceComponent);

    ApplicationId appId = amLauncher.getApplicationId();
    // set the application name;
    amLauncher.setKeepContainersOverRestarts(true);

    amLauncher.setMaxAppAttempts(config.getInt(KEY_AM_RESTART_LIMIT, DEFAULT_AM_RESTART_LIMIT));

    hoyaFileSystem.purgeHoyaAppInstanceTempFiles(clustername);
    Path tempPath = hoyaFileSystem.createHoyaAppInstanceTempPath(clustername, appId.toString() + "/am");
    String libdir = "lib";
    Path libPath = new Path(tempPath, libdir);
    hoyaFileSystem.getFileSystem().mkdirs(libPath);
    log.debug("FS={}, tempPath={}, libdir={}", hoyaFileSystem.toString(), tempPath, libPath);
    // set local resources for the application master
    // local files or archives as needed
    // In this scenario, the jar file for the application master is part of the local resources
    Map<String, LocalResource> localResources = amLauncher.getLocalResources();
    // conf directory setup
    Path remoteConfPath = null;
    String relativeConfDir = null;
    String confdirProp = System.getProperty(HoyaKeys.PROPERTY_CONF_DIR);
    if (confdirProp == null || confdirProp.isEmpty()) {
        log.debug("No local configuration directory provided as system property");
    } else {
        File confDir = new File(confdirProp);
        if (!confDir.exists()) {
            throw new BadConfigException(HOYA_CONFIGURATION_DIRECTORY_NOT_FOUND, confDir);
        }
        Path localConfDirPath = HoyaUtils.createLocalPath(confDir);
        log.debug("Copying AM configuration data from {}", localConfDirPath);
        remoteConfPath = new Path(clusterDirectory, HoyaKeys.SUBMITTED_CONF_DIR);
        HoyaUtils.copyDirectory(config, localConfDirPath, remoteConfPath, null);
    }
    // the assumption here is that a MiniMR cluster implies this is a test run
    // and the classpath can look after itself

    if (!getUsingMiniMRCluster()) {

        log.debug("Destination is not a MiniYARNCluster -copying full classpath");

        // insert conf dir first
        if (remoteConfPath != null) {
            relativeConfDir = HoyaKeys.SUBMITTED_CONF_DIR;
            Map<String, LocalResource> submittedConfDir = hoyaFileSystem.submitDirectory(remoteConfPath,
                    relativeConfDir);
            HoyaUtils.mergeMaps(localResources, submittedConfDir);
        }

        log.debug("Copying JARs from local filesystem");
        // Copy the application master jar to the filesystem
        // Create a local resource to point to the destination jar path

        HoyaUtils.putJar(localResources, hoyaFileSystem, this.getClass(), tempPath, libdir, SLIDER_JAR);
    }
    // build up the configuration 
    // IMPORTANT: it is only after this call that site configurations
    // will be valid.

    propagatePrincipals(config, instanceDefinition);
    Configuration clientConfExtras = new Configuration(false);
    // then build up the generated path.
    FsPermission clusterPerms = getClusterDirectoryPermissions(config);
    HoyaUtils.copyDirectory(config, snapshotConfPath, generatedConfDirPath, clusterPerms);

    // add AM and provider specific artifacts to the resource map
    Map<String, LocalResource> providerResources;
    // standard AM resources
    hoyaAM.prepareAMAndConfigForLaunch(hoyaFileSystem, config, amLauncher, instanceDefinition, snapshotConfPath,
            generatedConfDirPath, clientConfExtras, libdir, tempPath);
    //add provider-specific resources
    provider.prepareAMAndConfigForLaunch(hoyaFileSystem, config, amLauncher, instanceDefinition,
            snapshotConfPath, generatedConfDirPath, clientConfExtras, libdir, tempPath);

    // now that the site config is fully generated, the provider gets
    // to do a quick review of them.
    log.debug("Preflight validation of cluster configuration");

    hoyaAM.preflightValidateClusterConfiguration(hoyaFileSystem, clustername, config, instanceDefinition,
            clusterDirectory, generatedConfDirPath, clusterSecure);

    provider.preflightValidateClusterConfiguration(hoyaFileSystem, clustername, config, instanceDefinition,
            clusterDirectory, generatedConfDirPath, clusterSecure);

    // now add the image if it was set
    if (hoyaFileSystem.maybeAddImagePath(localResources, imagePath)) {
        log.debug("Registered image path {}", imagePath);
    }

    // build the environment
    amLauncher.putEnv(HoyaUtils.buildEnvMap(hoyaAMResourceComponent));
    String classpath = HoyaUtils.buildClasspath(relativeConfDir, libdir, getConfig(), getUsingMiniMRCluster());
    amLauncher.setEnv("CLASSPATH", classpath);
    if (log.isDebugEnabled()) {
        log.debug("AM classpath={}", classpath);
        log.debug("Environment Map:\n{}", HoyaUtils.stringifyMap(amLauncher.getEnv()));
        log.debug("Files in lib path\n{}", hoyaFileSystem.listFSDir(libPath));
    }

    // rm address

    InetSocketAddress rmSchedulerAddress = null;
    try {
        rmSchedulerAddress = HoyaUtils.getRmSchedulerAddress(config);
    } catch (IllegalArgumentException e) {
        throw new BadConfigException("%s Address invalid: %s", YarnConfiguration.RM_SCHEDULER_ADDRESS,
                config.get(YarnConfiguration.RM_SCHEDULER_ADDRESS));

    }
    String rmAddr = NetUtils.getHostPortString(rmSchedulerAddress);

    CommandLineBuilder commandLine = new CommandLineBuilder();
    commandLine.addJavaBinary();
    // insert any JVM options
    hoyaAM.addJVMOptions(instanceDefinition, commandLine);
    // enable Java assertions
    commandLine.enableJavaAssertions();
    // add the Hoya AM service entry point
    commandLine.add(HoyaAMArgs.CLASSNAME);

    // create action and the cluster name
    commandLine.add(HoyaActions.ACTION_CREATE);
    commandLine.add(clustername);

    // debug
    if (debugAM) {
        commandLine.add(Arguments.ARG_DEBUG);
    }

    // set the cluster directory path
    commandLine.add(Arguments.ARG_HOYA_CLUSTER_URI);
    commandLine.add(clusterDirectory.toUri().toString());

    if (!isUnset(rmAddr)) {
        commandLine.add(Arguments.ARG_RM_ADDR);
        commandLine.add(rmAddr);
    }

    if (serviceArgs.getFilesystemURL() != null) {
        commandLine.add(Arguments.ARG_FILESYSTEM);
        commandLine.add(serviceArgs.getFilesystemURL().toString());
    }

    if (clusterSecure) {
        // if the cluster is secure, make sure that
        // the relevant security settings go over
        propagateConfOption(commandLine, config, HoyaXmlConfKeys.KEY_SECURITY_ENABLED);
        propagateConfOption(commandLine, config, DFSConfigKeys.DFS_NAMENODE_USER_NAME_KEY);
    }
    // write out the path output
    commandLine.addOutAndErrFiles(STDOUT_AM, STDERR_AM);

    String cmdStr = commandLine.build();
    log.info("Completed setting up app master command {}", cmdStr);

    amLauncher.addCommandLine(commandLine);

    // the Hoya AM gets to configure the AM requirements, not the custom provider
    hoyaAM.prepareAMResourceRequirements(hoyaAMResourceComponent, amLauncher.getResource());

    // Set the priority for the application master

    int amPriority = config.getInt(KEY_YARN_QUEUE_PRIORITY, DEFAULT_YARN_QUEUE_PRIORITY);

    amLauncher.setPriority(amPriority);

    // Set the queue to which this application is to be submitted in the RM
    // Queue for App master
    String amQueue = config.get(KEY_YARN_QUEUE, DEFAULT_HOYA_YARN_QUEUE);

    amLauncher.setQueue(amQueue);

    // Submit the application to the applications manager
    // SubmitApplicationResponse submitResp = applicationsManager.submitApplication(appRequest);
    // Ignore the response as either a valid response object is returned on success
    // or an exception thrown to denote some form of a failure

    // submit the application
    LaunchedApplication launchedApplication = amLauncher.submitApplication();
    return launchedApplication;
}
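
launchApplication() delegates queue, priority, and submission to the AppMasterLauncher wrapper. Below is a hedged sketch of the equivalent raw YarnClient calls, assuming a submission context whose AM ContainerLaunchContext and Resource have already been prepared elsewhere; the class name and configuration keys are illustrative assumptions, not Hoya constants:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.yarn.api.records.ApplicationId;
import org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext;
import org.apache.hadoop.yarn.api.records.Priority;
import org.apache.hadoop.yarn.client.api.YarnClient;
import org.apache.hadoop.yarn.util.Records;

public class SubmitWithQueueAndPriority {

    /**
     * Sets the AM priority and queue from the client configuration, then submits
     * the (already fully prepared) ApplicationSubmissionContext.
     */
    public static ApplicationId submit(YarnClient yarnClient, ApplicationSubmissionContext appContext,
            Configuration config) throws Exception {
        // priority for the application master (key name is illustrative)
        Priority priority = Records.newRecord(Priority.class);
        priority.setPriority(config.getInt("example.yarn.queue.priority", 1));
        appContext.setPriority(priority);

        // queue to submit to (key name is illustrative)
        appContext.setQueue(config.get("example.yarn.queue", "default"));

        // submit and hand the id back; appContext.getApplicationId().toString()
        // is what usually ends up in logs and status maps
        yarnClient.submitApplication(appContext);
        ApplicationId appId = appContext.getApplicationId();
        System.out.println("Submitted application " + appId.toString());
        return appId;
    }
}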