Example usage for org.apache.hadoop.fs FileSystem getHomeDirectory

Introduction

This page lists example usages of org.apache.hadoop.fs.FileSystem.getHomeDirectory().

Prototype

public Path getHomeDirectory() 

Document

Return the current user's home directory in this FileSystem.
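
A minimal sketch of the call in isolation may help before the full examples. It assumes a default Configuration picked up from the classpath; the path suffix is illustrative, not taken from any source file below.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class HomeDirectoryExample {
    public static void main(String[] args) throws Exception {
        // Loads core-site.xml / hdfs-site.xml from the classpath.
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);

        // On HDFS this typically resolves to /user/<current user>.
        Path home = fs.getHomeDirectory();
        System.out.println("Home directory: " + home);

        // The idiom most examples below rely on: resolve an
        // application-specific path relative to the home directory.
        Path staged = new Path(home, "myapp/data.txt"); // hypothetical suffix
        System.out.println("Staging target: " + staged);
    }
}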

Usage

From source file:gobblin.util.JobLauncherUtils.java

License:Apache License

private static ParallelRunner getParallelRunner(FileSystem fs, Closer closer, int parallelRunnerThreads,
        Map<String, ParallelRunner> parallelRunners) {
    String uriAndHomeDir = new Path(new Path(fs.getUri()), fs.getHomeDirectory()).toString();
    if (!parallelRunners.containsKey(uriAndHomeDir)) {
        parallelRunners.put(uriAndHomeDir, closer.register(new ParallelRunner(parallelRunnerThreads, fs)));
    }
    return parallelRunners.get(uriAndHomeDir);
}
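
Here getHomeDirectory() is combined with the filesystem URI to form the cache key, so a single ParallelRunner is shared per user view of each filesystem rather than created per call.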

From source file:hadoop.yarn.distributedshell.DshellClient.java

License:Apache License

/**
 * Main run function for the client
 * 
 * @return true if application completed successfully
 * @throws IOException
 * @throws YarnException
 */
public boolean run() throws IOException, YarnException {

    LOG.info("Running Client");
    yarnClient.start();

    YarnClusterMetrics clusterMetrics = yarnClient.getYarnClusterMetrics();
    LOG.info("Got Cluster metric info from ASM" + ", numNodeManagers=" + clusterMetrics.getNumNodeManagers());

    List<NodeReport> clusterNodeReports = yarnClient.getNodeReports(NodeState.RUNNING);
    LOG.info("Got Cluster node info from ASM");
    for (NodeReport node : clusterNodeReports) {
        LOG.info("Got node report from ASM for" + ", nodeId=" + node.getNodeId() + ", nodeAddress"
                + node.getHttpAddress() + ", nodeRackName" + node.getRackName() + ", nodeNumContainers"
                + node.getNumContainers());
    }

    QueueInfo queueInfo = yarnClient.getQueueInfo(this.amQueue);
    LOG.info("Queue info" + ", queueName=" + queueInfo.getQueueName() + ", queueCurrentCapacity="
            + queueInfo.getCurrentCapacity() + ", queueMaxCapacity=" + queueInfo.getMaximumCapacity()
            + ", queueApplicationCount=" + queueInfo.getApplications().size() + ", queueChildQueueCount="
            + queueInfo.getChildQueues().size());

    List<QueueUserACLInfo> listAclInfo = yarnClient.getQueueAclsInfo();
    for (QueueUserACLInfo aclInfo : listAclInfo) {
        for (QueueACL userAcl : aclInfo.getUserAcls()) {
            LOG.info("User ACL Info for Queue" + ", queueName=" + aclInfo.getQueueName() + ", userAcl="
                    + userAcl.name());
        }
    }

    // Get a new application id
    YarnClientApplication app = yarnClient.createApplication();
    GetNewApplicationResponse appResponse = app.getNewApplicationResponse();
    // TODO get min/max resource capabilities from RM and change memory ask
    // if needed
    // If we do not have min/max, we may not be able to correctly request
    // the required resources from the RM for the app master
    // Memory ask has to be a multiple of min and less than max.
    // Dump out information about cluster capability as seen by the resource
    // manager
    int maxMem = appResponse.getMaximumResourceCapability().getMemory();
    LOG.info("Max mem capabililty of resources in this cluster " + maxMem);

    // A resource ask cannot exceed the max.
    if (amMemory > maxMem) {
        LOG.info("AM memory specified above max threshold of cluster. Using max value." + ", specified="
                + amMemory + ", max=" + maxMem);
        amMemory = maxMem;
    }

    int maxVCores = appResponse.getMaximumResourceCapability().getVirtualCores();
    LOG.info("Max virtual cores capabililty of resources in this cluster " + maxVCores);

    if (amVCores > maxVCores) {
        LOG.info("AM virtual cores specified above max threshold of cluster. " + "Using max value."
                + ", specified=" + amVCores + ", max=" + maxVCores);
        amVCores = maxVCores;
    }

    // set the application name
    ApplicationSubmissionContext appContext = app.getApplicationSubmissionContext();
    ApplicationId appId = appContext.getApplicationId();

    appContext.setKeepContainersAcrossApplicationAttempts(keepContainers);
    appContext.setApplicationName(appName);

    // Set up the container launch context for the application master
    ContainerLaunchContext amContainer = Records.newRecord(ContainerLaunchContext.class);

    // set local resources for the application master
    // local files or archives as needed
    // In this scenario, the jar file for the application master is part of
    // the local resources
    Map<String, LocalResource> localResources = new HashMap<String, LocalResource>();

    LOG.info("Copy App Master jar from local filesystem and add to local environment");
    // Copy the application master jar to the filesystem
    // Create a local resource to point to the destination jar path
    FileSystem fs = FileSystem.get(conf);
    addToLocalResources(fs, appMasterJar, appMasterJarPath, appId.toString(), localResources, null);

    // Set the log4j properties if needed
    if (!log4jPropFile.isEmpty()) {
        addToLocalResources(fs, log4jPropFile, log4jPath, appId.toString(), localResources, null);
    }

    // The shell script has to be made available on the final container(s)
    // where it will be executed.
    // To do this, we need to first copy into the filesystem that is visible
    // to the yarn framework.
    // We do not need to set this as a local resource for the application
    // master as the application master does not need it.
    String hdfsShellScriptLocation = "";
    long hdfsShellScriptLen = 0;
    long hdfsShellScriptTimestamp = 0;
    if (!shellScriptPath.isEmpty()) {
        Path shellSrc = new Path(shellScriptPath);
        String shellPathSuffix = appName + "/" + appId.toString() + "/" + SCRIPT_PATH;
        Path shellDst = new Path(fs.getHomeDirectory(), shellPathSuffix);
        fs.copyFromLocalFile(false, true, shellSrc, shellDst);
        hdfsShellScriptLocation = shellDst.toUri().toString();
        FileStatus shellFileStatus = fs.getFileStatus(shellDst);
        hdfsShellScriptLen = shellFileStatus.getLen();
        hdfsShellScriptTimestamp = shellFileStatus.getModificationTime();
    }

    if (!shellCommand.isEmpty()) {
        addToLocalResources(fs, null, shellCommandPath, appId.toString(), localResources, shellCommand);
    }

    if (shellArgs.length > 0) {
        addToLocalResources(fs, null, shellArgsPath, appId.toString(), localResources,
                StringUtils.join(shellArgs, " "));
    }
    // Set local resource info into app master container launch context
    amContainer.setLocalResources(localResources);

    // Set the necessary security tokens as needed
    // amContainer.setContainerTokens(containerToken);

    // Set the env variables to be setup in the env where the application
    // master will be run
    LOG.info("Set the environment for the application master");
    Map<String, String> env = new HashMap<String, String>();

    // put location of shell script into env
    // using the env info, the application master will create the correct
    // local resource for the
    // eventual containers that will be launched to execute the shell
    // scripts
    env.put(DshellDSConstants.DISTRIBUTEDSHELLSCRIPTLOCATION, hdfsShellScriptLocation);
    env.put(DshellDSConstants.DISTRIBUTEDSHELLSCRIPTTIMESTAMP, Long.toString(hdfsShellScriptTimestamp));
    env.put(DshellDSConstants.DISTRIBUTEDSHELLSCRIPTLEN, Long.toString(hdfsShellScriptLen));

    // ======================================== container jars
    if (containerJarPaths.length != 0) {
        for (int i = 0; i < containerJarPaths.length; i++) {
            String hdfsJarLocation = "";
            String[] jarNameSplit = containerJarPaths[i].split("/");
            String jarName = jarNameSplit[jarNameSplit.length - 1];

            long hdfsJarLen = 0;
            long hdfsJarTimestamp = 0;
            if (!containerJarPaths[i].isEmpty()) {
                Path jarSrc = new Path(containerJarPaths[i]);
                String jarPathSuffix = appName + "/" + appId.toString() + "/" + jarName;
                Path jarDst = new Path(fs.getHomeDirectory(), jarPathSuffix);
                fs.copyFromLocalFile(false, true, jarSrc, jarDst);
                hdfsJarLocation = jarDst.toUri().toString();
                FileStatus jarFileStatus = fs.getFileStatus(jarDst);
                hdfsJarLen = jarFileStatus.getLen();
                hdfsJarTimestamp = jarFileStatus.getModificationTime();
                env.put(DshellDSConstants.DISTRIBUTEDJARLOCATION + i, hdfsJarLocation);
                env.put(DshellDSConstants.DISTRIBUTEDJARTIMESTAMP + i, Long.toString(hdfsJarTimestamp));
                env.put(DshellDSConstants.DISTRIBUTEDJARLEN + i, Long.toString(hdfsJarLen));
            }
        }
    }
    // ======================================== container jars

    // ======================================== container archives
    if (containerArchivePaths.length != 0) {
        for (int i = 0; i < containerArchivePaths.length; i++) {
            String hdfsArchiveLocation = "";
            String[] archiveNameSplit = containerArchivePaths[i].split("/");
            String archiveName = archiveNameSplit[archiveNameSplit.length - 1];

            long hdfsArchiveLen = 0;
            long hdfsArchiveTimestamp = 0;
            if (!containerArchivePaths[i].isEmpty()) {
                Path archiveSrc = new Path(containerArchivePaths[i]);
                String archivePathSuffix = appName + "/" + appId.toString() + "/" + archiveName;
                Path archiveDst = new Path(fs.getHomeDirectory(), archivePathSuffix);
                fs.copyFromLocalFile(false, true, archiveSrc, archiveDst);
                hdfsArchiveLocation = archiveDst.toUri().toString();
                FileStatus archiveFileStatus = fs.getFileStatus(archiveDst);
                hdfsArchiveLen = archiveFileStatus.getLen();
                hdfsArchiveTimestamp = archiveFileStatus.getModificationTime();
                env.put(DshellDSConstants.DISTRIBUTEDARCHIVELOCATION + i, hdfsArchiveLocation);
                env.put(DshellDSConstants.DISTRIBUTEDARCHIVETIMESTAMP + i, Long.toString(hdfsArchiveTimestamp));
                env.put(DshellDSConstants.DISTRIBUTEDARCHIVELEN + i, Long.toString(hdfsArchiveLen));
            }
        }
    }
    // ======================================== container archives

    // Add AppMaster.jar location to classpath
    // At some point we should not be required to add
    // the hadoop specific classpaths to the env.
    // It should be provided out of the box.
    // For now setting all required classpaths including
    // the classpath to "." for the application jar
    StringBuilder classPathEnv = new StringBuilder(Environment.CLASSPATH.$$())
            .append(ApplicationConstants.CLASS_PATH_SEPARATOR).append("./*");
    for (String c : conf.getStrings(YarnConfiguration.YARN_APPLICATION_CLASSPATH,
            YarnConfiguration.DEFAULT_YARN_CROSS_PLATFORM_APPLICATION_CLASSPATH)) {
        classPathEnv.append(ApplicationConstants.CLASS_PATH_SEPARATOR);
        classPathEnv.append(c.trim());
    }
    classPathEnv.append(ApplicationConstants.CLASS_PATH_SEPARATOR).append("./log4j.properties");

    // add the runtime classpath needed for tests to work
    if (conf.getBoolean(YarnConfiguration.IS_MINI_YARN_CLUSTER, false)) {
        classPathEnv.append(':');
        classPathEnv.append(System.getProperty("java.class.path"));
    }

    env.put("CLASSPATH", classPathEnv.toString());

    amContainer.setEnvironment(env);

    // Set the necessary command to execute the application master
    Vector<CharSequence> vargs = new Vector<CharSequence>(30);

    // Set java executable command
    LOG.info("Setting up app master command");
    vargs.add(Environment.JAVA_HOME.$$() + "/bin/java");
    // Set Xmx based on am memory size
    vargs.add("-Xmx" + amMemory + "m");
    // Set class name
    vargs.add(appMasterMainClass);
    // Set params for Application Master
    vargs.add("--container_memory " + String.valueOf(containerMemory));
    vargs.add("--container_vcores " + String.valueOf(containerVirtualCores));
    vargs.add("--num_containers " + String.valueOf(numContainers));
    vargs.add("--priority " + String.valueOf(shellCmdPriority));

    for (Map.Entry<String, String> entry : shellEnv.entrySet()) {
        vargs.add("--shell_env " + entry.getKey() + "=" + entry.getValue());
    }
    if (debugFlag) {
        vargs.add("--debug");
    }

    vargs.add("1>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/AppMaster.stdout");
    vargs.add("2>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/AppMaster.stderr");

    // Get final command
    StringBuilder command = new StringBuilder();
    for (CharSequence str : vargs) {
        command.append(str).append(" ");
    }

    LOG.info("Completed setting up app master command " + command.toString());
    List<String> commands = new ArrayList<String>();
    commands.add(command.toString());
    amContainer.setCommands(commands);

    // Set up resource type requirements
    // For now, both memory and vcores are supported, so we set memory and
    // vcores requirements
    Resource capability = Records.newRecord(Resource.class);
    capability.setMemory(amMemory);
    capability.setVirtualCores(amVCores);
    appContext.setResource(capability);

    // Service data is a binary blob that can be passed to the application
    // Not needed in this scenario
    // amContainer.setServiceData(serviceData);

    // Setup security tokens
    if (UserGroupInformation.isSecurityEnabled()) {
        Credentials credentials = new Credentials();
        String tokenRenewer = conf.get(YarnConfiguration.RM_PRINCIPAL);
        if (tokenRenewer == null || tokenRenewer.length() == 0) {
            throw new IOException("Can't get Master Kerberos principal for the RM to use as renewer");
        }

        // For now, only getting tokens for the default file-system.
        final Token<?> tokens[] = fs.addDelegationTokens(tokenRenewer, credentials);
        if (tokens != null) {
            for (Token<?> token : tokens) {
                LOG.info("Got dt for " + fs.getUri() + "; " + token);
            }
        }
        DataOutputBuffer dob = new DataOutputBuffer();
        credentials.writeTokenStorageToStream(dob);
        ByteBuffer fsTokens = ByteBuffer.wrap(dob.getData(), 0, dob.getLength());
        amContainer.setTokens(fsTokens);
    }

    appContext.setAMContainerSpec(amContainer);

    // Set the priority for the application master
    Priority pri = Records.newRecord(Priority.class);
    // TODO - what is the range for priority? how to decide?
    pri.setPriority(amPriority);
    appContext.setPriority(pri);

    // Set the queue to which this application is to be submitted in the RM
    appContext.setQueue(amQueue);

    // Submit the application to the applications manager
    // SubmitApplicationResponse submitResp =
    // applicationsManager.submitApplication(appRequest);
    // Ignore the response as either a valid response object is returned on
    // success
    // or an exception thrown to denote some form of a failure
    LOG.info("Submitting application to ASM");

    yarnClient.submitApplication(appContext);

    // TODO
    // Try submitting the same request again
    // app submission failure?

    // Monitor the application
    return monitorApplication(appId);

}
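
The idiom this client repeats for the shell script, jars, and archives (copy a local file under fs.getHomeDirectory(), then read back the FileStatus so the exact length and modification time can be handed to containers) can be condensed into a small helper. The following is an illustrative sketch only; the helper name is ours, not part of DshellClient:

import java.io.IOException;

import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

final class StagingUtil {
    // Copies a local file to <home>/<pathSuffix> and returns its status, so
    // callers can record the length and timestamp YARN checks at localization.
    static FileStatus stageToHomeDir(FileSystem fs, String localPath, String pathSuffix)
            throws IOException {
        Path src = new Path(localPath);
        Path dst = new Path(fs.getHomeDirectory(), pathSuffix);
        // delSrc=false keeps the local copy; overwrite=true replaces a stale remote file.
        fs.copyFromLocalFile(false, true, src, dst);
        return fs.getFileStatus(dst);
    }
}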

From source file:inflater.runner.RunInflater.java

License:MIT License

@Override
public int run(String[] args) throws IOException, ClassNotFoundException, InterruptedException, ParseException {
    if (args.length < 2) {
        return -1;
    }
    if (conf == null) {
        conf = new Configuration();
    }

    GiraphConfiguration giraphConf = new GiraphConfiguration(getConf());
    giraphConf.addResource(new Path("giraph-site.xml"));

    GiraphJob job = new GiraphJob(giraphConf, giraphConf.getComputationName());

    Path inputLocal = new Path(args[0]);
    Path outputLocal = new Path(args[1]);

    // We copy file from local file system to HDFS
    FileSystem fs = FileSystem.get(giraphConf);
    inputHDFS = new Path(fs.getHomeDirectory(),
            "Giraph Source" + File.separator + inputLocal.getName() + File.separator + inputLocal.getName());
    inputHDFS = fs.makeQualified(inputHDFS);

    outputHDFS = new Path(fs.getHomeDirectory(),
            "Giraph Source" + File.separator + inputLocal.getName() + File.separator + "output");
    outputHDFS = fs.makeQualified(outputHDFS);

    fs.copyFromLocalFile(false, true, inputLocal, inputHDFS);

    // Delete output path because Hadoop cannot override it.
    if (fs.exists(outputHDFS))
        fs.delete(outputHDFS, true);

    FileOutputFormat.setOutputPath(job.getInternalJob(), outputHDFS);
    GiraphFileInputFormat.addVertexInputPath(giraphConf, inputHDFS);
    new GiraphConfigurationValidator<>(giraphConf).validateConfiguration();
    boolean result = job.run(true);
    if (result) {
        fs.copyToLocalFile(false, new Path(outputHDFS, "part-m-00000"), outputLocal);
    }
    return result ? 0 : -1;
}
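
One caveat in this example: the HDFS path strings are built with File.separator, which only matches the "/" that Hadoop Path expects on POSIX systems; on a Windows client the resulting paths would be malformed. A literal "/" or the two-argument Path constructor, as used elsewhere on this page, is the safer choice.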

From source file:io.amient.yarn1.YarnClient.java

License:Open Source License

/**
 * Distributes all dependencies in a single jar, both from Client to Master and from Master to Container(s).
 */
public static void distributeResources(Configuration yarnConf, Properties appConf, String appName)
        throws IOException {
    final FileSystem distFs = FileSystem.get(yarnConf);
    final FileSystem localFs = FileSystem.getLocal(yarnConf);
    try {

        //distribute configuration
        final Path dstConfig = new Path(distFs.getHomeDirectory(), appName + ".configuration");
        final FSDataOutputStream fs = distFs.create(dstConfig);
        appConf.store(fs, "Yarn1 Application Config for " + appName);
        fs.close();
        log.info("Updated resource " + dstConfig);

        //distribute main jar
        final String localPath = YarnClient.class.getProtectionDomain().getCodeSource().getLocation().getFile()
                .replace(".jar/", ".jar");
        final Path src;
        final String jarName = appName + ".jar";
        if (localPath.endsWith(".jar")) {
            log.info("Distributing local jar : " + localPath);
            src = new Path(localPath);
        } else {
            try {
                String localArchive = localPath + appName + ".jar";
                localFs.delete(new Path(localArchive), false);
                log.info("Unpacking compile scope dependencies: " + localPath);
                executeShell("mvn -f " + localPath + "/../.. generate-resources");
                log.info("Preparing application main jar " + localArchive);
                executeShell("jar cMf " + localArchive + " -C " + localPath + " ./");
                src = new Path(localArchive);

            } catch (InterruptedException e) {
                throw new IOException(e);
            }
        }

        byte[] digest;
        final MessageDigest md = MessageDigest.getInstance("MD5");
        try (InputStream is = new FileInputStream(src.toString())) {
            byte[] buffer = new byte[8192];
            int numOfBytesRead;
            // hash the jar contents chunk by chunk
            while ((numOfBytesRead = is.read(buffer)) > 0) {
                md.update(buffer, 0, numOfBytesRead);
            }
            digest = md.digest();
        }
        log.info("Local check sum: " + Hex.encodeHexString(digest));

        final Path dst = new Path(distFs.getHomeDirectory(), jarName);
        Path remoteChecksumFile = new Path(distFs.getHomeDirectory(), jarName + ".md5");
        boolean checksumMatches = false;
        if (distFs.isFile(remoteChecksumFile)) {
            try (InputStream r = distFs.open(remoteChecksumFile)) {
                ByteArrayOutputStream buffer = new ByteArrayOutputStream();
                int nRead;
                byte[] data = new byte[1024];
                while ((nRead = r.read(data, 0, data.length)) != -1) {
                    buffer.write(data, 0, nRead);
                }
                buffer.flush();
                byte[] remoteDigest = buffer.toByteArray();
                log.info("Remote check sum: " + Hex.encodeHexString(remoteDigest));
                checksumMatches = Arrays.equals(digest, remoteDigest);

            }
        }
        if (!checksumMatches) {
            log.info("Updating resource " + dst + " ...");
            distFs.copyFromLocalFile(false, true, src, dst);
            try (FSDataOutputStream remoteChecksumStream = distFs.create(remoteChecksumFile)) {
                log.info("Updating checksum " + remoteChecksumFile + " ...");
                remoteChecksumStream.write(digest);
            }
            FileStatus scFileStatus = distFs.getFileStatus(dst);
            log.info("Updated resource " + dst + " " + scFileStatus.getLen());
        }
    } catch (NoSuchAlgorithmException e) {
        throw new IOException(e);
    }
}
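
Storing the jar's MD5 beside it in the home directory is what lets repeated launches skip the upload: the local digest is recomputed on every run and compared against the remote checksum file before any copy happens.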

From source file:io.amient.yarn1.YarnContainerContext.java

License:Open Source License

private void prepareLocalResourceFile(Map<String, LocalResource> localResources, String fileName,
        String remoteFileName, FileSystem distFs) throws IOException {
    final Path dst = new Path(distFs.getHomeDirectory(), remoteFileName);
    FileStatus scFileStatus = distFs.getFileStatus(dst);
    final URL yarnUrl = ConverterUtils.getYarnUrlFromURI(dst.toUri());
    LocalResource scRsrc = LocalResource.newInstance(yarnUrl, LocalResourceType.FILE,
            LocalResourceVisibility.APPLICATION, scFileStatus.getLen(), scFileStatus.getModificationTime());
    localResources.put(fileName, scRsrc);
}
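
Note that the length and modification time baked into the LocalResource must match the staged file exactly; YARN re-validates both during localization and fails the container if the file has changed in the meantime.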

From source file:io.dstream.tez.utils.HadoopUtils.java

License:Apache License

/**
 * Provisions a resource represented as a {@link File} to the {@link FileSystem} for a given application.
 *
 * @param localResource the local file to provision
 * @param fs the target file system
 * @param applicationName the name of the application the resource belongs to
 * @return the provisioned {@link Path} on the target file system
 */
public static Path provisionResourceToFs(File localResource, FileSystem fs, String applicationName)
        throws Exception {
    String destinationFilePath = applicationName + "/" + localResource.getName();
    Path provisionedPath = new Path(fs.getHomeDirectory(), destinationFilePath);
    provisioinResourceToFs(fs, new Path(localResource.getAbsolutePath()), provisionedPath);
    return provisionedPath;
}

From source file:io.dstream.tez.utils.HadoopUtils.java

License:Apache License

/**
 * Provisions the current classpath to YARN and returns an array of
 * {@link Path}s representing the provisioned resources.
 * If the 'generate-jar' system property is set, it will also generate a JAR for the current
 * working directory (mainly used when executing from an IDE).
 */
private static Path[] provisionClassPath(FileSystem fs, String applicationName, String[] classPathExclusions) {
    String genJarProperty = System.getProperty(TezConstants.GENERATE_JAR);
    boolean generateJar = genJarProperty != null && Boolean.parseBoolean(genJarProperty);
    List<Path> provisionedPaths = new ArrayList<Path>();
    List<File> generatedJars = new ArrayList<File>();

    boolean confFromHadoopConfDir = generateConfigJarFromHadoopConfDir(fs, applicationName, provisionedPaths,
            generatedJars);

    TezConfiguration tezConf = new TezConfiguration(fs.getConf());
    boolean provisionTez = true;
    if (tezConf.get("tez.lib.uris") != null) {
        provisionTez = false;
    }
    URL[] classpath = ((URLClassLoader) ClassLoader.getSystemClassLoader()).getURLs();
    for (URL classpathUrl : classpath) {
        File f = new File(classpathUrl.getFile());
        if (f.isDirectory()) {
            if (generateJar) {
                String jarFileName = ClassPathUtils.generateJarFileName("application");
                f = doGenerateJar(f, jarFileName, generatedJars, "application");
            } else if (f.getName().equals("conf") && !confFromHadoopConfDir) {
                String jarFileName = ClassPathUtils.generateJarFileName("conf_application");
                f = doGenerateJar(f, jarFileName, generatedJars, "configuration");
            } else {
                f = null;
            }
        }
        if (f != null) {
            if (f.getName().startsWith("tez-") && !provisionTez) {
                logger.info("Skipping provisioning of " + f.getName()
                        + " since Tez libraries are already provisioned");
                continue;
            }
            String destinationFilePath = applicationName + "/" + f.getName();
            Path provisionedPath = new Path(fs.getHomeDirectory(), destinationFilePath);
            if (shouldProvision(provisionedPath.getName(), classPathExclusions)) {
                try {
                    provisioinResourceToFs(fs, new Path(f.getAbsolutePath()), provisionedPath);
                    provisionedPaths.add(provisionedPath);
                } catch (Exception e) {
                    logger.warn("Failed to provision " + provisionedPath + "; " + e.getMessage());
                    if (logger.isDebugEnabled()) {
                        logger.trace("Failed to provision " + provisionedPath, e);
                    }
                }
            }
        }

    }

    for (File generatedJar : generatedJars) {
        try {
            generatedJar.delete();
        } catch (Exception e) {
            logger.warn("Failed to delete generated jars", e);
        }
    }
    return provisionedPaths.toArray(new Path[] {});
}

From source file:io.dstream.tez.utils.HadoopUtils.java

License:Apache License

/**
 * Generates configuration jars from HADOOP_CONF_DIR and TEZ_CONF_DIR (when set),
 * provisions them to the {@link FileSystem}, and returns true if at least one
 * configuration jar was provisioned.
 */
private static boolean generateConfigJarFromHadoopConfDir(FileSystem fs, String applicationName,
        List<Path> provisionedPaths, List<File> generatedJars) {
    boolean generated = false;
    String hadoopConfDir = System.getenv().get("HADOOP_CONF_DIR");
    if (hadoopConfDir != null && hadoopConfDir.trim().length() > 0) {
        String jarFileName = ClassPathUtils.generateJarFileName("conf_");
        File confDir = new File(hadoopConfDir.trim());
        File jarFile = doGenerateJar(confDir, jarFileName, generatedJars, "configuration (HADOOP_CONF_DIR)");
        String destinationFilePath = applicationName + "/" + jarFile.getName();
        Path provisionedPath = new Path(fs.getHomeDirectory(), destinationFilePath);

        try {
            provisioinResourceToFs(fs, new Path(jarFile.getAbsolutePath()), provisionedPath);
            provisionedPaths.add(provisionedPath);
            generated = true;
        } catch (Exception e) {
            logger.warn("Failed to provision " + provisionedPath + "; " + e.getMessage());
            if (logger.isDebugEnabled()) {
                logger.warn("Failed to provision " + provisionedPath, e);
            }
            throw new IllegalStateException(e);
        }
    }

    String tezConfDir = System.getenv().get("TEZ_CONF_DIR");
    if (tezConfDir != null && tezConfDir.trim().length() > 0) {
        String jarFileName = ClassPathUtils.generateJarFileName("conf_tez");
        File confDir = new File(tezConfDir.trim());
        File jarFile = doGenerateJar(confDir, jarFileName, generatedJars, "configuration (TEZ_CONF_DIR)");

        try {
            URLClassLoader cl = (URLClassLoader) ClassLoader.getSystemClassLoader();
            Method m = URLClassLoader.class.getDeclaredMethod("addURL", URL.class);
            m.setAccessible(true);
            m.invoke(cl, jarFile.toURI().toURL());
        } catch (Exception e) {
            throw new IllegalStateException(e);
        }

        String destinationFilePath = applicationName + "/" + jarFile.getName();
        Path provisionedPath = new Path(fs.getHomeDirectory(), destinationFilePath);

        try {
            provisioinResourceToFs(fs, new Path(jarFile.getAbsolutePath()), provisionedPath);
            provisionedPaths.add(provisionedPath);
            generated = true;
        } catch (Exception e) {
            logger.warn("Failed to provision " + provisionedPath + "; " + e.getMessage());
            if (logger.isDebugEnabled()) {
                logger.warn("Failed to provision " + provisionedPath, e);
            }
            throw new IllegalStateException(e);
        }

    }
    return generated;
}
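
Both this method and provisionClassPath above cast the system class loader to URLClassLoader (here to inject the generated Tez configuration jar via the reflective addURL call). That cast only succeeds on Java 8 and earlier; from Java 9 onward the system class loader is no longer a URLClassLoader, so this code assumes an older runtime.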

From source file:io.hops.hopsworks.common.jobs.flink.AbstractYarnClusterDescriptor.java

License:Apache License

/**
 * This method will block until the ApplicationMaster/JobManager have been
 * deployed on YARN.
 */
protected YarnClusterClient deployInternal() throws Exception {
    isReadyForDeployment();
    LOG.info("Using values:");
    LOG.info("\tTaskManager count = {}", taskManagerCount);
    LOG.info("\tJobManager memory = {}", jobManagerMemoryMb);
    LOG.info("\tTaskManager memory = {}", taskManagerMemoryMb);

    final YarnClient yarnClient = getYarnClient();

    // ------------------ Check if the specified queue exists --------------------
    try {
        List<QueueInfo> queues = yarnClient.getAllQueues();
        // check only if there are queues configured in yarn and for this session.
        if (queues.size() > 0 && this.yarnQueue != null) {
            boolean queueFound = false;
            for (QueueInfo queue : queues) {
                if (queue.getQueueName().equals(this.yarnQueue)) {
                    queueFound = true;
                    break;
                }
            }
            if (!queueFound) {
                String queueNames = "";
                for (QueueInfo queue : queues) {
                    queueNames += queue.getQueueName() + ", ";
                }
                LOG.warn("The specified queue '" + this.yarnQueue + "' does not exist. " + "Available queues: "
                        + queueNames);
            }
        } else {
            LOG.debug("The YARN cluster does not have any queues configured");
        }
    } catch (Throwable e) {
        LOG.warn("Error while getting queue information from YARN: " + e.getMessage());
        if (LOG.isDebugEnabled()) {
            LOG.debug("Error details", e);
        }
    }

    // Create application via yarnClient
    final YarnClientApplication yarnApplication = yarnClient.createApplication();
    GetNewApplicationResponse appResponse = yarnApplication.getNewApplicationResponse();

    Map<String, String> jobSystemProperties = new HashMap<>(2);

    // Certificates are materialized locally so DFSClient can be set to null
    // LocalResources are not used by Flink, so set it null
    HopsUtils.copyUserKafkaCerts(services.getUserCerts(), project, username,
            services.getSettings().getHopsworksTmpCertDir(), services.getSettings().getHdfsTmpCertDir(),
            JobType.FLINK, null, null, jobSystemProperties, services.getSettings().getFlinkKafkaCertDir(),
            appResponse.getApplicationId().toString());

    StringBuilder tmpBuilder = new StringBuilder();
    for (Map.Entry<String, String> prop : jobSystemProperties.entrySet()) {
        String option = YarnRunner.escapeForShell("-D" + prop.getKey() + "=" + prop.getValue());
        javaOptions.add(option);
        addHopsworksParam(option);
        tmpBuilder.append(prop.getKey()).append("=").append(prop.getValue()).append("@@");
    }

    dynamicPropertiesEncoded += tmpBuilder.toString();

    // ------------------ Add dynamic properties to local flinkConfiguration ------
    Map<String, String> dynProperties = getDynamicProperties(dynamicPropertiesEncoded);
    for (Map.Entry<String, String> dynProperty : dynProperties.entrySet()) {
        flinkConfiguration.setString(dynProperty.getKey(), dynProperty.getValue());
    }

    // ------------------ Set default file system scheme -------------------------
    try {
        org.apache.flink.core.fs.FileSystem.setDefaultScheme(flinkConfiguration);
    } catch (IOException e) {
        throw new IOException("Error while setting the default " + "filesystem scheme from configuration.", e);
    }

    // initialize file system
    // Copy the application master jar to the filesystem
    // Create a local resource to point to the destination jar path
    final FileSystem fs = FileSystem.get(conf);

    // hard-coded check for the GoogleHDFS client because it's not overriding the
    // getScheme() method.
    if (!fs.getClass().getSimpleName().equals("GoogleHadoopFileSystem") && fs.getScheme().startsWith("file")) {
        LOG.warn("The file system scheme is '" + fs.getScheme() + "'. This indicates that the "
                + "specified Hadoop configuration path is wrong and the system is "
                + "using the default Hadoop configuration values. The Flink YARN "
                + "client needs to store its files in a distributed file system");
    }

    // ------ Check if the YARN ClusterClient has the requested resources ---
    // the yarnMinAllocationMB specifies the smallest possible container 
    // allocation size. all allocations below this value are automatically 
    // set to this value.
    final int yarnMinAllocationMB = conf.getInt("yarn.scheduler.minimum-allocation-mb", 0);
    if (jobManagerMemoryMb < yarnMinAllocationMB || taskManagerMemoryMb < yarnMinAllocationMB) {
        LOG.warn("The JobManager or TaskManager memory is below the smallest possible "
                + "YARN Container size. The value of 'yarn.scheduler.minimum-allocation-mb'" + " is "
                + yarnMinAllocationMB + "'. Please increase the memory size."
                + "YARN will allocate the smaller containers but the scheduler will"
                + " account for the minimum-allocation-mb, maybe not all instances "
                + "you requested will start.");
    }

    // set the memory to minAllocationMB to do the next checks correctly
    if (jobManagerMemoryMb < yarnMinAllocationMB) {
        jobManagerMemoryMb = yarnMinAllocationMB;
    }
    if (taskManagerMemoryMb < yarnMinAllocationMB) {
        taskManagerMemoryMb = yarnMinAllocationMB;
    }

    Resource maxRes = appResponse.getMaximumResourceCapability();
    final String NOTE = "Please check the 'yarn.scheduler.maximum-allocation-mb' and the "
            + "'yarn.nodemanager.resource.memory-mb' configuration values\n";
    if (jobManagerMemoryMb > maxRes.getMemory()) {
        failSessionDuringDeployment(yarnClient, yarnApplication);
        throw new YarnDeploymentException("The cluster does not have the requested resources for the JobManager"
                + " available!\n" + "Maximum Memory: " + maxRes.getMemory() + "MB Requested: "
                + jobManagerMemoryMb + "MB. " + NOTE);
    }

    if (taskManagerMemoryMb > maxRes.getMemory()) {
        failSessionDuringDeployment(yarnClient, yarnApplication);
        throw new YarnDeploymentException(
                "The cluster does not have the requested resources for the TaskManagers available!\n"
                        + "Maximum Memory: " + maxRes.getMemory() + " Requested: " + taskManagerMemoryMb
                        + "MB. " + NOTE);
    }

    final String NOTE_RSC = "\nThe Flink YARN client will try to allocate the YARN session, "
            + "but maybe not all TaskManagers are connecting from the beginning "
            + "because the resources are currently not available in the cluster. "
            + "The allocation might take more time than usual because the Flink "
            + "YARN client needs to wait until the resources become available.";
    int totalMemoryRequired = jobManagerMemoryMb + taskManagerMemoryMb * taskManagerCount;
    ClusterResourceDescription freeClusterMem = getCurrentFreeClusterResources(yarnClient);
    if (freeClusterMem.totalFreeMemory < totalMemoryRequired) {
        LOG.warn("This YARN session requires " + totalMemoryRequired + "MB of memory in the cluster. "
                + "There are currently only " + freeClusterMem.totalFreeMemory + "MB available." + NOTE_RSC);

    }
    if (taskManagerMemoryMb > freeClusterMem.containerLimit) {
        LOG.warn("The requested amount of memory for the TaskManagers (" + taskManagerMemoryMb
                + "MB) is more than " + "the largest possible YARN container: " + freeClusterMem.containerLimit
                + NOTE_RSC);
    }
    if (jobManagerMemoryMb > freeClusterMem.containerLimit) {
        LOG.warn(
                "The requested amount of memory for the JobManager (" + jobManagerMemoryMb + "MB) is more than "
                        + "the largest possible YARN container: " + freeClusterMem.containerLimit + NOTE_RSC);
    }

    // ----------------- check if the requested containers fit into the cluster.
    int[] nmFree = Arrays.copyOf(freeClusterMem.nodeManagersFree, freeClusterMem.nodeManagersFree.length);
    // first, allocate the jobManager somewhere.
    if (!allocateResource(nmFree, jobManagerMemoryMb)) {
        LOG.warn("Unable to find a NodeManager that can fit the JobManager/Application master. "
                + "The JobManager requires " + jobManagerMemoryMb + "MB. NodeManagers available: "
                + Arrays.toString(freeClusterMem.nodeManagersFree) + NOTE_RSC);
    }
    // allocate TaskManagers
    for (int i = 0; i < taskManagerCount; i++) {
        if (!allocateResource(nmFree, taskManagerMemoryMb)) {
            LOG.warn("There is not enough memory available in the YARN cluster. "
                    + "The TaskManager(s) require " + taskManagerMemoryMb + "MB each. "
                    + "NodeManagers available: " + Arrays.toString(freeClusterMem.nodeManagersFree) + "\n"
                    + "After allocating the JobManager (" + jobManagerMemoryMb + "MB) and (" + i + "/"
                    + taskManagerCount + ") TaskManagers, " + "the following NodeManagers are available: "
                    + Arrays.toString(nmFree) + NOTE_RSC);
        }
    }

    Set<File> effectiveShipFiles = new HashSet<>(shipFiles.size());
    for (File file : shipFiles) {
        effectiveShipFiles.add(file.getAbsoluteFile());
    }

    //check if there is a logback or log4j file
    File logbackFile = new File(configurationDirectory + File.separator + CONFIG_FILE_LOGBACK_NAME);
    final boolean hasLogback = logbackFile.exists();
    if (hasLogback) {
        effectiveShipFiles.add(logbackFile);
    }

    File log4jFile = new File(configurationDirectory + File.separator + CONFIG_FILE_LOG4J_NAME);
    final boolean hasLog4j = log4jFile.exists();
    if (hasLog4j) {
        effectiveShipFiles.add(log4jFile);
        if (hasLogback) {
            // this means there is already a logback configuration file --> fail
            LOG.warn("The configuration directory ('" + configurationDirectory + "') contains both LOG4J and "
                    + "Logback configuration files. Please delete or rename one of them.");
        }
    }

    addLibFolderToShipFiles(effectiveShipFiles);

    final ContainerLaunchContext amContainer = setupApplicationMasterContainer(hasLogback, hasLog4j);

    // Set-up ApplicationSubmissionContext for the application
    ApplicationSubmissionContext appContext = yarnApplication.getApplicationSubmissionContext();

    final ApplicationId appId = appContext.getApplicationId();

    // ------------------ Add Zookeeper namespace to local flinkConfiguration ------
    String zkNamespace = getZookeeperNamespace();
    // no user specified cli argument for namespace?
    if (zkNamespace == null || zkNamespace.isEmpty()) {
        // namespace defined in config? else use applicationId as default.
        zkNamespace = flinkConfiguration.getString(ConfigConstants.ZOOKEEPER_NAMESPACE_KEY,
                String.valueOf(appId));
        setZookeeperNamespace(zkNamespace);
    }

    flinkConfiguration.setString(ConfigConstants.ZOOKEEPER_NAMESPACE_KEY, zkNamespace);

    if (RecoveryMode.isHighAvailabilityModeActivated(flinkConfiguration)) {
        // activate re-execution of failed applications
        appContext.setMaxAppAttempts(flinkConfiguration.getInteger(ConfigConstants.YARN_APPLICATION_ATTEMPTS,
                YarnConfiguration.DEFAULT_RM_AM_MAX_ATTEMPTS));

        activateHighAvailabilitySupport(appContext);
    } else {
        // set number of application retries to 1 in the default case
        appContext
                .setMaxAppAttempts(flinkConfiguration.getInteger(ConfigConstants.YARN_APPLICATION_ATTEMPTS, 1));
    }

    // local resource map for Yarn
    final Map<String, LocalResource> localResources = new HashMap<>(2 + effectiveShipFiles.size());
    // list of remote paths (after upload)
    final List<Path> paths = new ArrayList<>(2 + effectiveShipFiles.size());
    // classpath assembler
    final StringBuilder classPathBuilder = new StringBuilder();
    // ship list that enables reuse of resources for task manager containers
    StringBuilder envShipFileList = new StringBuilder();

    // upload and register ship files
    for (File shipFile : effectiveShipFiles) {
        LocalResource shipResources = Records.newRecord(LocalResource.class);

        Path shipLocalPath = new Path("file://" + shipFile.getAbsolutePath());
        Path remotePath = Utils.setupLocalResource(fs, appId.toString(), shipLocalPath, shipResources,
                fs.getHomeDirectory());

        paths.add(remotePath);

        localResources.put(shipFile.getName(), shipResources);

        classPathBuilder.append(shipFile.getName());
        if (shipFile.isDirectory()) {
            // add directories to the classpath
            classPathBuilder.append(File.separator).append("*");
        }
        classPathBuilder.append(File.pathSeparator);

        envShipFileList.append(remotePath).append(",");
    }
    ////////////////////////////////////////////////////////////////////////////
    /*
     * Add Hops LocalResources paths here
     *
     */
    //Add it to localResources
    for (Entry<String, LocalResource> entry : hopsworksResources.entrySet()) {
        localResources.put(entry.getKey(), entry.getValue());
        //Append name to classPathBuilder
        classPathBuilder.append(entry.getKey());
        classPathBuilder.append(File.pathSeparator);
    }

    ////////////////////////////////////////////////////////////////////////////
    // Setup jar for ApplicationMaster
    LocalResource appMasterJar = Records.newRecord(LocalResource.class);
    LocalResource flinkConf = Records.newRecord(LocalResource.class);
    Path remotePathJar = Utils.setupLocalResource(fs, appId.toString(), flinkJarPath, appMasterJar,
            fs.getHomeDirectory());
    Path remotePathConf = Utils.setupLocalResource(fs, appId.toString(), flinkConfigurationPath, flinkConf,
            fs.getHomeDirectory());
    localResources.put("flink.jar", appMasterJar);
    localResources.put("flink-conf.yaml", flinkConf);

    paths.add(remotePathJar);
    classPathBuilder.append("flink.jar").append(File.pathSeparator);
    paths.add(remotePathConf);
    classPathBuilder.append("flink-conf.yaml").append(File.pathSeparator);

    sessionFilesDir = new Path(fs.getHomeDirectory(), ".flink/" + appId.toString() + "/");

    FsPermission permission = new FsPermission(FsAction.ALL, FsAction.NONE, FsAction.NONE);
    fs.setPermission(sessionFilesDir, permission); // set permission for path.

    // setup security tokens
    Utils.setTokensFor(amContainer, paths, conf);

    amContainer.setLocalResources(localResources);
    fs.close();

    // Setup CLASSPATH and environment variables for ApplicationMaster
    final Map<String, String> appMasterEnv = new HashMap<>();
    // set user specified app master environment variables
    appMasterEnv.putAll(Utils.getEnvironmentVariables(ConfigConstants.YARN_APPLICATION_MASTER_ENV_PREFIX,
            flinkConfiguration));
    // set Flink app class path
    appMasterEnv.put(YarnConfigKeys.ENV_FLINK_CLASSPATH, classPathBuilder.toString());

    // set Flink on YARN internal configuration values
    appMasterEnv.put(YarnConfigKeys.ENV_TM_COUNT, String.valueOf(taskManagerCount));
    appMasterEnv.put(YarnConfigKeys.ENV_TM_MEMORY, String.valueOf(taskManagerMemoryMb));
    appMasterEnv.put(YarnConfigKeys.FLINK_JAR_PATH, remotePathJar.toString());
    appMasterEnv.put(YarnConfigKeys.ENV_APP_ID, appId.toString());
    appMasterEnv.put(YarnConfigKeys.ENV_CLIENT_HOME_DIR, fs.getHomeDirectory().toString());
    appMasterEnv.put(YarnConfigKeys.ENV_CLIENT_SHIP_FILES, envShipFileList.toString());
    appMasterEnv.put(YarnConfigKeys.ENV_CLIENT_USERNAME,
            UserGroupInformation.getCurrentUser().getShortUserName());
    appMasterEnv.put(YarnConfigKeys.ENV_SLOTS, String.valueOf(slots));
    appMasterEnv.put(YarnConfigKeys.ENV_DETACHED, String.valueOf(detached));
    appMasterEnv.put(YarnConfigKeys.ENV_ZOOKEEPER_NAMESPACE, getZookeeperNamespace());

    if (dynamicPropertiesEncoded != null) {
        appMasterEnv.put(YarnConfigKeys.ENV_DYNAMIC_PROPERTIES, dynamicPropertiesEncoded);
    }

    // set classpath from YARN configuration
    Utils.setupYarnClassPath(conf, appMasterEnv);

    amContainer.setEnvironment(appMasterEnv);

    // Set up resource type requirements for ApplicationMaster
    Resource capability = Records.newRecord(Resource.class);
    capability.setMemory(jobManagerMemoryMb);
    capability.setVirtualCores(1);

    String name;
    if (customName == null) {
        name = "Flink session with " + taskManagerCount + " TaskManagers";
        if (detached) {
            name += " (detached)";
        }
    } else {
        name = customName;
    }

    appContext.setApplicationName(name); // application name
    appContext.setApplicationType("Apache Flink");
    appContext.setAMContainerSpec(amContainer);
    appContext.setResource(capability);
    if (yarnQueue != null) {
        appContext.setQueue(yarnQueue);
    }

    // add a hook to clean up in case deployment fails
    Thread deploymentFailureHook = new DeploymentFailureHook(yarnClient, yarnApplication);
    Runtime.getRuntime().addShutdownHook(deploymentFailureHook);
    LOG.info("Submitting application master " + appId);
    yarnClient.submitApplication(appContext);

    LOG.info("Waiting for the cluster to be allocated");
    final long startTime = System.currentTimeMillis();
    ApplicationReport report;
    YarnApplicationState lastAppState = YarnApplicationState.NEW;
    loop: while (true) {
        try {
            report = yarnClient.getApplicationReport(appId);
        } catch (IOException e) {
            throw new YarnDeploymentException("Failed to deploy the cluster: " + e.getMessage());
        }
        YarnApplicationState appState = report.getYarnApplicationState();
        switch (appState) {
        case FAILED:
        case FINISHED:
        case KILLED:
            throw new YarnDeploymentException("The YARN application unexpectedly switched to state " + appState
                    + " during deployment. \n" + "Diagnostics from YARN: " + report.getDiagnostics() + "\n"
                    + "If log aggregation is enabled on your cluster, use this "
                    + "command to further investigate the issue:\n" + "yarn logs -applicationId " + appId);
            //break ..
        case RUNNING:
            LOG.info("YARN application has been deployed successfully.");
            break loop;
        default:
            if (appState != lastAppState) {
                LOG.info("Deploying cluster, current state " + appState);
            }
            if (System.currentTimeMillis() - startTime > 60000) {
                LOG.info("Deployment took more than 60 seconds. Please check if the "
                        + "requested resources are available in the YARN cluster");
            }

        }
        lastAppState = appState;
        Thread.sleep(250);
    }
    // print the application id so the user can cancel the application themselves.
    if (isDetachedMode()) {
        LOG.info("The Flink YARN client has been started in detached mode. In order to stop "
                + "Flink on YARN, use the following command or a YARN web interface to stop "
                + "it:\nyarn application -kill " + appId + "\nPlease also note that the "
                + "temporary files of the YARN session in the home directoy will not be removed.");
    }
    // since deployment was successful, remove the hook
    try {
        Runtime.getRuntime().removeShutdownHook(deploymentFailureHook);
    } catch (IllegalStateException e) {
        // we're already in the shut down hook.
    }

    String host = report.getHost();
    int port = report.getRpcPort();

    // Correctly initialize the Flink config
    flinkConfiguration.setString(ConfigConstants.JOB_MANAGER_IPC_ADDRESS_KEY, host);
    flinkConfiguration.setInteger(ConfigConstants.JOB_MANAGER_IPC_PORT_KEY, port);

    // the Flink cluster is deployed in YARN; return a client representing it
    return createYarnClusterClient(this, yarnClient, report, flinkConfiguration, sessionFilesDir, true);
}
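
Setting sessionFilesDir, the per-application .flink directory under the user's home directory, to owner-only permissions (ALL/NONE/NONE, i.e. 700) keeps the staged jars and configuration private to the submitting user.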

From source file:io.hops.tensorflow.Client.java

License:Apache License

private Map<String, LocalResource> prepareLocalResources(FileSystem fs, ApplicationId appId,
        DistributedCacheList dcl) throws IOException {
    // set local resources for the application master
    // local files or archives as needed
    // In this scenario, the jar file for the application master is part of the local resources
    Map<String, LocalResource> localResources = new HashMap<>();

    // Copy the application master jar to the filesystem
    // Create a local resource to point to the destination jar path
    addResource(fs, appId, amJar, null, Constants.AM_JAR_PATH, null, localResources, null);

    if (!log4jPropFile.isEmpty()) {
        addResource(fs, appId, log4jPropFile, null, Constants.LOG4J_PATH, null, localResources, null);
    }

    // Write distCacheList to HDFS and add to localResources
    Path baseDir = new Path(fs.getHomeDirectory(), Constants.YARNTF_STAGING + "/" + appId.toString());
    Path dclPath = new Path(baseDir, Constants.DIST_CACHE_PATH);
    FSDataOutputStream ostream = null;
    try {
        ostream = fs.create(dclPath);
        ostream.write(SerializationUtils.serialize(dcl));
    } finally {
        IOUtils.closeQuietly(ostream);
    }
    FileStatus dclStatus = fs.getFileStatus(dclPath);
    LocalResource distCacheResource = LocalResource.newInstance(
            ConverterUtils.getYarnUrlFromURI(dclPath.toUri()), LocalResourceType.FILE,
            LocalResourceVisibility.APPLICATION, dclStatus.getLen(), dclStatus.getModificationTime());
    localResources.put(Constants.DIST_CACHE_PATH, distCacheResource);

    return localResources;
}
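
As in the earlier examples, getHomeDirectory() anchors a per-application staging area (here <home>/<YARNTF_STAGING>/<appId>) into which the serialized DistributedCacheList is written before being registered as a LocalResource with APPLICATION visibility.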