Example usage for org.apache.hadoop.fs FileSystem copyFromLocalFile

List of usage examples for org.apache.hadoop.fs FileSystem copyFromLocalFile

Introduction

On this page you can find example usages of org.apache.hadoop.fs.FileSystem.copyFromLocalFile.

Prototype

public void copyFromLocalFile(boolean delSrc, boolean overwrite, Path src, Path dst) throws IOException 

Source Link

Document

The src file is on the local disk.

Usage

From source file:org.schedoscope.export.jdbc.JdbcExportJob.java

License:Apache License

/**
 * Builds the MapReduce job that reads a Hive table through HCatalog and writes it
 * to a relational database through {@code JdbcOutputFormat}.
 *
 * <p>The job configuration is derived from {@code getConfiguration()} and then passed
 * through the Hive metastore, Kerberos and anonymization configuration hooks. If the
 * JDBC driver lives in a jar other than the one containing this job class, that jar is
 * copied below {@code hadoop.tmp.dir} and added to the job classpath.
 *
 * @return the fully configured, not yet submitted job
 * @throws Exception if the driver class cannot be loaded, the schemas cannot be
 *                   resolved, or the driver jar cannot be copied
 */
private Job configure() throws Exception {

    Configuration conf = getConfiguration();
    conf = configureHiveMetaStore(conf);
    conf = configureKerberos(conf);
    conf = configureAnonFields(conf);

    Job job = Job.getInstance(conf, "JDBCExport: " + inputDatabase + "." + inputTable);

    job.setJarByClass(JdbcExportJob.class);
    job.setMapperClass(JdbcExportMapper.class);
    job.setReducerClass(Reducer.class);
    job.setNumReduceTasks(numReducer);

    // Only pass a filter to HCatalog when one was actually supplied.
    if (inputFilter == null || inputFilter.trim().isEmpty()) {
        HCatInputFormat.setInput(job, inputDatabase, inputTable);
    } else {
        HCatInputFormat.setInput(job, inputDatabase, inputTable, inputFilter);
    }

    Schema outputSchema = SchemaFactory.getSchema(dbConnectionString, job.getConfiguration());
    HCatSchema hcatInputSchema = HCatInputFormat.getTableSchema(job.getConfiguration());

    String[] columnNames = SchemaUtils.getColumnNamesFromHcatSchema(hcatInputSchema, outputSchema);
    String[] columnTypes = SchemaUtils.getColumnTypesFromHcatSchema(hcatInputSchema, outputSchema,
            ImmutableSet.copyOf(anonFields));

    String outputTable = inputDatabase + "_" + inputTable;

    JdbcOutputFormat.setOutput(job.getConfiguration(), dbConnectionString, dbUser, dbPassword, outputTable,
            inputFilter, numReducer, commitSize, storageEngine, distributeBy, columnNames, columnTypes);

    job.setInputFormatClass(HCatInputFormat.class);
    job.setOutputFormatClass(JdbcOutputFormat.class);

    job.setMapOutputKeyClass(LongWritable.class);
    job.setMapOutputValueClass(JdbcOutputWritable.class);
    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(JdbcOutputWritable.class);

    Class<?> clazz = Class.forName(outputSchema.getDriverName());
    String jarFile = ClassUtil.findContainingJar(clazz);
    String jarSelf = ClassUtil.findContainingJar(JdbcExportJob.class);
    String tmpDir = job.getConfiguration().get("hadoop.tmp.dir");

    // Ship the driver jar only when it is a separate jar and a staging directory is configured.
    // The staging path is built inside the guard: constructing a Path from a null jar name
    // throws, and a null tmpDir would otherwise produce a "null/..." path.
    if (jarFile != null && jarSelf != null && tmpDir != null && !jarFile.equals(jarSelf)) {
        // Random numeric suffix keeps concurrent exports from overwriting each other's copy.
        Path hdfsDir = new Path(
                tmpDir + "/" + new Path(jarFile).getName() + "." + RandomStringUtils.randomNumeric(20));
        FileSystem fs = FileSystem.get(job.getConfiguration());

        LOG.info("copy " + LOCAL_PATH_PREFIX + jarFile + " to " + hdfsDir);
        fs.copyFromLocalFile(false, true, new Path(LOCAL_PATH_PREFIX + jarFile), hdfsDir);
        LOG.info("add " + hdfsDir + " to distributed cache");
        job.addArchiveToClassPath(hdfsDir);
    }

    return job;
}

From source file:org.smartfrog.services.hadoop.operations.utils.DfsUtils.java

License:Open Source License

/**
 * Uploads a file from the local disk to a path on the given filesystem.
 * The local file is kept after the copy.
 *
 * @param fileSystem filesystem that receives the file
 * @param source     local file to upload; must exist
 * @param dest       destination path on {@code fileSystem}
 * @param overwrite  whether an existing destination may be overwritten
 * @throws SmartFrogRuntimeException if the source file is missing or the copy fails
 */
public static void copyLocalFileIn(FileSystem fileSystem, File source, Path dest, boolean overwrite)
        throws SmartFrogRuntimeException {
    final boolean sourcePresent = source.exists();
    if (!sourcePresent) {
        throw new SmartFrogRuntimeException(ERROR_MISSING_SOURCE_FILE + source);
    }

    // Go through the file URI so the Path carries an explicit scheme for the local file.
    final String sourceUri = source.toURI().toString();
    final Path from = new Path(sourceUri);
    try {
        fileSystem.copyFromLocalFile(false, overwrite, from, dest);
    } catch (IOException copyFailure) {
        final String message = FAILED_TO_COPY + source + " to " + dest + " on " + fileSystem.getUri();
        throw new SmartFrogRuntimeException(message, copyFailure);
    }
}

From source file:org.springframework.data.hadoop.fs.FsShell.java

License:Apache License

/**
 * Copies one or more local paths to a destination path, keeping the local sources
 * and refusing to overwrite existing destination files.
 *
 * <p>The arguments are handed to {@code parseVarargs}, which yields the list of
 * source paths and the single destination path.
 *
 * @param src  first path argument
 * @param src2 second path argument
 * @param dst  remaining path arguments
 * @throws HadoopException if the underlying copy fails with an I/O error
 */
public void copyFromLocal(String src, String src2, String... dst) {
    final Object[] parsed = parseVarargs(src, src2, dst);
    @SuppressWarnings("unchecked")
    final List<Path> sources = (List<Path>) parsed[0];
    final Path target = (Path) parsed[1];

    try {
        final FileSystem targetFs = getFS(target);
        final Path[] sourceArray = sources.toArray(new Path[sources.size()]);
        targetFs.copyFromLocalFile(false, false, sourceArray, target);
    } catch (IOException ex) {
        throw new HadoopException("Cannot copy resources " + ex.getMessage(), ex);
    }
}

From source file:org.starschema.hadoop.yarn.applications.distributedshell.Client.java

License:Apache License

/**
 * Main run function for the client.
 *
 * <p>Logs cluster, queue and ACL information, requests a new application from the
 * resource manager, uploads the application master jar, the optional log4j
 * properties, the optional shell script and the optional Hazelcast zip, builds the
 * application master launch context and submits the application. It then hands over
 * to {@code monitorApplication}.
 *
 * @return true if application completed successfully
 * @throws IOException if a filesystem operation fails, or if security is enabled and
 *                     no resource manager principal is configured
 * @throws YarnException if a call to the YARN client fails
 */
public boolean run() throws IOException, YarnException {

    LOG.info("Running Client");
    yarnClient.start();

    YarnClusterMetrics clusterMetrics = yarnClient.getYarnClusterMetrics();
    LOG.info("Got Cluster metric info from ASM" + ", numNodeManagers=" + clusterMetrics.getNumNodeManagers());

    List<NodeReport> clusterNodeReports = yarnClient.getNodeReports(NodeState.RUNNING);
    LOG.info("Got Cluster node info from ASM");
    for (NodeReport node : clusterNodeReports) {
        LOG.info("Got node report from ASM for" + ", nodeId=" + node.getNodeId() + ", nodeAddress="
                + node.getHttpAddress() + ", nodeRackName=" + node.getRackName() + ", nodeNumContainers="
                + node.getNumContainers());
    }

    QueueInfo queueInfo = yarnClient.getQueueInfo(this.amQueue);
    LOG.info("Queue info" + ", queueName=" + queueInfo.getQueueName() + ", queueCurrentCapacity="
            + queueInfo.getCurrentCapacity() + ", queueMaxCapacity=" + queueInfo.getMaximumCapacity()
            + ", queueApplicationCount=" + queueInfo.getApplications().size() + ", queueChildQueueCount="
            + queueInfo.getChildQueues().size());

    List<QueueUserACLInfo> listAclInfo = yarnClient.getQueueAclsInfo();
    for (QueueUserACLInfo aclInfo : listAclInfo) {
        for (QueueACL userAcl : aclInfo.getUserAcls()) {
            LOG.info("User ACL Info for Queue" + ", queueName=" + aclInfo.getQueueName() + ", userAcl="
                    + userAcl.name());
        }
    }

    if (domainId != null && domainId.length() > 0 && toCreateDomain) {
        prepareTimelineDomain();
    }

    // Get a new application id
    YarnClientApplication app = yarnClient.createApplication();
    GetNewApplicationResponse appResponse = app.getNewApplicationResponse();
    // TODO get min/max resource capabilities from RM and change memory ask if needed
    // If we do not have min/max, we may not be able to correctly request
    // the required resources from the RM for the app master
    // Memory ask has to be a multiple of min and less than max.
    // Dump out information about cluster capability as seen by the resource manager
    int maxMem = appResponse.getMaximumResourceCapability().getMemory();
    LOG.info("Max mem capability of resources in this cluster " + maxMem);

    // A resource ask cannot exceed the max.
    if (amMemory > maxMem) {
        LOG.info("AM memory specified above max threshold of cluster. Using max value." + ", specified="
                + amMemory + ", max=" + maxMem);
        amMemory = maxMem;
    }

    int maxVCores = appResponse.getMaximumResourceCapability().getVirtualCores();
    LOG.info("Max virtual cores capability of resources in this cluster " + maxVCores);

    if (amVCores > maxVCores) {
        LOG.info("AM virtual cores specified above max threshold of cluster. " + "Using max value."
                + ", specified=" + amVCores + ", max=" + maxVCores);
        amVCores = maxVCores;
    }

    // set the application name
    ApplicationSubmissionContext appContext = app.getApplicationSubmissionContext();
    ApplicationId appId = appContext.getApplicationId();

    appContext.setKeepContainersAcrossApplicationAttempts(keepContainers);
    appContext.setApplicationName(appName);

    if (attemptFailuresValidityInterval >= 0) {
        appContext.setAttemptFailuresValidityInterval(attemptFailuresValidityInterval);
    }

    // set local resources for the application master
    // local files or archives as needed
    // In this scenario, the jar file for the application master is part of the local resources
    Map<String, LocalResource> localResources = new HashMap<String, LocalResource>();

    LOG.info("Copy App Master jar from local filesystem and add to local environment");
    // Copy the application master jar to the filesystem
    // Create a local resource to point to the destination jar path
    FileSystem fs = FileSystem.get(conf);
    addToLocalResources(fs, appMasterJar, appMasterJarPath, appId.toString(), localResources, null);

    // Set the log4j properties if needed
    if (!log4jPropFile.isEmpty()) {
        addToLocalResources(fs, log4jPropFile, log4jPath, appId.toString(), localResources, null);
    }

    // The shell script has to be made available on the final container(s)
    // where it will be executed.
    // To do this, we need to first copy into the filesystem that is visible
    // to the yarn framework.
    // We do not need to set this as a local resource for the application
    // master as the application master does not need it.
    String hdfsShellScriptLocation = "";
    long hdfsShellScriptLen = 0;
    long hdfsShellScriptTimestamp = 0;
    if (!shellScriptPath.isEmpty()) {
        Path shellSrc = new Path(shellScriptPath);
        String shellPathSuffix = appName + "/" + appId.toString() + "/" + SCRIPT_PATH;
        Path shellDst = new Path(fs.getHomeDirectory(), shellPathSuffix);
        fs.copyFromLocalFile(false, true, shellSrc, shellDst);
        hdfsShellScriptLocation = shellDst.toUri().toString();
        FileStatus shellFileStatus = fs.getFileStatus(shellDst);
        hdfsShellScriptLen = shellFileStatus.getLen();
        hdfsShellScriptTimestamp = shellFileStatus.getModificationTime();
    }

    // The Hazelcast zip is uploaded the same way as the shell script; its location,
    // length and timestamp are passed to the application master through the environment.
    LOG.info("Copy Hazelcast zip from local filesystem and add to local environment");
    String hdfsHazelLocation = "";
    long hdfsHazelLen = 0;
    long hdfsHazelTimestamp = 0;
    if (!hazelcastZip.isEmpty()) {
        Path hazelSrc = new Path(hazelcastZip);
        String hazelPathSuffix = appName + "/" + appId.toString() + "/" + HAZELCAST_PATH;
        Path hazelDst = new Path(fs.getHomeDirectory(), hazelPathSuffix);
        fs.copyFromLocalFile(false, true, hazelSrc, hazelDst);
        hdfsHazelLocation = hazelDst.toUri().toString();
        LOG.info("Hazelcast zip location: " + hdfsHazelLocation);
        FileStatus hazelFileStatus = fs.getFileStatus(hazelDst);
        hdfsHazelLen = hazelFileStatus.getLen();
        hdfsHazelTimestamp = hazelFileStatus.getModificationTime();
    }

    if (!shellCommand.isEmpty()) {
        addToLocalResources(fs, null, shellCommandPath, appId.toString(), localResources, shellCommand);
    }

    if (shellArgs.length > 0) {
        addToLocalResources(fs, null, shellArgsPath, appId.toString(), localResources,
                StringUtils.join(shellArgs, " "));
    }

    // Set the env variables to be setup in the env where the application master will be run
    LOG.info("Set the environment for the application master");
    Map<String, String> env = new HashMap<String, String>();

    // put location of shell script into env
    // using the env info, the application master will create the correct local resource for the
    // eventual containers that will be launched to execute the shell scripts
    env.put(DSConstants.DISTRIBUTEDSHELLSCRIPTLOCATION, hdfsShellScriptLocation);
    env.put(DSConstants.DISTRIBUTEDSHELLSCRIPTTIMESTAMP, Long.toString(hdfsShellScriptTimestamp));
    env.put(DSConstants.DISTRIBUTEDSHELLSCRIPTLEN, Long.toString(hdfsShellScriptLen));
    env.put(DSConstants.HAZELLOCATION, hdfsHazelLocation);
    env.put(DSConstants.HAZELTIMESTAMP, Long.toString(hdfsHazelTimestamp));
    env.put(DSConstants.HAZELLEN, Long.toString(hdfsHazelLen));
    if (domainId != null && domainId.length() > 0) {
        env.put(DSConstants.DISTRIBUTEDSHELLTIMELINEDOMAIN, domainId);
    }

    // Add AppMaster.jar location to classpath
    // At some point we should not be required to add
    // the hadoop specific classpaths to the env.
    // It should be provided out of the box.
    // For now setting all required classpaths including
    // the classpath to "." for the application jar
    StringBuilder classPathEnv = new StringBuilder(Environment.CLASSPATH.$$())
            .append(ApplicationConstants.CLASS_PATH_SEPARATOR).append("./*");
    for (String c : conf.getStrings(YarnConfiguration.YARN_APPLICATION_CLASSPATH,
            YarnConfiguration.DEFAULT_YARN_CROSS_PLATFORM_APPLICATION_CLASSPATH)) {
        classPathEnv.append(ApplicationConstants.CLASS_PATH_SEPARATOR);
        classPathEnv.append(c.trim());
    }
    classPathEnv.append(ApplicationConstants.CLASS_PATH_SEPARATOR).append("./log4j.properties");

    // add the runtime classpath needed for tests to work
    if (conf.getBoolean(YarnConfiguration.IS_MINI_YARN_CLUSTER, false)) {
        classPathEnv.append(':');
        classPathEnv.append(System.getProperty("java.class.path"));
    }

    env.put("CLASSPATH", classPathEnv.toString());

    // Set the necessary command to execute the application master.
    // The list never leaves this method, so a plain ArrayList is sufficient.
    List<CharSequence> vargs = new ArrayList<CharSequence>(30);

    // Set java executable command
    LOG.info("Setting up app master command");
    vargs.add(Environment.JAVA_HOME.$$() + "/bin/java");
    // Set Xmx based on am memory size
    vargs.add("-Xmx" + amMemory + "m");
    // Set class name
    vargs.add(appMasterMainClass);
    // Set params for Application Master
    vargs.add("--container_memory " + String.valueOf(containerMemory));
    vargs.add("--container_vcores " + String.valueOf(containerVirtualCores));
    vargs.add("--num_containers " + String.valueOf(numContainers));
    if (null != nodeLabelExpression) {
        appContext.setNodeLabelExpression(nodeLabelExpression);
    }
    vargs.add("--priority " + String.valueOf(shellCmdPriority));

    for (Map.Entry<String, String> entry : shellEnv.entrySet()) {
        vargs.add("--shell_env " + entry.getKey() + "=" + entry.getValue());
    }
    if (debugFlag) {
        vargs.add("--debug");
    }

    vargs.add("1>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/AppMaster.stdout");
    vargs.add("2>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/AppMaster.stderr");

    // Get final command
    StringBuilder command = new StringBuilder();
    for (CharSequence str : vargs) {
        command.append(str).append(" ");
    }

    LOG.info("Completed setting up app master command " + command.toString());
    List<String> commands = new ArrayList<String>();
    commands.add(command.toString());

    // Set up the container launch context for the application master
    ContainerLaunchContext amContainer = ContainerLaunchContext.newInstance(localResources, env, commands, null,
            null, null);

    // Set up resource type requirements
    // For now, both memory and vcores are supported, so we set memory and
    // vcores requirements
    Resource capability = Resource.newInstance(amMemory, amVCores);
    appContext.setResource(capability);

    // Service data is a binary blob that can be passed to the application
    // Not needed in this scenario

    // Setup security tokens
    if (UserGroupInformation.isSecurityEnabled()) {
        // Note: Credentials class is marked as LimitedPrivate for HDFS and MapReduce
        Credentials credentials = new Credentials();
        String tokenRenewer = conf.get(YarnConfiguration.RM_PRINCIPAL);
        if (tokenRenewer == null || tokenRenewer.length() == 0) {
            throw new IOException("Can't get Master Kerberos principal for the RM to use as renewer");
        }

        // For now, only getting tokens for the default file-system.
        final Token<?> tokens[] = fs.addDelegationTokens(tokenRenewer, credentials);
        if (tokens != null) {
            for (Token<?> token : tokens) {
                LOG.info("Got dt for " + fs.getUri() + "; " + token);
            }
        }
        DataOutputBuffer dob = new DataOutputBuffer();
        credentials.writeTokenStorageToStream(dob);
        ByteBuffer fsTokens = ByteBuffer.wrap(dob.getData(), 0, dob.getLength());
        amContainer.setTokens(fsTokens);
    }

    appContext.setAMContainerSpec(amContainer);

    // Set the priority for the application master
    // TODO - what is the range for priority? how to decide?
    Priority pri = Priority.newInstance(amPriority);
    appContext.setPriority(pri);

    // Set the queue to which this application is to be submitted in the RM
    appContext.setQueue(amQueue);

    // Submit the application to the applications manager
    // Ignore the response as either a valid response object is returned on success
    // or an exception thrown to denote some form of a failure
    LOG.info("Submitting application to ASM");

    yarnClient.submitApplication(appContext);

    // TODO
    // Try submitting the same request again
    // app submission failure?

    // Monitor the application
    return monitorApplication(appId);

}

From source file:org.wonderbee.hadoop.util.HadoopUtils.java

License:Apache License

/**
 * Uploads a local file to the filesystem described by the given configuration.
 *
 * <p>If the destination already exists as a directory, that directory is deleted
 * recursively first. The local source is kept, and an existing destination file
 * is overwritten.
 *
 * @param localsrc path of the file on the local disk
 * @param hdfsdest destination path on the configured filesystem
 * @param conf     configuration used to obtain the filesystem
 * @throws IOException if any filesystem operation fails
 */
public static void uploadLocalFile(Path localsrc, Path hdfsdest, Configuration conf) throws IOException {
    final FileSystem cluster = FileSystem.get(conf);

    // Short-circuit keeps getFileStatus from being called on a missing path.
    final boolean destIsDirectory = cluster.exists(hdfsdest) && cluster.getFileStatus(hdfsdest).isDir();
    if (destIsDirectory) {
        cluster.delete(hdfsdest, true);
    }

    cluster.copyFromLocalFile(false, true, localsrc, hdfsdest);
}

From source file:origin.hadoop.yarn.distributedshell.Client.java

License:Apache License

/**
 * Main run function for the client/*from w  w  w  . j  a v  a  2s .c o m*/
 * @return true if application completed successfully
 * @throws IOException
 * @throws YarnException
 */
public boolean run() throws IOException, YarnException {

    LOG.info("Running Client");
    yarnClient.start();

    YarnClusterMetrics clusterMetrics = yarnClient.getYarnClusterMetrics();
    LOG.info("Got Cluster metric info from ASM" + ", numNodeManagers=" + clusterMetrics.getNumNodeManagers());

    List<NodeReport> clusterNodeReports = yarnClient.getNodeReports(NodeState.RUNNING);
    LOG.info("Got Cluster node info from ASM");
    for (NodeReport node : clusterNodeReports) {
        LOG.info("Got node report from ASM for" + ", nodeId=" + node.getNodeId() + ", nodeAddress"
                + node.getHttpAddress() + ", nodeRackName" + node.getRackName() + ", nodeNumContainers"
                + node.getNumContainers());
    }

    QueueInfo queueInfo = yarnClient.getQueueInfo(this.amQueue);
    LOG.info("Queue info" + ", queueName=" + queueInfo.getQueueName() + ", queueCurrentCapacity="
            + queueInfo.getCurrentCapacity() + ", queueMaxCapacity=" + queueInfo.getMaximumCapacity()
            + ", queueApplicationCount=" + queueInfo.getApplications().size() + ", queueChildQueueCount="
            + queueInfo.getChildQueues().size());

    List<QueueUserACLInfo> listAclInfo = yarnClient.getQueueAclsInfo();
    for (QueueUserACLInfo aclInfo : listAclInfo) {
        for (QueueACL userAcl : aclInfo.getUserAcls()) {
            LOG.info("User ACL Info for Queue" + ", queueName=" + aclInfo.getQueueName() + ", userAcl="
                    + userAcl.name());
        }
    }

    // Get a new application id
    YarnClientApplication app = yarnClient.createApplication();
    GetNewApplicationResponse appResponse = app.getNewApplicationResponse();
    // TODO get min/max resource capabilities from RM and change memory ask if needed
    // If we do not have min/max, we may not be able to correctly request
    // the required resources from the RM for the app master
    // Memory ask has to be a multiple of min and less than max.
    // Dump out information about cluster capability as seen by the resource manager
    int maxMem = appResponse.getMaximumResourceCapability().getMemory();
    LOG.info("Max mem capabililty of resources in this cluster " + maxMem);

    // A resource ask cannot exceed the max.
    if (amMemory > maxMem) {
        LOG.info("AM memory specified above max threshold of cluster. Using max value." + ", specified="
                + amMemory + ", max=" + maxMem);
        amMemory = maxMem;
    }

    int maxVCores = appResponse.getMaximumResourceCapability().getVirtualCores();
    LOG.info("Max virtual cores capabililty of resources in this cluster " + maxVCores);

    if (amVCores > maxVCores) {
        LOG.info("AM virtual cores specified above max threshold of cluster. " + "Using max value."
                + ", specified=" + amVCores + ", max=" + maxVCores);
        amVCores = maxVCores;
    }

    // set the application name
    ApplicationSubmissionContext appContext = app.getApplicationSubmissionContext();
    ApplicationId appId = appContext.getApplicationId();

    appContext.setKeepContainersAcrossApplicationAttempts(keepContainers);
    appContext.setApplicationName(appName);

    // set local resources for the application master
    // local files or archives as needed
    // In this scenario, the jar file for the application master is part of the local resources
    Map<String, LocalResource> localResources = new HashMap<String, LocalResource>();

    LOG.info("Copy App Master jar from local filesystem and add to local environment");
    // Copy the application master jar to the filesystem
    // Create a local resource to point to the destination jar path
    FileSystem fs = FileSystem.get(conf);
    addToLocalResources(fs, appMasterJar, appMasterJarPath, appId.toString(), localResources, null);

    // Set the log4j properties if needed
    if (!log4jPropFile.isEmpty()) {
        addToLocalResources(fs, log4jPropFile, log4jPath, appId.toString(), localResources, null);
    }

    // The shell script has to be made available on the final container(s)
    // where it will be executed.
    // To do this, we need to first copy into the filesystem that is visible
    // to the yarn framework.
    // We do not need to set this as a local resource for the application
    // master as the application master does not need it.
    String hdfsShellScriptLocation = "";
    long hdfsShellScriptLen = 0;
    long hdfsShellScriptTimestamp = 0;
    if (!shellScriptPath.isEmpty()) {
        Path shellSrc = new Path(shellScriptPath);
        String shellPathSuffix = appName + "/" + appId.toString() + "/" + SCRIPT_PATH;
        Path shellDst = new Path(fs.getHomeDirectory(), shellPathSuffix);
        fs.copyFromLocalFile(false, true, shellSrc, shellDst);
        hdfsShellScriptLocation = shellDst.toUri().toString();
        FileStatus shellFileStatus = fs.getFileStatus(shellDst);
        hdfsShellScriptLen = shellFileStatus.getLen();
        hdfsShellScriptTimestamp = shellFileStatus.getModificationTime();
    }

    if (!shellCommand.isEmpty()) {
        addToLocalResources(fs, null, shellCommandPath, appId.toString(), localResources, shellCommand);
    }

    if (shellArgs.length > 0) {
        addToLocalResources(fs, null, shellArgsPath, appId.toString(), localResources,
                StringUtils.join(shellArgs, " "));
    }

    // Set the necessary security tokens as needed
    //amContainer.setContainerTokens(containerToken);

    // Set the env variables to be setup in the env where the application master will be run
    LOG.info("Set the environment for the application master");
    Map<String, String> env = new HashMap<String, String>();

    // put location of shell script into env
    // using the env info, the application master will create the correct local resource for the
    // eventual containers that will be launched to execute the shell scripts
    env.put(DSConstants.DISTRIBUTEDSHELLSCRIPTLOCATION, hdfsShellScriptLocation);
    env.put(DSConstants.DISTRIBUTEDSHELLSCRIPTTIMESTAMP, Long.toString(hdfsShellScriptTimestamp));
    env.put(DSConstants.DISTRIBUTEDSHELLSCRIPTLEN, Long.toString(hdfsShellScriptLen));

    // Add AppMaster.jar location to classpath
    // At some point we should not be required to add
    // the hadoop specific classpaths to the env.
    // It should be provided out of the box.
    // For now setting all required classpaths including
    // the classpath to "." for the application jar
    StringBuilder classPathEnv = new StringBuilder(Environment.CLASSPATH.$$())
            .append(ApplicationConstants.CLASS_PATH_SEPARATOR).append("./*");
    for (String c : conf.getStrings(YarnConfiguration.YARN_APPLICATION_CLASSPATH,
            YarnConfiguration.DEFAULT_YARN_CROSS_PLATFORM_APPLICATION_CLASSPATH)) {
        classPathEnv.append(ApplicationConstants.CLASS_PATH_SEPARATOR);
        classPathEnv.append(c.trim());
    }
    classPathEnv.append(ApplicationConstants.CLASS_PATH_SEPARATOR).append("./log4j.properties");

    // add the runtime classpath needed for tests to work
    if (conf.getBoolean(YarnConfiguration.IS_MINI_YARN_CLUSTER, false)) {
        classPathEnv.append(':');
        classPathEnv.append(System.getProperty("java.class.path"));
    }

    env.put("CLASSPATH", classPathEnv.toString());

    // Set the necessary command to execute the application master
    Vector<CharSequence> vargs = new Vector<CharSequence>(30);

    // Set java executable command
    LOG.info("Setting up app master command");
    vargs.add(Environment.JAVA_HOME.$$() + "/bin/java");
    // Set Xmx based on am memory size
    vargs.add("-Xmx" + amMemory + "m");
    // Set class name
    vargs.add(appMasterMainClass);
    // Set params for Application Master
    vargs.add("--container_memory " + String.valueOf(containerMemory));
    vargs.add("--container_vcores " + String.valueOf(containerVirtualCores));
    vargs.add("--num_containers " + String.valueOf(numContainers));
    vargs.add("--priority " + String.valueOf(shellCmdPriority));

    for (Map.Entry<String, String> entry : shellEnv.entrySet()) {
        vargs.add("--shell_env " + entry.getKey() + "=" + entry.getValue());
    }
    if (debugFlag) {
        vargs.add("--debug");
    }

    vargs.add("1>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/AppMaster.stdout");
    vargs.add("2>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/AppMaster.stderr");

    // Get final commmand
    StringBuilder command = new StringBuilder();
    for (CharSequence str : vargs) {
        command.append(str).append(" ");
    }

    LOG.info("Completed setting up app master command " + command.toString());
    List<String> commands = new ArrayList<String>();
    commands.add(command.toString());

    // Set up the container launch context for the application master
    ContainerLaunchContext amContainer = ContainerLaunchContext.newInstance(localResources, env, commands, null,
            null, null);

    // Set up resource type requirements
    // For now, both memory and vcores are supported, so we set memory and
    // vcores requirements
    Resource capability = Resource.newInstance(amMemory, amVCores);
    appContext.setResource(capability);

    // Service data is a binary blob that can be passed to the application
    // Not needed in this scenario
    // amContainer.setServiceData(serviceData);

    // Setup security tokens
    if (UserGroupInformation.isSecurityEnabled()) {
        // Note: Credentials class is marked as LimitedPrivate for HDFS and MapReduce
        Credentials credentials = new Credentials();
        String tokenRenewer = conf.get(YarnConfiguration.RM_PRINCIPAL);
        if (tokenRenewer == null || tokenRenewer.length() == 0) {
            throw new IOException("Can't get Master Kerberos principal for the RM to use as renewer");
        }

        // For now, only getting tokens for the default file-system.
        final Token<?> tokens[] = fs.addDelegationTokens(tokenRenewer, credentials);
        if (tokens != null) {
            for (Token<?> token : tokens) {
                LOG.info("Got dt for " + fs.getUri() + "; " + token);
            }
        }
        DataOutputBuffer dob = new DataOutputBuffer();
        credentials.writeTokenStorageToStream(dob);
        ByteBuffer fsTokens = ByteBuffer.wrap(dob.getData(), 0, dob.getLength());
        amContainer.setTokens(fsTokens);
    }

    appContext.setAMContainerSpec(amContainer);

    // Set the priority for the application master
    // TODO - what is the range for priority? how to decide?
    Priority pri = Priority.newInstance(amPriority);
    appContext.setPriority(pri);

    // Set the queue to which this application is to be submitted in the RM
    appContext.setQueue(amQueue);

    // Submit the application to the applications manager
    // SubmitApplicationResponse submitResp = applicationsManager.submitApplication(appRequest);
    // Ignore the response as either a valid response object is returned on success
    // or an exception thrown to denote some form of a failure
    LOG.info("Submitting application to ASM");

    yarnClient.submitApplication(appContext);

    // TODO
    // Try submitting the same request again
    // app submission failure?

    // Monitor the application
    return monitorApplication(appId);

}

From source file:proxyyarn.ProxyYarn.java

License:Apache License

/**
 * Submits the Accumulo proxy application master to YARN and waits for its outcome.
 *
 * <p>Logs cluster, node, queue and ACL information, uploads the application master jar
 * to HDFS, builds the container launch context and the submission context, submits the
 * application and finally delegates to {@code monitorApplication}.
 *
 * @return whatever {@code monitorApplication} reports for the submitted application
 * @throws Exception if any YARN or file-system call fails
 */
public boolean run() throws Exception {
    Configuration conf = new YarnConfiguration(new Configuration());

    YarnClient yarnClient = YarnClient.createYarnClient();
    yarnClient.init(conf);
    yarnClient.start();

    // The client is created and started here, so it is stopped here as well once
    // monitoring has finished (or any step below has failed).
    try {
        YarnClusterMetrics clusterMetrics = yarnClient.getYarnClusterMetrics();
        log.info("Got Cluster metric info from ASM, numNodeManagers={}", clusterMetrics.getNumNodeManagers());

        List<NodeReport> clusterNodeReports = yarnClient.getNodeReports(NodeState.RUNNING);
        log.info("Got Cluster node info from ASM");
        for (NodeReport node : clusterNodeReports) {
            // Every label carries an '=' so that label and value are not fused in the log.
            log.info("Got node report from ASM for, nodeId={}, nodeAddress={}, nodeRackName={}, nodeNumContainers={}",
                    node.getNodeId(), node.getHttpAddress(), node.getRackName(), node.getNumContainers());
        }

        QueueInfo queueInfo = yarnClient.getQueueInfo("default");
        log.info("Queue info, queueName={}, queueCurrentCapacity={}, queueMaxCapacity={}"
                + ", queueApplicationCount={}, queueChildQueueCount={}",
                queueInfo.getQueueName(), queueInfo.getCurrentCapacity(), queueInfo.getMaximumCapacity(),
                queueInfo.getApplications().size(), queueInfo.getChildQueues().size());

        List<QueueUserACLInfo> listAclInfo = yarnClient.getQueueAclsInfo();
        for (QueueUserACLInfo aclInfo : listAclInfo) {
            for (QueueACL userAcl : aclInfo.getUserAcls()) {
                log.info("User ACL Info for Queue, queueName={}, userAcl={}", aclInfo.getQueueName(),
                        userAcl.name());
            }
        }

        FileSystem fs = FileSystem.get(conf);
        if (!fs.getClass().equals(DistributedFileSystem.class)) {
            log.error("Expected DistributedFileSystem, but was {}", fs.getClass().getSimpleName());
            System.exit(1);
        }

        String appName = "AccumuloProxyYarn";
        YarnClientApplication app = yarnClient.createApplication();

        // The submission context already carries the application id assigned by the RM.
        ApplicationSubmissionContext appContext = app.getApplicationSubmissionContext();
        ApplicationId appId = appContext.getApplicationId();
        appContext.setApplicationName(appName);

        // Create a new container launch context for the AM's container
        ContainerLaunchContext amContainer = Records.newRecord(ContainerLaunchContext.class);

        // Define the local resources required
        Map<String, LocalResource> localResources = new HashMap<String, LocalResource>();

        // Upload the application master jar from the local build directory to HDFS so
        // that it can be localized into the launched container.
        Path localJarPath = new Path(
                "file:///Users/jelser/projects/accumulo-proxy-yarn/target/accumulo-proxy-yarn-0.0.1-SNAPSHOT.jar");
        Path jarPath = new Path("hdfs:///accumulo-proxy-yarn-0.0.1-SNAPSHOT.jar");
        fs.copyFromLocalFile(false, true, localJarPath, jarPath);
        FileStatus jarStatus = fs.getFileStatus(jarPath);
        LocalResource amJarRsrc = Records.newRecord(LocalResource.class);

        // Set the type of resource - file or archive
        // archives are untarred at the destination by the framework
        amJarRsrc.setType(LocalResourceType.FILE);

        // Set visibility of the resource
        // Setting to most private option i.e. this file will only
        // be visible to this instance of the running application
        amJarRsrc.setVisibility(LocalResourceVisibility.APPLICATION);

        // Set the location of resource to be copied over into the
        // working directory
        amJarRsrc.setResource(ConverterUtils.getYarnUrlFromPath(jarPath));

        // Set timestamp and length of file so that the framework
        // can do basic sanity checks for the local resource
        // after it has been copied over to ensure it is the same
        // resource the client intended to use with the application
        amJarRsrc.setTimestamp(jarStatus.getModificationTime());
        amJarRsrc.setSize(jarStatus.getLen());

        // The framework will create a symlink called AppMaster.jar in the
        // working directory that will be linked back to the actual file.
        // The ApplicationMaster, if needs to reference the jar file, would
        // need to use the symlink filename.
        localResources.put("AppMaster.jar", amJarRsrc);

        // Set the local resources into the launch context
        amContainer.setLocalResources(localResources);

        // Set up the environment needed for the launch context
        Map<String, String> env = new HashMap<String, String>();

        // By default, all the hadoop specific classpaths will already be available
        // in $CLASSPATH, so we append to it instead of overwriting it.
        String classPathEnv = "$CLASSPATH:./*:/Users/jelser/projects/accumulo-proxy-yarn/target/lib/*";
        env.put("CLASSPATH", classPathEnv);
        amContainer.setEnvironment(env);

        // Construct the command to be executed on the launched container
        String command = "${JAVA_HOME}" + "/bin/java" + " proxyyarn.ProxyYarnAppMaster 1>"
                + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/stdout" + " 2>"
                + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/stderr";

        List<String> commands = new ArrayList<String>();
        commands.add(command);
        // add additional commands if needed

        // Set the command array into the container spec
        amContainer.setCommands(commands);

        // Define the resource requirements for the container; only memory is requested.
        // If the process takes more than its allocated memory, it will
        // be killed by the framework.
        // Memory being requested for should be less than max capability
        // of the cluster and all asks should be a multiple of the min capability.
        Resource capability = Records.newRecord(Resource.class);
        capability.setMemory(256);
        appContext.setResource(capability);

        // Set the container launch content into the ApplicationSubmissionContext
        appContext.setAMContainerSpec(amContainer);

        // Set the priority for the application master
        Priority pri = Records.newRecord(Priority.class);
        // TODO - what is the range for priority? how to decide?
        pri.setPriority(0);
        appContext.setPriority(pri);

        // Set the queue to which this application is to be submitted in the RM
        appContext.setQueue("default");

        // Submit the application; a failure is signalled by an exception.
        log.info("Submitting application to ASM");

        yarnClient.submitApplication(appContext);

        return monitorApplication(yarnClient, appId);
    } finally {
        yarnClient.stop();
    }
}

From source file:runtime.starter.MPJYarnClient.java

License:Open Source License

/**
 * Uploads the MPJ jars to HDFS, submits the MPJ application master to YARN, relays the
 * output of the launched processes, exchanges wire-up information between the wrappers
 * and finally prints the application's terminal state and statistics.
 *
 * <p>Returns early (after printing a message) when the {@code MPJ_HOME} environment
 * variable is not set.
 *
 * @throws Exception if the uploads, the server socket creation, the application
 *         submission or the status polling fail
 */
public void run() throws Exception {

    mpjHomeDir = System.getenv("MPJ_HOME");
    if (mpjHomeDir == null) {
        // Without MPJ_HOME the jars to upload cannot be located.
        System.out.println("[MPJRun.java]:MPJ_HOME environment variable not found..");
        return;
    }

    // Copy the application master jar to HDFS
    // Create a local resource to point to the destination jar path
    // NOTE: 'conf' on the next line resolves to a declaration outside this method; the
    // local YarnConfiguration of the same name only comes into scope further down.
    FileSystem fs = FileSystem.get(conf);

    Path source = new Path(mpjHomeDir + "/lib/mpj-app-master.jar");
    String pathSuffix = hdfsFolder + "mpj-app-master.jar";
    Path dest = new Path(fs.getHomeDirectory(), pathSuffix);

    if (debugYarn) {
        logger.info("Uploading mpj-app-master.jar to: " + dest.toString());
    }

    fs.copyFromLocalFile(false, true, source, dest);
    FileStatus destStatus = fs.getFileStatus(dest);

    Path wrapperSource = new Path(mpjHomeDir + "/lib/mpj-yarn-wrapper.jar");
    String wrapperSuffix = hdfsFolder + "mpj-yarn-wrapper.jar";
    Path wrapperDest = new Path(fs.getHomeDirectory(), wrapperSuffix);

    if (debugYarn) {
        logger.info("Uploading mpj-yarn-wrapper.jar to: " + wrapperDest.toString());
    }

    fs.copyFromLocalFile(false, true, wrapperSource, wrapperDest);

    Path userJar = new Path(jarPath);
    String userJarSuffix = hdfsFolder + "user-code.jar";
    Path userJarDest = new Path(fs.getHomeDirectory(), userJarSuffix);

    if (debugYarn) {
        logger.info("Uploading user-code.jar to: " + userJarDest.toString());
    }

    fs.copyFromLocalFile(false, true, userJar, userJarDest);

    YarnConfiguration conf = new YarnConfiguration();
    YarnClient yarnClient = YarnClient.createYarnClient();
    yarnClient.init(conf);
    yarnClient.start();

    if (debugYarn) {
        YarnClusterMetrics metrics = yarnClient.getYarnClusterMetrics();
        logger.info("\nNodes Information");
        logger.info("Number of NM: " + metrics.getNumNodeManagers() + "\n");

        List<NodeReport> nodeReports = yarnClient.getNodeReports(NodeState.RUNNING);
        for (NodeReport n : nodeReports) {
            logger.info("NodeId: " + n.getNodeId());
            logger.info("RackName: " + n.getRackName());
            logger.info("Total Memory: " + n.getCapability().getMemory());
            logger.info("Used Memory: " + n.getUsed().getMemory());
            logger.info("Total vCores: " + n.getCapability().getVirtualCores());
            logger.info("Used vCores: " + n.getUsed().getVirtualCores() + "\n");
        }
    }

    logger.info("Creating server socket at HOST " + serverName + " PORT " + serverPort + " \nWaiting for " + np
            + " processes to connect...");

    // Creating the server sockets for incoming connections. A failure here is fatal:
    // both sockets are accepted on below, so the exception is propagated now, before
    // anything has been submitted to the cluster.
    servSock = new ServerSocket(serverPort);
    infoSock = new ServerSocket();
    TEMP_PORT = findPort(infoSock);

    // Create application via yarnClient
    YarnClientApplication app = yarnClient.createApplication();
    GetNewApplicationResponse appResponse = app.getNewApplicationResponse();

    int maxMem = appResponse.getMaximumResourceCapability().getMemory();

    if (debugYarn) {
        logger.info("Max memory capability resources in cluster: " + maxMem);
    }

    if (amMem > maxMem) {
        amMem = maxMem;
        logger.info("AM memory specified above threshold of cluster "
                + "Using maximum memory for AM container: " + amMem);
    }
    int maxVcores = appResponse.getMaximumResourceCapability().getVirtualCores();

    if (debugYarn) {
        logger.info("Max vCores capability resources in cluster: " + maxVcores);
    }

    if (amCores > maxVcores) {
        amCores = maxVcores;
        logger.info("AM virtual cores specified above threshold of cluster "
                + "Using maximum virtual cores for AM container: " + amCores);
    }

    // Set up the container launch context for the application master
    ContainerLaunchContext amContainer = Records.newRecord(ContainerLaunchContext.class);

    List<String> commands = new ArrayList<String>();
    commands.add("$JAVA_HOME/bin/java");
    commands.add("-Xmx" + amMem + "m");
    commands.add("runtime.starter.MPJAppMaster");
    commands.add("--np");
    commands.add(String.valueOf(np));
    commands.add("--serverName");
    commands.add(serverName); //server name
    commands.add("--ioServerPort");
    commands.add(Integer.toString(serverPort)); //server port
    commands.add("--deviceName");
    commands.add(deviceName); //device name
    commands.add("--className");
    commands.add(className); //class name
    commands.add("--wdir");
    commands.add(workingDirectory); //wdir
    commands.add("--psl");
    commands.add(Integer.toString(psl)); //protocol switch limit
    commands.add("--wireUpPort");
    commands.add(String.valueOf(TEMP_PORT)); //for sharing ports & rank
    commands.add("--wrapperPath");
    commands.add(wrapperDest.toString()); //MPJYarnWrapper.jar HDFS path
    commands.add("--userJarPath");
    commands.add(userJarDest.toString()); //User Jar File HDFS path
    commands.add("--mpjContainerPriority");
    commands.add(mpjContainerPriority); // priority for mpj containers
    commands.add("--containerMem");
    commands.add(containerMem);
    commands.add("--containerCores");
    commands.add(containerCores);

    if (debugYarn) {
        commands.add("--debugYarn");
    }

    if (appArgs != null) {
        commands.add("--appArgs");
        Collections.addAll(commands, appArgs);
    }

    amContainer.setCommands(commands); //set commands

    // Setup local Resource for ApplicationMaster
    LocalResource appMasterJar = Records.newRecord(LocalResource.class);

    appMasterJar.setResource(ConverterUtils.getYarnUrlFromPath(dest));
    appMasterJar.setSize(destStatus.getLen());
    appMasterJar.setTimestamp(destStatus.getModificationTime());
    appMasterJar.setType(LocalResourceType.ARCHIVE);
    appMasterJar.setVisibility(LocalResourceVisibility.APPLICATION);

    amContainer.setLocalResources(Collections.singletonMap("mpj-app-master.jar", appMasterJar));

    // Setup CLASSPATH for ApplicationMaster
    // Setting up the environment
    Map<String, String> appMasterEnv = new HashMap<String, String>();
    setupAppMasterEnv(appMasterEnv);
    amContainer.setEnvironment(appMasterEnv);

    // Set up resource type requirements for ApplicationMaster
    Resource capability = Records.newRecord(Resource.class);
    capability.setMemory(amMem);
    capability.setVirtualCores(amCores);

    // Finally, set-up ApplicationSubmissionContext for the application
    ApplicationSubmissionContext appContext = app.getApplicationSubmissionContext();

    appContext.setApplicationName(appName);
    appContext.setAMContainerSpec(amContainer);
    appContext.setResource(capability);
    appContext.setQueue(yarnQueue); // queue

    Priority priority = Priority.newInstance(amPriority);
    appContext.setPriority(priority);

    ApplicationId appId = appContext.getApplicationId();

    //Adding ShutDown Hook
    Runtime.getRuntime().addShutdownHook(new KillYarnApp(appId, yarnClient));

    // Submit application
    System.out.println("Submitting Application: " + appContext.getApplicationName() + "\n");

    try {
        isRunning = true;
        yarnClient.submitApplication(appContext);
    } catch (Exception exp) {
        // Nothing was launched, so no process will ever connect back; waiting on the
        // server sockets below would block forever. Report and propagate instead.
        System.err.println("Error Submitting Application");
        isRunning = false;
        throw exp;
    }

    // np = number of processes , + 1 for Application Master container
    IOMessagesThread[] ioThreads = new IOMessagesThread[np + 1];

    peers = new String[np];
    socketList = new Vector<Socket>();
    int wport = 0;
    int rport = 0;
    int rank = 0;

    // np + 1 IOThreads
    for (int i = 0; i < (np + 1); i++) {
        try {
            sock = servSock.accept();

            //start IO thread to read STDOUT and STDERR from wrappers
            IOMessagesThread io = new IOMessagesThread(sock);
            ioThreads[i] = io;
            ioThreads[i].start();
        } catch (Exception e) {
            System.err.println("Error accepting connection from peer socket..");
            e.printStackTrace();
        }
    }

    // Loop to read port numbers from Wrapper.java processes
    // and to create WRAPPER_INFO (containing all IPs and ports)
    for (int i = 0; i < np; i++) {
        try {
            sock = infoSock.accept();

            DataInputStream in = new DataInputStream(sock.getInputStream());
            if (in.readUTF().startsWith("Sending Info")) {
                wport = in.readInt();
                rport = in.readInt();
                rank = in.readInt();
                peers[rank] = ";" + sock.getInetAddress().getHostAddress() + "@" + rport + "@" + wport + "@"
                        + rank;
                socketList.add(sock);
            }
        } catch (Exception e) {
            System.err.println("[MPJYarnClient.java]: Error accepting" + " connection from peer socket!");
            e.printStackTrace();
        }
    }

    StringBuilder wrapperInfo = new StringBuilder("#Peer Information");
    for (int i = 0; i < np; i++) {
        wrapperInfo.append(peers[i]);
    }
    String WRAPPER_INFO = wrapperInfo.toString();

    // Loop to broadcast WRAPPER_INFO to all Wrappers that registered; iterating over the
    // sockets actually collected avoids indexing past the list when a peer is missing.
    for (int i = 0; i < socketList.size(); i++) {
        try {
            sock = socketList.get(i);
            DataOutputStream out = new DataOutputStream(sock.getOutputStream());

            out.writeUTF(WRAPPER_INFO);
            out.flush();

            sock.close();
        } catch (Exception e) {
            System.err.println("[MPJYarnClient.java]: Error closing" + " connection from peer socket..");
            e.printStackTrace();
        }
    }

    try {
        infoSock.close();
    } catch (IOException exp) {
        exp.printStackTrace();
    }

    // wait for all IO Threads to complete
    for (int i = 0; i < (np + 1); i++) {
        // A slot stays null when accepting that connection failed above.
        if (ioThreads[i] != null) {
            ioThreads[i].join();
        }
    }
    isRunning = true;

    System.out.println("\nApplication Statistics!");
    // Poll the application report until the application reaches a terminal state.
    while (true) {
        appReport = yarnClient.getApplicationReport(appId);
        appState = appReport.getYarnApplicationState();
        fStatus = appReport.getFinalApplicationStatus();
        if (appState == YarnApplicationState.FINISHED) {
            isRunning = false;
            System.out.println("State: " + fStatus);
            break;
        } else if (appState == YarnApplicationState.KILLED || appState == YarnApplicationState.FAILED) {
            isRunning = false;
            System.out.println("State: " + appState);
            break;
        }
        Thread.sleep(100);
    }

    try {

        if (debugYarn) {
            logger.info("Cleaning the files from hdfs: ");
            logger.info("1) " + dest.toString());
            logger.info("2) " + wrapperDest.toString());
            logger.info("3) " + userJarDest.toString());
        }

        // The uploaded jars are plain files, so the non-deprecated, non-recursive
        // delete overload is used.
        fs.delete(dest, false);
        fs.delete(wrapperDest, false);
        fs.delete(userJarDest, false);
    } catch (IOException exp) {
        exp.printStackTrace();
    }
    System.out.println("Application ID: " + appId + "\n" + "Application User: " + appReport.getUser() + "\n"
            + "RM Queue: " + appReport.getQueue() + "\n" + "Start Time: " + appReport.getStartTime() + "\n"
            + "Finish Time: " + appReport.getFinishTime());
}

From source file:simsql.runtime.MRLoader.java

License:Apache License

/**
 * Bulk-loads a text file into the given output path by running a MapReduce job.
 *
 * <p>Any existing output path is deleted first. Input that does not start with
 * {@code hdfs:} is uploaded to a temporary HDFS file (registered for deletion on exit)
 * before the job runs. After the job, part files of at most 4 bytes are removed from the
 * output directory on a best-effort basis.
 *
 * @param inputPath  path of the text file to load; treated as already on HDFS when it
 *                   starts with {@code hdfs:}, otherwise as a local file
 * @param outputPath HDFS path the job writes to
 * @param typeCode   value stored under {@code simsql.loader.typeCode}
 * @param r          relation whose name and attributes configure the job
 * @param sortAtt    value stored under {@code simsql.loader.sortAtt}
 * @return the value of the {@code BYTES_WRITTEN} counter of the finished job
 * @throws RuntimeException if HDFS cannot be accessed, the upload fails, or the job
 *         cannot be created, configured, run, or does not complete successfully
 */
public long run(String inputPath, String outputPath, short typeCode, Relation r, int sortAtt) {

    // make a directory for the relation
    Configuration conf = new Configuration();
    FileSystem dfs;

    try {
        dfs = FileSystem.get(conf);
    } catch (Exception e) {
        throw new RuntimeException("Cannot access HDFS!", e);
    }

    try {
        // if it exists, destroy it.
        Path path = new Path(outputPath);
        if (dfs.exists(path)) {
            dfs.delete(path, true);
        }
    } catch (Exception e) {
        throw new RuntimeException("Could not create the file to bulk load to!", e);
    }

    // Decide once whether the input already lives on HDFS, so that the upload step and
    // the size computation below cannot disagree.
    final String hdfsPrefix = "hdfs:";
    final boolean inputOnHdfs = inputPath.startsWith(hdfsPrefix);

    // find a file name
    String tempPath;
    if (inputOnHdfs) {
        // Strip only the leading scheme, not later occurrences of the same text.
        tempPath = inputPath.substring(hdfsPrefix.length());
    } else {
        tempPath = "/tempDataFile_" + r.getName();
        try {
            dfs.delete(new Path(tempPath), true);
        } catch (Exception ignored) {
            // Best effort: a leftover temp file is overwritten by the upload below.
        }

        // upload the text file
        try {
            dfs.copyFromLocalFile(false, true, new Path(inputPath), new Path(tempPath));
            dfs.deleteOnExit(new Path(tempPath));
        } catch (Exception e) {
            throw new RuntimeException("Failed to upload text file " + inputPath + " to HDFS!", e);
        }
    }

    // set up the new job's parameters.
    conf.setBoolean("mapred.compress.map.output", true);
    conf.set("mapred.map.output.compression.codec", RecordCompression.getCodecClass());

    conf.set("io.serializations",
            "simsql.runtime.RecordSerialization,simsql.runtime.RecordKeySerialization,org.apache.hadoop.io.serializer.WritableSerialization");
    conf.setInt("simsql.loader.numAtts", r.getAttributes().size());
    conf.setInt("simsql.loader.typeCode", (int) typeCode);
    conf.setInt("simsql.loader.sortAtt", sortAtt);

    // Class names of the attributes' physical realizations, in attribute order.
    String[] myStrings = new String[r.getAttributes().size()];
    int j = 0;
    for (simsql.compiler.Attribute a : r.getAttributes()) {
        myStrings[j++] = a.getPhysicalRealization().getClass().getName();
    }

    conf.setStrings("simsql.loader.types", myStrings);

    // create a job
    Job job;
    try {
        job = new Job(conf);
    } catch (Exception e) {
        throw new RuntimeException("Unable to create bulk loading job!", e);
    }

    // set the split size (number of mappers)
    long fSize;
    if (inputOnHdfs) {
        fSize = RelOp.getPathsTotalSize(new String[] { tempPath });
    } else {
        fSize = new File(inputPath).length();
    }

    long splitSize = fSize / (long) numTasks;
    FileInputFormat.setMinInputSplitSize(job, splitSize);
    FileInputFormat.setMaxInputSplitSize(job, splitSize);

    // and the number of reducers
    job.setNumReduceTasks(numTasks);

    // the mapper/reducer/jar
    job.setMapperClass(MRLoaderMapper.class);
    job.setReducerClass(MRLoaderReducer.class);
    job.setJarByClass(MRLoader.class);

    // I/O settings.
    job.setOutputFormatClass(RecordOutputFormat.class);

    job.setMapOutputKeyClass(RecordKey.class);
    job.setMapOutputValueClass(RecordWrapper.class);
    job.setOutputKeyClass(Nothing.class);
    job.setOutputValueClass(Record.class);
    try {
        FileInputFormat.setInputPaths(job, new Path(tempPath));
        FileOutputFormat.setOutputPath(job, new Path(outputPath));
    } catch (Exception e) {
        throw new RuntimeException("Could not set job inputs/outputs", e);
    }
    job.setGroupingComparatorClass(RecordKeyGroupingComparator.class);
    job.setPartitionerClass(RecordPartitioner.class);
    job.setSortComparatorClass(RecordKeySortComparator.class);

    job.setJobName("MRLoader: " + inputPath + " ==> " + outputPath);

    // run it
    Counters counters;
    boolean succeeded;
    try {
        succeeded = job.waitForCompletion(true);
        counters = job.getCounters();
    } catch (Exception e) {
        throw new RuntimeException("Could not set up bulk loader job!", e);
    }
    if (!succeeded) {
        // waitForCompletion reports a failed job through its return value, not an exception.
        throw new RuntimeException("Bulk loading job failed: " + inputPath + " ==> " + outputPath);
    }

    // now, delete all the empty part files
    try {

        // get a filesystem
        FileSystem ddfs = FileSystem.get(conf);
        Path outPath = new Path(outputPath);
        if (ddfs.exists(outPath) && ddfs.isDirectory(outPath)) {
            FileStatus fstatus[] = ddfs.listStatus(outPath, new TableFileFilter());
            for (FileStatus ff : fstatus) {
                if (ddfs.getContentSummary(ff.getPath()).getLength() <= 4) { // snappy leaves 4-byte long files around...
                    ddfs.delete(ff.getPath(), true);
                }
            }
        }
    } catch (Exception ignored) {
        // this isn't disastrous: leftover empty part files do not affect the returned count
    }

    // get the counter for the output of the mapper.
    Counter bytesCounter = counters.findCounter(OutputFileSerializer.Counters.BYTES_WRITTEN);
    return bytesCounter.getValue();
}

From source file:test.anna.dshell.Client.java

License:Apache License

/**
 * Main run function for the client.
 *
 * <p>Starts the YARN client, logs cluster, queue and ACL information, caps the requested
 * application master memory and vcores at the maximum reported in the new-application
 * response, copies the application master jar (and, when configured, the log4j file, shell
 * script, shell command and shell arguments) to the file system, builds the application
 * master launch context, submits the application and finally monitors it.
 *
 * @return the value returned by {@code monitorApplication(appId)}; true if application
 *         completed successfully
 * @throws IOException if security is enabled and no RM principal is configured to act as
 *         token renewer, or if one of the file system / client calls throws it
 * @throws YarnException if one of the YARN client calls throws it
 */
public boolean run() throws IOException, YarnException {

    LOG.info("Running Client");
    yarnClient.start();

    logClusterAndQueueInfo();

    // Get a new application id
    YarnClientApplication app = yarnClient.createApplication();
    GetNewApplicationResponse appResponse = app.getNewApplicationResponse();
    // TODO get min/max resource capabilities from RM and change memory ask if needed
    // If we do not have min/max, we may not be able to correctly request
    // the required resources from the RM for the app master
    // Memory ask has to be a multiple of min and less than max.
    // Dump out information about cluster capability as seen by the resource manager
    int maxMem = appResponse.getMaximumResourceCapability().getMemory();
    LOG.info("Max mem capabililty of resources in this cluster " + maxMem);

    // A resource ask cannot exceed the max.
    if (amMemory > maxMem) {
        LOG.info("AM memory specified above max threshold of cluster. Using max value." + ", specified="
                + amMemory + ", max=" + maxMem);
        amMemory = maxMem;
    }

    int maxVCores = appResponse.getMaximumResourceCapability().getVirtualCores();
    LOG.info("Max virtual cores capabililty of resources in this cluster " + maxVCores);

    if (amVCores > maxVCores) {
        LOG.info("AM virtual cores specified above max threshold of cluster. " + "Using max value."
                + ", specified=" + amVCores + ", max=" + maxVCores);
        amVCores = maxVCores;
    }

    // set the application name
    ApplicationSubmissionContext appContext = app.getApplicationSubmissionContext();
    ApplicationId appId = appContext.getApplicationId();

    appContext.setKeepContainersAcrossApplicationAttempts(keepContainers);
    appContext.setApplicationName(appName);

    // set local resources for the application master
    // local files or archives as needed
    // In this scenario, the jar file for the application master is part of the local resources
    Map<String, LocalResource> localResources = new HashMap<String, LocalResource>();

    LOG.info("Copy App Master jar from local filesystem and add to local environment");
    // Copy the application master jar to the filesystem
    // Create a local resource to point to the destination jar path
    FileSystem fs = FileSystem.get(conf);
    addToLocalResources(fs, appMasterJar, appMasterJarPath, appId.toString(), localResources, null);
    System.out.println("CLIENT>>> copy the " + appMasterJar + " to " + appMasterJarPath + " in container 0.");

    // Set the log4j properties if needed
    if (!log4jPropFile.isEmpty()) {
        addToLocalResources(fs, log4jPropFile, log4jPath, appId.toString(), localResources, null);
    }

    // The shell script has to be made available on the final container(s)
    // where it will be executed.
    // To do this, we need to first copy into the filesystem that is visible
    // to the yarn framework.
    // We do not need to set this as a local resource for the application
    // master as the application master does not need it.
    String hdfsShellScriptLocation = "";
    long hdfsShellScriptLen = 0;
    long hdfsShellScriptTimestamp = 0;
    if (!shellScriptPath.isEmpty()) {
        Path shellSrc = new Path(shellScriptPath);
        String shellPathSuffix = appName + "/" + appId.toString() + "/" + SCRIPT_PATH;
        Path shellDst = new Path(fs.getHomeDirectory(), shellPathSuffix);
        // delSrc=false keeps the local script, overwrite=true replaces any existing copy
        fs.copyFromLocalFile(false, true, shellSrc, shellDst);
        hdfsShellScriptLocation = shellDst.toUri().toString();
        FileStatus shellFileStatus = fs.getFileStatus(shellDst);
        hdfsShellScriptLen = shellFileStatus.getLen();
        hdfsShellScriptTimestamp = shellFileStatus.getModificationTime();
    }

    if (!shellCommand.isEmpty()) {
        addToLocalResources(fs, null, shellCommandPath, appId.toString(), localResources, shellCommand);
    }

    if (shellArgs.length > 0) {
        addToLocalResources(fs, null, shellArgsPath, appId.toString(), localResources,
                StringUtils.join(shellArgs, " "));
    }

    // Set the env variables to be setup in the env where the application master will be run
    LOG.info("Set the environment for the application master");
    Map<String, String> env = new HashMap<String, String>();

    // put location of shell script into env
    // using the env info, the application master will create the correct local resource for the
    // eventual containers that will be launched to execute the shell scripts
    env.put(DSConstants.DISTRIBUTEDSHELLSCRIPTLOCATION, hdfsShellScriptLocation);
    env.put(DSConstants.DISTRIBUTEDSHELLSCRIPTTIMESTAMP, Long.toString(hdfsShellScriptTimestamp));
    env.put(DSConstants.DISTRIBUTEDSHELLSCRIPTLEN, Long.toString(hdfsShellScriptLen));
    env.put("CLASSPATH", buildAmClassPath());

    // Set the necessary command to execute the application master
    String amCommand = buildAmLaunchCommand();
    LOG.info("Completed setting up app master command " + amCommand);
    List<String> commands = new ArrayList<String>();
    commands.add(amCommand);

    // Set up the container launch context for the application master
    ContainerLaunchContext amContainer = ContainerLaunchContext.newInstance(localResources, env, commands, null,
            null, null);

    // Set up resource type requirements
    // For now, both memory and vcores are supported, so we set memory and
    // vcores requirements
    Resource capability = Resource.newInstance(amMemory, amVCores);
    appContext.setResource(capability);

    // Setup security tokens
    attachFsDelegationTokens(fs, amContainer);

    appContext.setAMContainerSpec(amContainer);

    // Set the priority for the application master
    // TODO - what is the range for priority? how to decide?
    Priority pri = Priority.newInstance(amPriority);
    appContext.setPriority(pri);

    // Set the queue to which this application is to be submitted in the RM
    appContext.setQueue(amQueue);

    // Submit the application to the applications manager.
    // The response is ignored: either a valid response object is returned on success
    // or an exception is thrown to denote some form of a failure
    LOG.info("Submitting application to ASM");

    yarnClient.submitApplication(appContext);

    // TODO
    // Try submitting the same request again
    // app submission failure?

    // Monitor the application
    return monitorApplication(appId);

}

/** Logs cluster metrics, running-node reports, queue info and queue ACLs read from the YARN client. */
private void logClusterAndQueueInfo() throws IOException, YarnException {
    YarnClusterMetrics clusterMetrics = yarnClient.getYarnClusterMetrics();
    LOG.info("Got Cluster metric info from ASM" + ", numNodeManagers=" + clusterMetrics.getNumNodeManagers());

    List<NodeReport> clusterNodeReports = yarnClient.getNodeReports(NodeState.RUNNING);
    LOG.info("Got Cluster node info from ASM");
    for (NodeReport node : clusterNodeReports) {
        // every field is emitted as key=value so the line stays parseable
        LOG.info("Got node report from ASM for" + ", nodeId=" + node.getNodeId() + ", nodeAddress="
                + node.getHttpAddress() + ", nodeRackName=" + node.getRackName() + ", nodeNumContainers="
                + node.getNumContainers());
    }

    QueueInfo queueInfo = yarnClient.getQueueInfo(this.amQueue);
    LOG.info("Queue info" + ", queueName=" + queueInfo.getQueueName() + ", queueCurrentCapacity="
            + queueInfo.getCurrentCapacity() + ", queueMaxCapacity=" + queueInfo.getMaximumCapacity()
            + ", queueApplicationCount=" + queueInfo.getApplications().size() + ", queueChildQueueCount="
            + queueInfo.getChildQueues().size());

    List<QueueUserACLInfo> listAclInfo = yarnClient.getQueueAclsInfo();
    for (QueueUserACLInfo aclInfo : listAclInfo) {
        for (QueueACL userAcl : aclInfo.getUserAcls()) {
            LOG.info("User ACL Info for Queue" + ", queueName=" + aclInfo.getQueueName() + ", userAcl="
                    + userAcl.name());
        }
    }
}

/** Builds the CLASSPATH value placed in the application master's environment. */
private String buildAmClassPath() {
    // Add AppMaster.jar location to classpath
    // At some point we should not be required to add
    // the hadoop specific classpaths to the env.
    // It should be provided out of the box.
    // For now setting all required classpaths including
    // the classpath to "." for the application jar
    StringBuilder classPathEnv = new StringBuilder(Environment.CLASSPATH.$$())
            .append(ApplicationConstants.CLASS_PATH_SEPARATOR).append("./*");
    for (String c : conf.getStrings(YarnConfiguration.YARN_APPLICATION_CLASSPATH,
            YarnConfiguration.DEFAULT_YARN_CROSS_PLATFORM_APPLICATION_CLASSPATH)) {
        classPathEnv.append(ApplicationConstants.CLASS_PATH_SEPARATOR);
        classPathEnv.append(c.trim());
    }
    classPathEnv.append(ApplicationConstants.CLASS_PATH_SEPARATOR).append("./log4j.properties");

    // add the runtime classpath needed for tests to work
    if (conf.getBoolean(YarnConfiguration.IS_MINI_YARN_CLUSTER, false)) {
        classPathEnv.append(':');
        classPathEnv.append(System.getProperty("java.class.path"));
    }
    return classPathEnv.toString();
}

/** Builds the single shell command line (space-separated, with a trailing space) that launches the application master. */
private String buildAmLaunchCommand() {
    // purely local list, so no need for the synchronized legacy Vector
    List<CharSequence> vargs = new ArrayList<CharSequence>(30);

    // Set java executable command
    LOG.info("Setting up app master command");
    vargs.add(Environment.JAVA_HOME.$$() + "/bin/java");
    // Set Xmx based on am memory size
    vargs.add("-Xmx" + amMemory + "m");
    // Set class name
    vargs.add(appMasterMainClass);
    // Set params for Application Master
    vargs.add("--container_memory " + String.valueOf(containerMemory));
    vargs.add("--container_vcores " + String.valueOf(containerVirtualCores));
    vargs.add("--num_containers " + String.valueOf(numContainers));
    vargs.add("--priority " + String.valueOf(shellCmdPriority));

    for (Map.Entry<String, String> entry : shellEnv.entrySet()) {
        vargs.add("--shell_env " + entry.getKey() + "=" + entry.getValue());
    }
    if (debugFlag) {
        vargs.add("--debug");
    }

    // redirect the application master's stdout/stderr into the container log directory
    vargs.add("1>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/AppMaster.stdout");
    vargs.add("2>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/AppMaster.stderr");

    // Get final command
    StringBuilder command = new StringBuilder();
    for (CharSequence str : vargs) {
        command.append(str).append(" ");
    }
    return command.toString();
}

/**
 * When security is enabled, obtains delegation tokens from {@code fs} and sets them on the
 * application master container; does nothing otherwise.
 *
 * @throws IOException if no RM principal is configured to act as token renewer
 */
private void attachFsDelegationTokens(FileSystem fs, ContainerLaunchContext amContainer) throws IOException {
    if (!UserGroupInformation.isSecurityEnabled()) {
        return;
    }
    // Note: Credentials class is marked as LimitedPrivate for HDFS and MapReduce
    Credentials credentials = new Credentials();
    String tokenRenewer = conf.get(YarnConfiguration.RM_PRINCIPAL);
    if (tokenRenewer == null || tokenRenewer.length() == 0) {
        throw new IOException("Can't get Master Kerberos principal for the RM to use as renewer");
    }

    // For now, only getting tokens for the default file-system.
    final Token<?> tokens[] = fs.addDelegationTokens(tokenRenewer, credentials);
    if (tokens != null) {
        for (Token<?> token : tokens) {
            LOG.info("Got dt for " + fs.getUri() + "; " + token);
        }
    }
    DataOutputBuffer dob = new DataOutputBuffer();
    credentials.writeTokenStorageToStream(dob);
    ByteBuffer fsTokens = ByteBuffer.wrap(dob.getData(), 0, dob.getLength());
    amContainer.setTokens(fsTokens);
}