Example usage for org.apache.hadoop.fs FileSystem copyFromLocalFile

Introduction

This page collects example usages of org.apache.hadoop.fs.FileSystem.copyFromLocalFile, drawn from open-source projects.

Prototype

public void copyFromLocalFile(boolean delSrc, boolean overwrite, Path src, Path dst) throws IOException 

Document

The src file is on the local disk; it is copied into the target filesystem at the given dst path. delSrc indicates whether the local source should be removed after the copy, and overwrite whether an existing destination may be replaced.
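
Before the longer examples below, a minimal sketch of the call itself may help; the paths here are hypothetical. The first boolean (delSrc) controls whether the local source is deleted after the copy, the second (overwrite) whether an existing destination is replaced.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public static void uploadExample(Configuration conf) throws IOException {
    FileSystem fs = FileSystem.get(conf);
    // Hypothetical paths: a file on the local disk and a destination in the user's home directory on the cluster FS
    Path src = new Path("/tmp/data.csv");
    Path dst = new Path(fs.getHomeDirectory(), "data.csv");
    // delSrc = false keeps the local file; overwrite = true replaces any existing destination
    fs.copyFromLocalFile(false, true, src, dst);
}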

Usage

From source file:edu.uci.ics.asterix.aoya.AsterixYARNClient.java

License:Apache License

/**
 * Uploads binary resources to HDFS for use by the AM
 * @return
 * @throws IOException
 * @throws YarnException
 */
public List<DFSResourceCoordinate> distributeBinaries() throws IOException, YarnException {

    List<DFSResourceCoordinate> resources = new ArrayList<DFSResourceCoordinate>(2);
    // Copy the application master jar to the filesystem
    // Create a local resource to point to the destination jar path
    FileSystem fs = FileSystem.get(conf);
    Path src, dst;
    FileStatus destStatus;
    String pathSuffix;

    // adding info so we can add the jar to the App master container path

    // Add the asterix tarfile to HDFS for easy distribution
    // Keep it all archived for now so add it as a file...

    pathSuffix = CONF_DIR_REL + instanceFolder + "asterix-server.zip";
    dst = new Path(fs.getHomeDirectory(), pathSuffix);
    if (refresh) {
        if (fs.exists(dst)) {
            fs.delete(dst, false);
        }
    }
    if (!fs.exists(dst)) {
        src = new Path(asterixZip);
        LOG.info("Copying Asterix distributable to DFS");
        fs.copyFromLocalFile(false, true, src, dst);
    }
    destStatus = fs.getFileStatus(dst);
    LocalResource asterixTarLoc = Records.newRecord(LocalResource.class);
    asterixTarLoc.setType(LocalResourceType.ARCHIVE);
    asterixTarLoc.setVisibility(LocalResourceVisibility.PRIVATE);
    asterixTarLoc.setResource(ConverterUtils.getYarnUrlFromPath(dst));
    asterixTarLoc.setTimestamp(destStatus.getModificationTime());

    // adding info so we can add the tarball to the App master container path
    DFSResourceCoordinate tar = new DFSResourceCoordinate();
    tar.envs.put(dst.toUri().toString(), AConstants.TARLOCATION);
    tar.envs.put(Long.toString(asterixTarLoc.getSize()), AConstants.TARLEN);
    tar.envs.put(Long.toString(asterixTarLoc.getTimestamp()), AConstants.TARTIMESTAMP);
    tar.res = asterixTarLoc;
    tar.name = "asterix-server.zip";
    resources.add(tar);

    // Set the log4j properties if needed
    if (!log4jPropFile.isEmpty()) {
        Path log4jSrc = new Path(log4jPropFile);
        Path log4jDst = new Path(fs.getHomeDirectory(), "log4j.props");
        fs.copyFromLocalFile(false, true, log4jSrc, log4jDst);
        FileStatus log4jFileStatus = fs.getFileStatus(log4jDst);
        LocalResource log4jRsrc = Records.newRecord(LocalResource.class);
        log4jRsrc.setType(LocalResourceType.FILE);
        log4jRsrc.setVisibility(LocalResourceVisibility.PRIVATE);
        log4jRsrc.setResource(ConverterUtils.getYarnUrlFromURI(log4jDst.toUri()));
        log4jRsrc.setTimestamp(log4jFileStatus.getModificationTime());
        log4jRsrc.setSize(log4jFileStatus.getLen());
        DFSResourceCoordinate l4j = new DFSResourceCoordinate();
        l4j.res = log4jRsrc;
        l4j.name = "log4j.properties";
        resources.add(l4j);
    }

    resources.addAll(installAmLibs());
    return resources;
}
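
Distilled from the method above (names and paths are illustrative, reusing the same classes), the core pattern is: upload the archive with overwrite enabled, then describe the uploaded file to YARN as a LocalResource using the size and timestamp reported by the destination FileStatus.

// Condensed sketch of the upload-and-register pattern above; paths and names are illustrative.
FileSystem fs = FileSystem.get(conf);
Path src = new Path("/local/asterix-server.zip");
Path dst = new Path(fs.getHomeDirectory(), "asterix-server.zip");
fs.copyFromLocalFile(false, true, src, dst);       // keep the local copy, overwrite the remote copy

FileStatus status = fs.getFileStatus(dst);
LocalResource archive = Records.newRecord(LocalResource.class);
archive.setType(LocalResourceType.ARCHIVE);
archive.setVisibility(LocalResourceVisibility.PRIVATE);
archive.setResource(ConverterUtils.getYarnUrlFromPath(dst));
archive.setTimestamp(status.getModificationTime());
archive.setSize(status.getLen());                  // the AM needs the exact size and timestamp of the uploaded file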

From source file:gobblin.util.HadoopUtils.java

License:Apache License

private static void copyPath(FileSystem srcFs, Path src, FileSystem dstFs, Path dst, boolean deleteSource,
        boolean overwrite, Configuration conf) throws IOException {

    Preconditions.checkArgument(srcFs.exists(src),
            String.format("Cannot copy from %s to %s because src does not exist", src, dst));
    Preconditions.checkArgument(overwrite || !dstFs.exists(dst),
            String.format("Cannot copy from %s to %s because dst exists", src, dst));

    try {
        boolean isSourceFileSystemLocal = srcFs instanceof LocalFileSystem
                || srcFs instanceof RawLocalFileSystem;
        if (isSourceFileSystemLocal) {
            try {
                dstFs.copyFromLocalFile(deleteSource, overwrite, src, dst);
            } catch (IOException e) {
                throw new IOException(String.format("Failed to copy %s to %s", src, dst), e);
            }
        } else if (!FileUtil.copy(srcFs, src, dstFs, dst, deleteSource, overwrite, conf)) {
            throw new IOException(String.format("Failed to copy %s to %s", src, dst));
        }
    } catch (Throwable t1) {
        try {
            deleteIfExists(dstFs, dst, true);
        } catch (Throwable t2) {
            // Do nothing
        }
        throw t1;
    }
}

From source file:hadoop.yarn.distributedshell.DshellClient.java

License:Apache License

/**
 * Main run function for the client
 * 
 * @return true if application completed successfully
 * @throws IOException
 * @throws YarnException
 */
public boolean run() throws IOException, YarnException {

    LOG.info("Running Client");
    yarnClient.start();

    YarnClusterMetrics clusterMetrics = yarnClient.getYarnClusterMetrics();
    LOG.info("Got Cluster metric info from ASM" + ", numNodeManagers=" + clusterMetrics.getNumNodeManagers());

    List<NodeReport> clusterNodeReports = yarnClient.getNodeReports(NodeState.RUNNING);
    LOG.info("Got Cluster node info from ASM");
    for (NodeReport node : clusterNodeReports) {
        LOG.info("Got node report from ASM for" + ", nodeId=" + node.getNodeId() + ", nodeAddress"
                + node.getHttpAddress() + ", nodeRackName" + node.getRackName() + ", nodeNumContainers"
                + node.getNumContainers());
    }

    QueueInfo queueInfo = yarnClient.getQueueInfo(this.amQueue);
    LOG.info("Queue info" + ", queueName=" + queueInfo.getQueueName() + ", queueCurrentCapacity="
            + queueInfo.getCurrentCapacity() + ", queueMaxCapacity=" + queueInfo.getMaximumCapacity()
            + ", queueApplicationCount=" + queueInfo.getApplications().size() + ", queueChildQueueCount="
            + queueInfo.getChildQueues().size());

    List<QueueUserACLInfo> listAclInfo = yarnClient.getQueueAclsInfo();
    for (QueueUserACLInfo aclInfo : listAclInfo) {
        for (QueueACL userAcl : aclInfo.getUserAcls()) {
            LOG.info("User ACL Info for Queue" + ", queueName=" + aclInfo.getQueueName() + ", userAcl="
                    + userAcl.name());
        }
    }

    // Get a new application id
    YarnClientApplication app = yarnClient.createApplication();
    GetNewApplicationResponse appResponse = app.getNewApplicationResponse();
    // TODO get min/max resource capabilities from RM and change memory ask
    // if needed
    // If we do not have min/max, we may not be able to correctly request
    // the required resources from the RM for the app master
    // Memory ask has to be a multiple of min and less than max.
    // Dump out information about cluster capability as seen by the resource
    // manager
    int maxMem = appResponse.getMaximumResourceCapability().getMemory();
    LOG.info("Max mem capabililty of resources in this cluster " + maxMem);

    // A resource ask cannot exceed the max.
    if (amMemory > maxMem) {
        LOG.info("AM memory specified above max threshold of cluster. Using max value." + ", specified="
                + amMemory + ", max=" + maxMem);
        amMemory = maxMem;
    }

    int maxVCores = appResponse.getMaximumResourceCapability().getVirtualCores();
    LOG.info("Max virtual cores capabililty of resources in this cluster " + maxVCores);

    if (amVCores > maxVCores) {
        LOG.info("AM virtual cores specified above max threshold of cluster. " + "Using max value."
                + ", specified=" + amVCores + ", max=" + maxVCores);
        amVCores = maxVCores;
    }

    // set the application name
    ApplicationSubmissionContext appContext = app.getApplicationSubmissionContext();
    ApplicationId appId = appContext.getApplicationId();

    appContext.setKeepContainersAcrossApplicationAttempts(keepContainers);
    appContext.setApplicationName(appName);

    // Set up the container launch context for the application master
    ContainerLaunchContext amContainer = Records.newRecord(ContainerLaunchContext.class);

    // set local resources for the application master
    // local files or archives as needed
    // In this scenario, the jar file for the application master is part of
    // the local resources
    Map<String, LocalResource> localResources = new HashMap<String, LocalResource>();

    LOG.info("Copy App Master jar from local filesystem and add to local environment");
    // Copy the application master jar to the filesystem
    // Create a local resource to point to the destination jar path
    FileSystem fs = FileSystem.get(conf);
    addToLocalResources(fs, appMasterJar, appMasterJarPath, appId.toString(), localResources, null);

    // Set the log4j properties if needed
    if (!log4jPropFile.isEmpty()) {
        addToLocalResources(fs, log4jPropFile, log4jPath, appId.toString(), localResources, null);
    }

    // The shell script has to be made available on the final container(s)
    // where it will be executed.
    // To do this, we need to first copy into the filesystem that is visible
    // to the yarn framework.
    // We do not need to set this as a local resource for the application
    // master as the application master does not need it.
    String hdfsShellScriptLocation = "";
    long hdfsShellScriptLen = 0;
    long hdfsShellScriptTimestamp = 0;
    if (!shellScriptPath.isEmpty()) {
        Path shellSrc = new Path(shellScriptPath);
        String shellPathSuffix = appName + "/" + appId.toString() + "/" + SCRIPT_PATH;
        Path shellDst = new Path(fs.getHomeDirectory(), shellPathSuffix);
        fs.copyFromLocalFile(false, true, shellSrc, shellDst);
        hdfsShellScriptLocation = shellDst.toUri().toString();
        FileStatus shellFileStatus = fs.getFileStatus(shellDst);
        hdfsShellScriptLen = shellFileStatus.getLen();
        hdfsShellScriptTimestamp = shellFileStatus.getModificationTime();
    }

    if (!shellCommand.isEmpty()) {
        addToLocalResources(fs, null, shellCommandPath, appId.toString(), localResources, shellCommand);
    }

    if (shellArgs.length > 0) {
        addToLocalResources(fs, null, shellArgsPath, appId.toString(), localResources,
                StringUtils.join(shellArgs, " "));
    }
    // Set local resource info into app master container launch context
    amContainer.setLocalResources(localResources);

    // Set the necessary security tokens as needed
    // amContainer.setContainerTokens(containerToken);

    // Set the env variables to be setup in the env where the application
    // master will be run
    LOG.info("Set the environment for the application master");
    Map<String, String> env = new HashMap<String, String>();

    // put location of shell script into env
    // using the env info, the application master will create the correct
    // local resource for the
    // eventual containers that will be launched to execute the shell
    // scripts
    env.put(DshellDSConstants.DISTRIBUTEDSHELLSCRIPTLOCATION, hdfsShellScriptLocation);
    env.put(DshellDSConstants.DISTRIBUTEDSHELLSCRIPTTIMESTAMP, Long.toString(hdfsShellScriptTimestamp));
    env.put(DshellDSConstants.DISTRIBUTEDSHELLSCRIPTLEN, Long.toString(hdfsShellScriptLen));

    // ======================================== container jars
    if (containerJarPaths.length != 0) {
        for (int i = 0; i < containerJarPaths.length; i++) {
            String hdfsJarLocation = "";
            String[] jarNameSplit = containerJarPaths[i].split("/");
            String jarName = jarNameSplit[jarNameSplit.length - 1];

            long hdfsJarLen = 0;
            long hdfsJarTimestamp = 0;
            if (!containerJarPaths[i].isEmpty()) {
                Path jarSrc = new Path(containerJarPaths[i]);
                String jarPathSuffix = appName + "/" + appId.toString() + "/" + jarName;
                Path jarDst = new Path(fs.getHomeDirectory(), jarPathSuffix);
                fs.copyFromLocalFile(false, true, jarSrc, jarDst);
                hdfsJarLocation = jarDst.toUri().toString();
                FileStatus jarFileStatus = fs.getFileStatus(jarDst);
                hdfsJarLen = jarFileStatus.getLen();
                hdfsJarTimestamp = jarFileStatus.getModificationTime();
                env.put(DshellDSConstants.DISTRIBUTEDJARLOCATION + i, hdfsJarLocation);
                env.put(DshellDSConstants.DISTRIBUTEDJARTIMESTAMP + i, Long.toString(hdfsJarTimestamp));
                env.put(DshellDSConstants.DISTRIBUTEDJARLEN + i, Long.toString(hdfsJarLen));
            }
        }
    }
    // ======================================== end container jars

    // ======================================== container archives
    if (containerArchivePaths.length != 0) {
        for (int i = 0; i < containerArchivePaths.length; i++) {
            String hdfsArchiveLocation = "";
            String[] archiveNameSplit = containerArchivePaths[i].split("/");
            String archiveName = archiveNameSplit[archiveNameSplit.length - 1];

            long hdfsArchiveLen = 0;
            long hdfsArchiveTimestamp = 0;
            if (!containerArchivePaths[i].isEmpty()) {
                Path archiveSrc = new Path(containerArchivePaths[i]);
                String archivePathSuffix = appName + "/" + appId.toString() + "/" + archiveName;
                Path archiveDst = new Path(fs.getHomeDirectory(), archivePathSuffix);
                fs.copyFromLocalFile(false, true, archiveSrc, archiveDst);
                hdfsArchiveLocation = archiveDst.toUri().toString();
                FileStatus archiveFileStatus = fs.getFileStatus(archiveDst);
                hdfsArchiveLen = archiveFileStatus.getLen();
                hdfsArchiveTimestamp = archiveFileStatus.getModificationTime();
                env.put(DshellDSConstants.DISTRIBUTEDARCHIVELOCATION + i, hdfsArchiveLocation);
                env.put(DshellDSConstants.DISTRIBUTEDARCHIVETIMESTAMP + i, Long.toString(hdfsArchiveTimestamp));
                env.put(DshellDSConstants.DISTRIBUTEDARCHIVELEN + i, Long.toString(hdfsArchiveLen));
            }
        }
    }
    // ======================================== end container archives

    // Add AppMaster.jar location to classpath
    // At some point we should not be required to add
    // the hadoop specific classpaths to the env.
    // It should be provided out of the box.
    // For now setting all required classpaths including
    // the classpath to "." for the application jar
    StringBuilder classPathEnv = new StringBuilder(Environment.CLASSPATH.$$())
            .append(ApplicationConstants.CLASS_PATH_SEPARATOR).append("./*");
    for (String c : conf.getStrings(YarnConfiguration.YARN_APPLICATION_CLASSPATH,
            YarnConfiguration.DEFAULT_YARN_CROSS_PLATFORM_APPLICATION_CLASSPATH)) {
        classPathEnv.append(ApplicationConstants.CLASS_PATH_SEPARATOR);
        classPathEnv.append(c.trim());
    }
    classPathEnv.append(ApplicationConstants.CLASS_PATH_SEPARATOR).append("./log4j.properties");

    // add the runtime classpath needed for tests to work
    if (conf.getBoolean(YarnConfiguration.IS_MINI_YARN_CLUSTER, false)) {
        classPathEnv.append(':');
        classPathEnv.append(System.getProperty("java.class.path"));
    }

    env.put("CLASSPATH", classPathEnv.toString());

    amContainer.setEnvironment(env);

    // Set the necessary command to execute the application master
    Vector<CharSequence> vargs = new Vector<CharSequence>(30);

    // Set java executable command
    LOG.info("Setting up app master command");
    vargs.add(Environment.JAVA_HOME.$$() + "/bin/java");
    // Set Xmx based on am memory size
    vargs.add("-Xmx" + amMemory + "m");
    // Set class name
    vargs.add(appMasterMainClass);
    // Set params for Application Master
    vargs.add("--container_memory " + String.valueOf(containerMemory));
    vargs.add("--container_vcores " + String.valueOf(containerVirtualCores));
    vargs.add("--num_containers " + String.valueOf(numContainers));
    vargs.add("--priority " + String.valueOf(shellCmdPriority));

    for (Map.Entry<String, String> entry : shellEnv.entrySet()) {
        vargs.add("--shell_env " + entry.getKey() + "=" + entry.getValue());
    }
    if (debugFlag) {
        vargs.add("--debug");
    }

    vargs.add("1>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/AppMaster.stdout");
    vargs.add("2>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/AppMaster.stderr");

    // Get final command
    StringBuilder command = new StringBuilder();
    for (CharSequence str : vargs) {
        command.append(str).append(" ");
    }

    LOG.info("Completed setting up app master command " + command.toString());
    List<String> commands = new ArrayList<String>();
    commands.add(command.toString());
    amContainer.setCommands(commands);

    // Set up resource type requirements
    // For now, both memory and vcores are supported, so we set memory and
    // vcores requirements
    Resource capability = Records.newRecord(Resource.class);
    capability.setMemory(amMemory);
    capability.setVirtualCores(amVCores);
    appContext.setResource(capability);

    // Service data is a binary blob that can be passed to the application
    // Not needed in this scenario
    // amContainer.setServiceData(serviceData);

    // Setup security tokens
    if (UserGroupInformation.isSecurityEnabled()) {
        Credentials credentials = new Credentials();
        String tokenRenewer = conf.get(YarnConfiguration.RM_PRINCIPAL);
        if (tokenRenewer == null || tokenRenewer.length() == 0) {
            throw new IOException("Can't get Master Kerberos principal for the RM to use as renewer");
        }

        // For now, only getting tokens for the default file-system.
        final Token<?> tokens[] = fs.addDelegationTokens(tokenRenewer, credentials);
        if (tokens != null) {
            for (Token<?> token : tokens) {
                LOG.info("Got dt for " + fs.getUri() + "; " + token);
            }
        }
        DataOutputBuffer dob = new DataOutputBuffer();
        credentials.writeTokenStorageToStream(dob);
        ByteBuffer fsTokens = ByteBuffer.wrap(dob.getData(), 0, dob.getLength());
        amContainer.setTokens(fsTokens);
    }

    appContext.setAMContainerSpec(amContainer);

    // Set the priority for the application master
    Priority pri = Records.newRecord(Priority.class);
    // TODO - what is the range for priority? how to decide?
    pri.setPriority(amPriority);
    appContext.setPriority(pri);

    // Set the queue to which this application is to be submitted in the RM
    appContext.setQueue(amQueue);

    // Submit the application to the applications manager
    // SubmitApplicationResponse submitResp =
    // applicationsManager.submitApplication(appRequest);
    // Ignore the response as either a valid response object is returned on
    // success
    // or an exception thrown to denote some form of a failure
    LOG.info("Submitting application to ASM");

    yarnClient.submitApplication(appContext);

    // TODO
    // Try submitting the same request again
    // app submission failure?

    // Monitor the application
    return monitorApplication(appId);

}
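
Every copyFromLocalFile call in this client follows the same shape: upload the local file with overwrite enabled, then capture the destination URI, length, and modification time so the ApplicationMaster can rebuild the matching LocalResource inside the containers. A condensed sketch, where the env keys and paths are illustrative rather than the constants used above:

// Condensed sketch of the script/jar upload pattern above; env keys and paths are illustrative.
Path src = new Path("/local/run.sh");
String pathSuffix = appName + "/" + appId.toString() + "/run.sh";
Path dst = new Path(fs.getHomeDirectory(), pathSuffix);
fs.copyFromLocalFile(false, true, src, dst);       // keep the local file, overwrite the remote copy

FileStatus status = fs.getFileStatus(dst);
env.put("SCRIPT_LOCATION", dst.toUri().toString());
env.put("SCRIPT_LEN", Long.toString(status.getLen()));
env.put("SCRIPT_TIMESTAMP", Long.toString(status.getModificationTime()));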

From source file:inflater.runner.RunInflater.java

License:MIT License

@Override
public int run(String[] args) throws IOException, ClassNotFoundException, InterruptedException, ParseException {
    if (args.length < 2) {
        return -1;
    }
    if (conf == null) {
        conf = new Configuration();
    }

    GiraphConfiguration giraphConf = new GiraphConfiguration(getConf());
    giraphConf.addResource(new Path("giraph-site.xml"));

    GiraphJob job = new GiraphJob(giraphConf, giraphConf.getComputationName());

    Path inputLocal = new Path(args[0]);
    Path outputLocal = new Path(args[1]);

    // We copy file from local file system to HDFS
    FileSystem fs = FileSystem.get(giraphConf);
    inputHDFS = new Path(fs.getHomeDirectory(),
            "Giraph Source" + File.separator + inputLocal.getName() + File.separator + inputLocal.getName());
    inputHDFS = fs.makeQualified(inputHDFS);

    outputHDFS = new Path(fs.getHomeDirectory(),
            "Giraph Source" + File.separator + inputLocal.getName() + File.separator + "output");
    outputHDFS = fs.makeQualified(outputHDFS);

    fs.copyFromLocalFile(false, true, inputLocal, inputHDFS);

    // Delete output path because Hadoop cannot override it.
    if (fs.exists(outputHDFS))
        fs.delete(outputHDFS, true);

    FileOutputFormat.setOutputPath(job.getInternalJob(), outputHDFS);
    GiraphFileInputFormat.addVertexInputPath(giraphConf, inputHDFS);
    new GiraphConfigurationValidator<>(giraphConf).validateConfiguration();
    boolean result = job.run(true);
    if (result) {
        fs.copyToLocalFile(false, new Path(outputHDFS, "part-m-00000"), outputLocal);
    }
    return result ? 0 : -1;
}
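
This runner pairs copyFromLocalFile with copyToLocalFile to round-trip data through HDFS: stage the input before the Giraph job, then pull the result back afterwards. A minimal sketch of that round trip, with hypothetical paths:

// Round-trip sketch: stage local input into HDFS, then pull the result back; paths are hypothetical.
FileSystem fs = FileSystem.get(conf);
Path localIn = new Path("/tmp/graph.txt");
Path hdfsIn = fs.makeQualified(new Path(fs.getHomeDirectory(), "input/graph.txt"));
fs.copyFromLocalFile(false, true, localIn, hdfsIn);     // upload, overwriting any stale copy

// ... run the job that reads hdfsIn and writes hdfsOut ...

Path hdfsOut = new Path(fs.getHomeDirectory(), "output/part-m-00000");
Path localOut = new Path("/tmp/result.txt");
fs.copyToLocalFile(false, hdfsOut, localOut);           // delSrc = false keeps the HDFS copy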

From source file:io.amient.yarn1.YarnClient.java

License:Open Source License

/**
 * Distribute all dependencies in a single jar both from Client to Master as well as Master to Container(s)
 */
public static void distributeResources(Configuration yarnConf, Properties appConf, String appName)
        throws IOException {
    final FileSystem distFs = FileSystem.get(yarnConf);
    final FileSystem localFs = FileSystem.getLocal(yarnConf);
    try {

        //distribute configuration
        final Path dstConfig = new Path(distFs.getHomeDirectory(), appName + ".configuration");
        final FSDataOutputStream fs = distFs.create(dstConfig);
        appConf.store(fs, "Yarn1 Application Config for " + appName);
        fs.close();
        log.info("Updated resource " + dstConfig);

        //distribute main jar
        final String localPath = YarnClient.class.getProtectionDomain().getCodeSource().getLocation().getFile()
                .replace(".jar/", ".jar");
        final Path src;
        final String jarName = appName + ".jar";
        if (localPath.endsWith(".jar")) {
            log.info("Distributing local jar : " + localPath);
            src = new Path(localPath);
        } else {
            try {
                String localArchive = localPath + appName + ".jar";
                localFs.delete(new Path(localArchive), false);
                log.info("Unpacking compile scope dependencies: " + localPath);
                executeShell("mvn -f " + localPath + "/../.. generate-resources");
                log.info("Preparing application main jar " + localArchive);
                executeShell("jar cMf " + localArchive + " -C " + localPath + " ./");
                src = new Path(localArchive);

            } catch (InterruptedException e) {
                throw new IOException(e);
            }
        }

        byte[] digest;
        final MessageDigest md = MessageDigest.getInstance("MD5");
        try (InputStream is = new FileInputStream(src.toString())) {
            DigestInputStream dis = new DigestInputStream(is, md);
            byte[] buffer = new byte[8192];
            int numOfBytesRead;
            while ((numOfBytesRead = dis.read(buffer)) > 0) {
                md.update(buffer, 0, numOfBytesRead);
            }
            digest = md.digest();
        }
        log.info("Local check sum: " + Hex.encodeHexString(digest));

        final Path dst = new Path(distFs.getHomeDirectory(), jarName);
        Path remoteChecksumFile = new Path(distFs.getHomeDirectory(), jarName + ".md5");
        boolean checksumMatches = false;
        if (distFs.isFile(remoteChecksumFile)) {
            try (InputStream r = distFs.open(remoteChecksumFile)) {
                ByteArrayOutputStream buffer = new ByteArrayOutputStream();
                int nRead;
                byte[] data = new byte[1024];
                while ((nRead = r.read(data, 0, data.length)) != -1) {
                    buffer.write(data, 0, nRead);
                }
                buffer.flush();
                byte[] remoteDigest = buffer.toByteArray();
                log.info("Remote check sum: " + Hex.encodeHexString(remoteDigest));
                checksumMatches = Arrays.equals(digest, remoteDigest);

            }
        }
        if (!checksumMatches) {
            log.info("Updating resource " + dst + " ...");
            distFs.copyFromLocalFile(false, true, src, dst);
            try (FSDataOutputStream remoteChecksumStream = distFs.create(remoteChecksumFile)) {
                log.info("Updating checksum " + remoteChecksumFile + " ...");
                remoteChecksumStream.write(digest);
            }
            FileStatus scFileStatus = distFs.getFileStatus(dst);
            log.info("Updated resource " + dst + " " + scFileStatus.getLen());
        }
    } catch (NoSuchAlgorithmException e) {
        throw new IOException(e);
    }
}
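
Here copyFromLocalFile only runs when the MD5 digest of the local jar differs from the checksum stored next to the remote copy, so repeated deployments skip the upload entirely. A reduced sketch of that guard; digest and checksumMatches are computed as in the example, and the paths are illustrative:

// Reduced sketch of the checksum-guarded upload above.
Path dst = new Path(distFs.getHomeDirectory(), "app.jar");
Path remoteChecksumFile = new Path(distFs.getHomeDirectory(), "app.jar.md5");
if (!checksumMatches) {
    distFs.copyFromLocalFile(false, true, src, dst);    // keep the local jar, overwrite the remote jar
    try (FSDataOutputStream out = distFs.create(remoteChecksumFile)) {
        out.write(digest);                              // store the new checksum beside the jar
    }
}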

From source file:io.dataapps.chlorine.hadoop.DeepScanPipeline.java

License:Apache License

public void run() {
    try {
        final Path fsScanPath = new Path(scanPath);
        final Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        if (findersFilePath != null) {
            fs.copyFromLocalFile(false, true, new Path(findersFilePath), new Path("chlorine_finders.xml"));
        }
        Job job = HDFSScanMR.makeJob(conf, fsScanPath, new Path(jobOutputDir), matchPath, scanSince,
                findersFilePath, queue, maskPath);
        boolean bResult = runJobToCompletion(job);
        if (bResult) {
            LOG.info("Total bytes scanned = "
                    + job.getCounters().findCounter("Feature", "TotalSize").getValue());
            LOG.info("Total records scanned = " + job.getCounters()
                    .findCounter("org.apache.hadoop.mapreduce.TaskCounter", "MAP_INPUT_RECORDS").getValue());
            LOG.info("Total Matched records = "
                    + job.getCounters().findCounter("Feature", "MatchedRecords").getValue());
            LOG.info("Total matches = " + job.getCounters().findCounter("Feature", "TotalMatches").getValue());
            FinderEngine engine = new FinderEngine();
            for (Finder finder : engine.getFinders()) {
                long l = job.getCounters().findCounter("Feature", finder.getName()).getValue();
                if (l > 0) {
                    LOG.info(finder.getName() + " = "
                            + job.getCounters().findCounter("Feature", "TotalMatches").getValue());
                }
            }
            if (matchPath != null) {
                String tempMatchesPath = jobOutputDir + Path.SEPARATOR + "_temp";
                String matchOutputPath = matchPath + Path.SEPARATOR + "scan_result_" + scanPath.hashCode() + "_"
                        + scanSince;
                FileUtil.copyMerge(fs, new Path(tempMatchesPath), fs, new Path(matchOutputPath), true, conf,
                        null);
                LOG.info("The matches detected are stored in " + matchOutputPath);
            }

            if (maskPath != null) {
                LOG.info("The matches in the input are masked and a copy is kept under " + maskPath);

            }
        }

    } catch (IOException e) {
        LOG.error(e);
    }
}
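
Note that the destination above, new Path("chlorine_finders.xml"), is relative, so it resolves against the filesystem's working directory (normally the user's HDFS home directory). A small sketch spelling that out, with a hypothetical local path:

// Relative destinations resolve against fs.getWorkingDirectory(); an explicit equivalent of the call above.
FileSystem fs = FileSystem.get(new Configuration());
Path findersSrc = new Path("/etc/chlorine/chlorine_finders.xml");              // hypothetical local path
Path findersDst = new Path(fs.getWorkingDirectory(), "chlorine_finders.xml");  // same target, made explicit
fs.copyFromLocalFile(false, true, findersSrc, findersDst);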

From source file:map_reduce.MapReduce_OptimizedBrandesAdditions_DO_JUNG.java

License:Open Source License

@SuppressWarnings("deprecation")
@Override
public int run(String[] args) throws Exception {
    if (args.length < 1) {
        System.err.println("Usage:\n");
        System.exit(1);
    }

    //       Job job = new Job(super.getConf());

    //      READ IN ALL COMMAND LINE ARGUMENTS
    //      EXAMPLE: 
    // hadoop jar MapReduce_OptimizedBrandesAdditions_DO_JUNG.jar
    // -libjars collections-generic-4.01.jar,jung-graph-impl-2.0.1.jar,jung-api-2.0.1.jar
    // -Dmapred.job.map.memory.mb=4096
    // -Dmapred.job.reduce.memory.mb=4096
    // -Dmapred.child.java.opts=-Xmx3500m
    // -Dmapreduce.task.timeout=60000000
    // -Dmapreduce.job.queuename=QUEUENAME
    // input_iterbrandes_additions_nocomb_10k_1 output_iterbrandes_additions_nocomb_10k_1
    // 10 1 10000 55245 10k 10k_randedges 100 1 false times/ betweenness/

    int m = -1;

    // input path to use on hdfs
    Path inputPath = new Path(args[++m]);

    // output path to use on hdfs
    Path outputPath = new Path(args[++m]);

    // number of Mappers to split the sources: e.g., 1, 10, 100 etc.
    // rule of thumb: the larger the graph (i.e., number of roots to test), the larger should be this number.
    int numOfMaps = Integer.parseInt(args[++m]);

    // number of Reducers to collect the output
    int numOfReduce = Integer.parseInt(args[++m]);

    // Number of vertices in graph
    int N = Integer.parseInt(args[++m]);

    // Number of edges in graph
    int M = Integer.parseInt(args[++m]);

    // Graph file (edge list, tab delimited) (full path)
    String graph = args[++m];

    // File with edges to be added (tab delimited) (full path)
    // Note: this version handles only edges between existing vertices in the graph.
    String random_edges = args[++m];

    // Number of random edges added
    int re = Integer.parseInt(args[++m]);

    // Experiment iteration (in case of multiple experiments)
    int iter = Integer.parseInt(args[++m]);

    // Use combiner or not (true/false)
    Boolean comb = Boolean.valueOf(args[++m]);

    // Output path for file with stats
    String statsoutputpath = args[++m];

    // Output path for file with final betweenness values
    String betoutputpath = args[++m];

    //      BEGIN INITIALIZATION

    JobConf conf = new JobConf(getConf(), MapReduce_OptimizedBrandesAdditions_DO_JUNG.class);
    FileSystem fs = FileSystem.get(conf);

    String setup = "_additions_edges" + re + "_maps" + numOfMaps + "_comb" + comb;
    conf.setJobName("OptimizedBrandesAdditionsDOJung_" + graph + setup + "_" + iter);
    conf.set("HDFS_GRAPH", graph + setup);
    conf.set("HDFS_Random_Edges", random_edges + setup);
    conf.set("output", outputPath.getName());
    conf.set("setup", setup);

    //      CREATE INPUT FILES FOR MAPPERS

    int numOfTasksperMap = (int) Math.ceil(N / numOfMaps);
    //generate an input file for each map task
    for (int i = 0; i < numOfMaps - 1; i++) {
        Path file = new Path(inputPath, "part-r-" + i);
        IntWritable start = new IntWritable(i * numOfTasksperMap);
        IntWritable end = new IntWritable((i * numOfTasksperMap) + numOfTasksperMap - 1);

        SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, file, IntWritable.class,
                IntWritable.class, CompressionType.NONE);
        try {
            writer.append(start, end);
        } finally {
            writer.close();
        }
        System.out.println("Wrote input for Map #" + i + ": " + start + " - " + end);
    }

    // last mapper takes what is left
    Path file = new Path(inputPath, "part-r-" + (numOfMaps - 1));
    IntWritable start = new IntWritable((numOfMaps - 1) * numOfTasksperMap);
    IntWritable end = new IntWritable(N - 1);
    SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, file, IntWritable.class, IntWritable.class,
            CompressionType.NONE);
    try {
        writer.append(start, end);
    } finally {
        writer.close();
    }
    System.out.println("Wrote input for Map #" + (numOfMaps - 1) + ": " + start + " - " + end);

    //      COPY FILES TO MAPPERS
    System.out.println("Copying graph to cache");
    String LOCAL_GRAPH = graph;
    Path hdfsPath = new Path(graph + setup);

    // upload the file to hdfs. Overwrite any existing copy.
    fs.copyFromLocalFile(false, true, new Path(LOCAL_GRAPH), hdfsPath);
    DistributedCache.addCacheFile(hdfsPath.toUri(), conf);

    System.out.println("Copying random edges to cache");
    String LOCAL_Random_Edges = random_edges;
    hdfsPath = new Path(random_edges + setup);

    // upload the file to hdfs. Overwrite any existing copy.
    fs.copyFromLocalFile(false, true, new Path(LOCAL_Random_Edges), hdfsPath);
    DistributedCache.addCacheFile(hdfsPath.toUri(), conf);

    conf.setOutputKeyClass(IntWritable.class);
    conf.setOutputValueClass(DoubleWritable.class);

    conf.setMapperClass(IterBrandesMapper.class);
    conf.setNumMapTasks(numOfMaps);

    if (comb)
        conf.setCombinerClass(IterBrandesReducer.class);

    conf.setReducerClass(IterBrandesReducer.class);
    conf.setNumReduceTasks(numOfReduce);

    // turn off speculative execution, because DFS doesn't handle multiple writers to the same file.
    conf.setSpeculativeExecution(false);

    conf.setInputFormat(SequenceFileInputFormat.class);
    conf.setOutputFormat(SequenceFileOutputFormat.class);

    FileInputFormat.setInputPaths(conf, inputPath);
    FileOutputFormat.setOutputPath(conf, outputPath);

    // conf.set("mapred.job.name", "APS-" + outputPath.getName());
    conf.setNumTasksToExecutePerJvm(-1); // JVM reuse

    System.out.println("Starting the execution...! Pray!! \n");
    long time1 = System.nanoTime();
    RunningJob rj = JobClient.runJob(conf);
    long time2 = System.nanoTime();

    //      READ OUTPUT FILES

    System.out.println("\nFinished and now reading/writing Betweenness Output...\n");

    // Assuming 1 reducer.
    Path inFile = new Path(outputPath, "part-00000");
    IntWritable id = new IntWritable();
    DoubleWritable betweenness = new DoubleWritable();
    SequenceFile.Reader reader = new SequenceFile.Reader(fs, inFile, conf);

    FileWriter fw = new FileWriter(new File(betoutputpath + graph + setup + "_betweenness_" + iter));
    try {
        int i = 0;
        for (; i < (N + M + re); i++) {
            reader.next(id, betweenness);
            fw.write(id + "\t" + betweenness + "\n");
            fw.flush();
        }
    } finally {
        reader.close();
        fw.close();
    }

    System.out.println("\nWriting times Output...\n");

    fw = new FileWriter(new File(statsoutputpath + graph + setup + "_times_" + iter));

    fw.write("Total-time:\t" + (time2 - time1) + "\n");
    fw.write("total-map\t" + rj.getCounters().getGroup("org.apache.hadoop.mapreduce.TaskCounter")
            .getCounter("SLOTS_MILLIS_MAPS") + "\n");
    fw.write("total-reduce\t" + rj.getCounters().getGroup("org.apache.hadoop.mapreduce.TaskCounter")
            .getCounter("SLOTS_MILLIS_REDUCES") + "\n");
    fw.write("total-cpu-mr\t" + rj.getCounters().getGroup("org.apache.hadoop.mapreduce.TaskCounter")
            .getCounter("CPU_MILLISECONDS") + "\n");
    fw.write("total-gc-mr\t"
            + rj.getCounters().getGroup("org.apache.hadoop.mapreduce.TaskCounter").getCounter("GC_TIME_MILLIS")
            + "\n");
    fw.write("total-phy-mem-mr\t" + rj.getCounters().getGroup("org.apache.hadoop.mapreduce.TaskCounter")
            .getCounter("PHYSICAL_MEMORY_BYTES") + "\n");
    fw.write("total-vir-mem-mr\t" + rj.getCounters().getGroup("org.apache.hadoop.mapreduce.TaskCounter")
            .getCounter("VIRTUAL_MEMORY_BYTES") + "\n");
    fw.write("brandes\t" + rj.getCounters().getGroup("TimeForBrandes").getCounter("exectime_initial_brandes")
            + "\n");
    fw.write("reduce\t" + rj.getCounters().getGroup("TimeForReduce").getCounter("reduceafteralledges") + "\n");
    fw.flush();

    try {
        Iterator<Counters.Counter> counters = rj.getCounters().getGroup("TimeForRandomEdges").iterator();
        while (counters.hasNext()) {
            Counter cc = counters.next();
            fw.write(cc.getName() + "\t" + cc.getCounter() + "\n");
            fw.flush();
        }
    } finally {
        fw.close();
    }

    return 0;
}

From source file:map_reduce.MapReduce_OptimizedBrandesDeletions_DO_JUNG.java

License:Open Source License

@SuppressWarnings("deprecation")
@Override
public int run(String[] args) throws Exception {
    if (args.length < 1) {
        System.err.println("Usage:\n");
        System.exit(1);
    }

    //       Job job = new Job(super.getConf());

    //      READ IN ALL COMMAND LINE ARGUMENTS
    //      EXAMPLE: 
    // hadoop jar MapReduce_OptimizedBrandesDeletions_DO_JUNG.jar
    // -libjars collections-generic-4.01.jar,jung-graph-impl-2.0.1.jar,jung-api-2.0.1.jar
    // -Dmapred.job.map.memory.mb=4096
    // -Dmapred.job.reduce.memory.mb=4096
    // -Dmapred.child.java.opts=-Xmx3500m
    // -Dmapreduce.task.timeout=60000000
    // -Dmapreduce.job.queuename=QUEUENAME
    // input_iterbrandes_deletions_nocomb_10k_1 output_iterbrandes_deletions_nocomb_10k_1
    // 10 1 10000 55245 10k 10k_randedges 100 1 false times/ betweenness/

    int m = -1;

    // input path to use on hdfs
    Path inputPath = new Path(args[++m]);

    // output path to use on hdfs
    Path outputPath = new Path(args[++m]);

    // number of Mappers to split the sources: e.g., 1, 10, 100 etc.
    // rule of thumb: the larger the graph (i.e., number of roots to test), the larger should be this number.
    int numOfMaps = Integer.parseInt(args[++m]);

    // number of Reducers to collect the output
    int numOfReduce = Integer.parseInt(args[++m]);

    // Number of vertices in graph
    int N = Integer.parseInt(args[++m]);

    // Number of edges in graph
    int M = Integer.parseInt(args[++m]);

    // Graph file (edge list, tab delimited) (full path)
    String graph = args[++m];

    // File with edges to be added (tab delimited) (full path)
    // Note: this version handles only edges between existing vertices in the graph.
    String random_edges = args[++m];

    // Number of random edges added
    int re = Integer.parseInt(args[++m]);

    // Experiment iteration (in case of multiple experiments)
    int iter = Integer.parseInt(args[++m]);

    // Use combiner or not (true/false)
    Boolean comb = Boolean.valueOf(args[++m]);

    // Output path for file with stats
    String statsoutputpath = args[++m];

    // Output path for file with final betweenness values
    String betoutputpath = args[++m];

    //      BEGIN INITIALIZATION

    JobConf conf = new JobConf(getConf(), MapReduce_OptimizedBrandesDeletions_DO_JUNG.class);
    FileSystem fs = FileSystem.get(conf);

    String setup = "_deletions_edges" + re + "_maps" + numOfMaps + "_comb" + comb;
    conf.setJobName("OptimizedBrandesDeletionsDOJung_" + graph + setup + "_" + iter);
    conf.set("HDFS_GRAPH", graph + setup);
    conf.set("HDFS_Random_Edges", random_edges + setup);
    conf.set("output", outputPath.getName());
    conf.set("setup", setup);

    //      CREATE INPUT FILES FOR MAPPERS

    int numOfTasksperMap = (int) Math.ceil(N / numOfMaps);
    //generate an input file for each map task
    for (int i = 0; i < numOfMaps - 1; i++) {
        Path file = new Path(inputPath, "part-r-" + i);
        IntWritable start = new IntWritable(i * numOfTasksperMap);
        IntWritable end = new IntWritable((i * numOfTasksperMap) + numOfTasksperMap - 1);

        SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, file, IntWritable.class,
                IntWritable.class, CompressionType.NONE);
        try {
            writer.append(start, end);
        } finally {
            writer.close();
        }
        System.out.println("Wrote input for Map #" + i + ": " + start + " - " + end);
    }

    // last mapper takes what is left
    Path file = new Path(inputPath, "part-r-" + (numOfMaps - 1));
    IntWritable start = new IntWritable((numOfMaps - 1) * numOfTasksperMap);
    IntWritable end = new IntWritable(N - 1);
    SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, file, IntWritable.class, IntWritable.class,
            CompressionType.NONE);
    try {
        writer.append(start, end);
    } finally {
        writer.close();
    }
    System.out.println("Wrote input for Map #" + (numOfMaps - 1) + ": " + start + " - " + end);

    //      COPY FILES TO MAPPERS
    System.out.println("Copying graph to cache");
    String LOCAL_GRAPH = graph;
    Path hdfsPath = new Path(graph + setup);

    // upload the file to hdfs. Overwrite any existing copy.
    fs.copyFromLocalFile(false, true, new Path(LOCAL_GRAPH), hdfsPath);
    DistributedCache.addCacheFile(hdfsPath.toUri(), conf);

    System.out.println("Copying random edges to cache");
    String LOCAL_Random_Edges = random_edges;
    hdfsPath = new Path(random_edges + setup);

    // upload the file to hdfs. Overwrite any existing copy.
    fs.copyFromLocalFile(false, true, new Path(LOCAL_Random_Edges), hdfsPath);
    DistributedCache.addCacheFile(hdfsPath.toUri(), conf);

    conf.setOutputKeyClass(IntWritable.class);
    conf.setOutputValueClass(DoubleWritable.class);

    conf.setMapperClass(IterBrandesMapper.class);
    conf.setNumMapTasks(numOfMaps);

    if (comb)
        conf.setCombinerClass(IterBrandesReducer.class);

    conf.setReducerClass(IterBrandesReducer.class);
    conf.setNumReduceTasks(numOfReduce);

    // turn off speculative execution, because DFS doesn't handle multiple writers to the same file.
    conf.setSpeculativeExecution(false);

    conf.setInputFormat(SequenceFileInputFormat.class);
    conf.setOutputFormat(SequenceFileOutputFormat.class);

    FileInputFormat.setInputPaths(conf, inputPath);
    FileOutputFormat.setOutputPath(conf, outputPath);

    // conf.set("mapred.job.name", "APS-" + outputPath.getName());
    conf.setNumTasksToExecutePerJvm(-1); // JVM reuse

    System.out.println("Starting the execution...! Pray!! \n");
    long time1 = System.nanoTime();
    RunningJob rj = JobClient.runJob(conf);
    long time2 = System.nanoTime();

    //      READ OUTPUT FILES

    System.out.println("\nFinished and now reading/writing Betweenness Output...\n");

    // Assuming 1 reducer.
    Path inFile = new Path(outputPath, "part-00000");
    IntWritable id = new IntWritable();
    DoubleWritable betweenness = new DoubleWritable();
    SequenceFile.Reader reader = new SequenceFile.Reader(fs, inFile, conf);

    FileWriter fw = new FileWriter(new File(betoutputpath + graph + setup + "_betweenness_" + iter));
    try {
        int i = 0;
        for (; i < (N + (M - re)); i++) {
            reader.next(id, betweenness);
            fw.write(id + "\t" + betweenness + "\n");
            fw.flush();
        }
    } finally {
        reader.close();
        fw.close();
    }

    System.out.println("\nWriting times Output...\n");

    fw = new FileWriter(new File(statsoutputpath + graph + setup + "_times_" + iter));

    fw.write("Total-time:\t" + (time2 - time1) + "\n");
    fw.write("total-map\t" + rj.getCounters().getGroup("org.apache.hadoop.mapreduce.TaskCounter")
            .getCounter("SLOTS_MILLIS_MAPS") + "\n");
    fw.write("total-reduce\t" + rj.getCounters().getGroup("org.apache.hadoop.mapreduce.TaskCounter")
            .getCounter("SLOTS_MILLIS_REDUCES") + "\n");
    fw.write("total-cpu-mr\t" + rj.getCounters().getGroup("org.apache.hadoop.mapreduce.TaskCounter")
            .getCounter("CPU_MILLISECONDS") + "\n");
    fw.write("total-gc-mr\t"
            + rj.getCounters().getGroup("org.apache.hadoop.mapreduce.TaskCounter").getCounter("GC_TIME_MILLIS")
            + "\n");
    fw.write("total-phy-mem-mr\t" + rj.getCounters().getGroup("org.apache.hadoop.mapreduce.TaskCounter")
            .getCounter("PHYSICAL_MEMORY_BYTES") + "\n");
    fw.write("total-vir-mem-mr\t" + rj.getCounters().getGroup("org.apache.hadoop.mapreduce.TaskCounter")
            .getCounter("VIRTUAL_MEMORY_BYTES") + "\n");
    fw.write("brandes\t" + rj.getCounters().getGroup("TimeForBrandes").getCounter("exectime_initial_brandes")
            + "\n");
    fw.write("reduce\t" + rj.getCounters().getGroup("TimeForReduce").getCounter("reduceafteralledges") + "\n");
    fw.flush();

    try {
        Iterator<Counters.Counter> counters = rj.getCounters().getGroup("TimeForRandomEdges").iterator();
        while (counters.hasNext()) {
            Counter cc = counters.next();
            fw.write(cc.getName() + "\t" + cc.getCounter() + "\n");
            fw.flush();
        }
    } finally {
        fw.close();
    }

    return 0;
}

From source file:me.haosdent.noya.Client.java

License:Apache License

/**
 * Main run function for the client
 *
 * @return true if application completed successfully
 *
 * @throws java.io.IOException
 * @throws org.apache.hadoop.yarn.exceptions.YarnException
 */
public boolean run() throws IOException, YarnException {

    LOG.info("Running Client");
    yarnClient.start();

    YarnClusterMetrics clusterMetrics = yarnClient.getYarnClusterMetrics();
    LOG.info("Got Cluster metric info from ASM" + ", numNodeManagers=" + clusterMetrics.getNumNodeManagers());

    List<NodeReport> clusterNodeReports = yarnClient.getNodeReports(NodeState.RUNNING);
    LOG.info("Got Cluster node info from ASM");
    for (NodeReport node : clusterNodeReports) {
        LOG.info("Got node report from ASM for" + ", nodeId=" + node.getNodeId() + ", nodeAddress"
                + node.getHttpAddress() + ", nodeRackName" + node.getRackName() + ", nodeNumContainers"
                + node.getNumContainers());
    }

    QueueInfo queueInfo = yarnClient.getQueueInfo(this.amQueue);
    LOG.info("Queue info" + ", queueName=" + queueInfo.getQueueName() + ", queueCurrentCapacity="
            + queueInfo.getCurrentCapacity() + ", queueMaxCapacity=" + queueInfo.getMaximumCapacity()
            + ", queueApplicationCount=" + queueInfo.getApplications().size() + ", queueChildQueueCount="
            + queueInfo.getChildQueues().size());

    List<QueueUserACLInfo> listAclInfo = yarnClient.getQueueAclsInfo();
    for (QueueUserACLInfo aclInfo : listAclInfo) {
        for (QueueACL userAcl : aclInfo.getUserAcls()) {
            LOG.info("User ACL Info for Queue" + ", queueName=" + aclInfo.getQueueName() + ", userAcl="
                    + userAcl.name());
        }
    }

    // Get a new application id
    YarnClientApplication app = yarnClient.createApplication();
    GetNewApplicationResponse appResponse = app.getNewApplicationResponse();
    // TODO get min/max resource capabilities from RM and change memory ask if needed
    // If we do not have min/max, we may not be able to correctly request
    // the required resources from the RM for the app master
    // Memory ask has to be a multiple of min and less than max.
    // Dump out information about cluster capability as seen by the resource manager
    int maxMem = appResponse.getMaximumResourceCapability().getMemory();
    LOG.info("Max mem capabililty of resources in this cluster " + maxMem);

    // A resource ask cannot exceed the max.
    if (amMemory > maxMem) {
        LOG.info("AM memory specified above max threshold of cluster. Using max value." + ", specified="
                + amMemory + ", max=" + maxMem);
        amMemory = maxMem;
    }

    int maxVCores = appResponse.getMaximumResourceCapability().getVirtualCores();
    LOG.info("Max virtual cores capabililty of resources in this cluster " + maxVCores);

    if (amVCores > maxVCores) {
        LOG.info("AM virtual cores specified above max threshold of cluster. " + "Using max value."
                + ", specified=" + amVCores + ", max=" + maxVCores);
        amVCores = maxVCores;
    }

    // set the application name
    ApplicationSubmissionContext appContext = app.getApplicationSubmissionContext();
    ApplicationId appId = appContext.getApplicationId();

    //appContext.setKeepContainersAcrossApplicationAttempts(keepContainers);
    appContext.setApplicationName(appName);

    // Set up the container launch context for the application master
    ContainerLaunchContext amContainer = Records.newRecord(ContainerLaunchContext.class);

    // set local resources for the application master
    // local files or archives as needed
    // In this scenario, the jar file for the application master is part of the local resources
    Map<String, LocalResource> localResources = new HashMap<String, LocalResource>();

    LOG.info("Copy App Master jar from local filesystem and add to local environment");
    // Copy the application master jar to the filesystem
    // Create a local resource to point to the destination jar path
    FileSystem fs = FileSystem.get(conf);
    addToLocalResources(fs, appMasterJar, appMasterJarPath, appId.toString(), localResources, null);

    // Set the log4j properties if needed
    if (!log4jPropFile.isEmpty()) {
        addToLocalResources(fs, log4jPropFile, log4jPath, appId.toString(), localResources, null);
    }

    // The shell script has to be made available on the final container(s)
    // where it will be executed.
    // To do this, we need to first copy into the filesystem that is visible
    // to the yarn framework.
    // We do not need to set this as a local resource for the application
    // master as the application master does not need it.
    String hdfsShellScriptLocation = "";
    long hdfsShellScriptLen = 0;
    long hdfsShellScriptTimestamp = 0;
    if (!shellScriptPath.isEmpty()) {
        Path shellSrc = new Path(shellScriptPath);
        String shellPathSuffix = appName + "/" + appId.toString() + "/" + SCRIPT_PATH;
        Path shellDst = new Path(fs.getHomeDirectory(), shellPathSuffix);
        fs.copyFromLocalFile(false, true, shellSrc, shellDst);
        hdfsShellScriptLocation = shellDst.toUri().toString();
        FileStatus shellFileStatus = fs.getFileStatus(shellDst);
        hdfsShellScriptLen = shellFileStatus.getLen();
        hdfsShellScriptTimestamp = shellFileStatus.getModificationTime();
    }

    if (!shellCommand.isEmpty()) {
        addToLocalResources(fs, null, shellCommandPath, appId.toString(), localResources, shellCommand);
    }

    if (shellArgs.length > 0) {
        addToLocalResources(fs, null, shellArgsPath, appId.toString(), localResources,
                StringUtils.join(shellArgs, " "));
    }
    // Set local resource info into app master container launch context
    amContainer.setLocalResources(localResources);

    // Set the necessary security tokens as needed
    //amContainer.setContainerTokens(containerToken);

    // Set the env variables to be setup in the env where the application master will be run
    LOG.info("Set the environment for the application master");
    Map<String, String> env = new HashMap<String, String>();

    // put location of shell script into env
    // using the env info, the application master will create the correct local resource for the
    // eventual containers that will be launched to execute the shell scripts
    env.put(DSConstants.DISTRIBUTEDSHELLSCRIPTLOCATION, hdfsShellScriptLocation);
    env.put(DSConstants.DISTRIBUTEDSHELLSCRIPTTIMESTAMP, Long.toString(hdfsShellScriptTimestamp));
    env.put(DSConstants.DISTRIBUTEDSHELLSCRIPTLEN, Long.toString(hdfsShellScriptLen));

    // Add AppMaster.jar location to classpath
    // At some point we should not be required to add
    // the hadoop specific classpaths to the env.
    // It should be provided out of the box.
    // For now setting all required classpaths including
    // the classpath to "." for the application jar
    StringBuilder classPathEnv = new StringBuilder(ApplicationConstants.Environment.CLASSPATH.$$())
            .append(ApplicationConstants.CLASS_PATH_SEPARATOR).append("./*");
    for (String c : conf.getStrings(YarnConfiguration.YARN_APPLICATION_CLASSPATH,
            ApplicationConstants.DEFAULT_YARN_CROSS_PLATFORM_APPLICATION_CLASSPATH)) {
        classPathEnv.append(ApplicationConstants.CLASS_PATH_SEPARATOR);
        classPathEnv.append(c.trim());
    }
    classPathEnv.append(ApplicationConstants.CLASS_PATH_SEPARATOR).append("./log4j.properties");

    // add the runtime classpath needed for tests to work
    if (conf.getBoolean(YarnConfiguration.IS_MINI_YARN_CLUSTER, false)) {
        classPathEnv.append(':');
        classPathEnv.append(System.getProperty("java.class.path"));
    }

    env.put("CLASSPATH", classPathEnv.toString());

    amContainer.setEnvironment(env);

    // Set the necessary command to execute the application master
    Vector<CharSequence> vargs = new Vector<CharSequence>(30);

    // Set java executable command
    LOG.info("Setting up app master command");
    vargs.add(ApplicationConstants.Environment.JAVA_HOME.$$() + "/bin/java");
    // Set Xmx based on am memory size
    vargs.add("-Xmx" + amMemory + "m");
    // Set class name
    vargs.add(appMasterMainClass);
    // Set params for Application Master
    vargs.add("--container_memory " + String.valueOf(containerMemory));
    vargs.add("--container_vcores " + String.valueOf(containerVirtualCores));
    vargs.add("--num_containers " + String.valueOf(numContainers));
    vargs.add("--priority " + String.valueOf(shellCmdPriority));

    for (Map.Entry<String, String> entry : shellEnv.entrySet()) {
        vargs.add("--shell_env " + entry.getKey() + "=" + entry.getValue());
    }
    if (debugFlag) {
        vargs.add("--debug");
    }

    vargs.add("1>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/AppMaster.stdout");
    vargs.add("2>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/AppMaster.stderr");

    // Get final command
    StringBuilder command = new StringBuilder();
    for (CharSequence str : vargs) {
        command.append(str).append(" ");
    }

    LOG.info("Completed setting up app master command " + command.toString());
    List<String> commands = new ArrayList<String>();
    //commands.add(command.toString());
    commands.add("echo 'hello' >/tmp/yarn_test");
    amContainer.setCommands(commands);

    // Set up resource type requirements
    // For now, both memory and vcores are supported, so we set memory and
    // vcores requirements
    Resource capability = Records.newRecord(Resource.class);
    capability.setMemory(amMemory);
    capability.setVirtualCores(amVCores);
    appContext.setResource(capability);

    // Service data is a binary blob that can be passed to the application
    // Not needed in this scenario
    // amContainer.setServiceData(serviceData);

    // Setup security tokens
    if (UserGroupInformation.isSecurityEnabled()) {
        Credentials credentials = new Credentials();
        String tokenRenewer = conf.get(YarnConfiguration.RM_PRINCIPAL);
        if (tokenRenewer == null || tokenRenewer.length() == 0) {
            throw new IOException("Can't get Master Kerberos principal for the RM to use as renewer");
        }

        // For now, only getting tokens for the default file-system.
        final Token<?> tokens[] = fs.addDelegationTokens(tokenRenewer, credentials);
        if (tokens != null) {
            for (Token<?> token : tokens) {
                LOG.info("Got dt for " + fs.getUri() + "; " + token);
            }
        }
        DataOutputBuffer dob = new DataOutputBuffer();
        credentials.writeTokenStorageToStream(dob);
        ByteBuffer fsTokens = ByteBuffer.wrap(dob.getData(), 0, dob.getLength());
        amContainer.setTokens(fsTokens);
    }

    appContext.setAMContainerSpec(amContainer);

    // Set the priority for the application master
    Priority pri = Records.newRecord(Priority.class);
    // TODO - what is the range for priority? how to decide?
    pri.setPriority(amPriority);
    appContext.setPriority(pri);

    // Set the queue to which this application is to be submitted in the RM
    appContext.setQueue(amQueue);

    // Submit the application to the applications manager
    // SubmitApplicationResponse submitResp = applicationsManager.submitApplication(appRequest);
    // Ignore the response as either a valid response object is returned on success
    // or an exception thrown to denote some form of a failure
    LOG.info("Submitting application to ASM");

    yarnClient.submitApplication(appContext);

    // TODO
    // Try submitting the same request again
    // app submission failure?

    // Monitor the application
    return monitorApplication(appId);

}

From source file:ml.shifu.guagua.yarn.util.YarnUtils.java

License:Apache License

private static void copyToFs(Configuration conf, String local, String remote) throws IOException {
    FileSystem fs = FileSystem.get(conf);
    Path src = new Path(local);
    Path dst = fs.makeQualified(new Path(remote));
    fs.copyFromLocalFile(false, true, src, dst);
}