Example usage for org.apache.hadoop.fs FileSystem getUri

Introduction

On this page you can find example usage for org.apache.hadoop.fs.FileSystem getUri.

Prototype

public abstract URI getUri();

Document

Returns a URI which identifies this FileSystem.
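
A minimal standalone sketch of calling getUri() (the class name GetUriExample and the printed labels are illustrative, not taken from the projects below):

import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;

public class GetUriExample {
    public static void main(String[] args) throws Exception {
        // Resolve the default FileSystem from the configuration,
        // e.g. hdfs://namenode:8020, or file:/// when nothing is configured
        FileSystem fs = FileSystem.get(new Configuration());
        // getUri() identifies the store this FileSystem instance talks to:
        // it carries a scheme and authority, but no particular path
        URI uri = fs.getUri();
        System.out.println("scheme=" + uri.getScheme() + ", authority=" + uri.getAuthority());
    }
}

The examples below follow the same pattern: they call getUri() mostly for logging, to record which concrete store (HDFS, S3, a local MiniDFSCluster, and so on) a FileSystem handle is bound to.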

Usage

From source file: org.commoncrawl.mapred.ec2.parser.OutputCommitter.java

License: Open Source License

@Override
public boolean needsTaskCommit(TaskAttemptContext context) throws IOException {
    LOG.info("COMMITTER- Needs Commit Called on:" + context.getTaskAttemptID().toString());
    try {
        Path taskOutputPath = getTempTaskOutputPath(context);
        if (taskOutputPath != null) {
            context.getProgressible().progress();
            FileSystem fs = FileSystem.get(context.getJobConf());
            LOG.info("COMMITTER - Default FS is:" + fs.getUri());
            // Get the file-system for the task output directory
            FileSystem fsFromPath = taskOutputPath.getFileSystem(context.getJobConf());
            // since task output path is created on demand, 
            // if it exists, task needs a commit
            LOG.info("COMMITTER - Checking if outputPath Exists:" + taskOutputPath + " for task:"
                    + context.getTaskAttemptID().toString());
            if (fsFromPath.exists(taskOutputPath)) {
                LOG.info("Needs Commit Returning TRUE");
                return true;
            }
        }
    } catch (IOException ioe) {
        throw ioe;
    }
    LOG.info("COMMITTER Needs Commit Returning FALSE");
    return false;
}

From source file: org.commoncrawl.mapred.ec2.postprocess.crawldb.LinkGraphDataEmitterJob.java

License: Open Source License

public LinkGraphDataEmitterJob(Configuration conf) throws Exception {
    FileSystem fs = FileSystem.get(new URI("s3n://aws-publicdatasets"), conf);
    LOG.info("FileSystem is:" + fs.getUri() + " Scanning for valid segments");
    SortedSet<Long> validSegments = scanForValidSegments(fs);
    LOG.info("There are: " + validSegments.size() + " valid segments. Scanning for Merged Segments");
    SortedSet<Long> mergedSegments = scanForMergedSegments(fs);
    LOG.info("There are: " + mergedSegments.size() + " merged Segments");
    // calculate difference 
    Set<Long> segmentsToProcess = Sets.difference(validSegments, mergedSegments);
    LOG.info("There are: " + segmentsToProcess.size() + " Segments that need to be merged");
    // ok we are ready to go .. 
    //int iteration = 0;
    for (long segmentId : segmentsToProcess) {
        LOG.info("Queueing Segment:" + segmentId + " for Merge");
        queue(fs, conf, segmentId);
    }
    // queue shutdown items 
    for (int i = 0; i < MAX_SIMULTANEOUS_JOBS; ++i) {
        _queue.put(new QueueItem());
    }
}

From source file: org.commoncrawl.mapred.ec2.postprocess.linkCollector.LinkCollectorJob.java

License: Open Source License

public LinkCollectorJob(Configuration conf) throws Exception {
    FileSystem fs = FileSystem.get(new URI("s3n://aws-publicdatasets"), conf);
    LOG.info("FileSystem is:" + fs.getUri() + " Scanning for valid segments");
    SortedSet<Long> validSegments = scanForValidSegments(fs);
    LOG.info("There are: " + validSegments.size() + " valid segments. Scanning for Merged Segments");
    SortedSet<Long> mergedSegments = scanForMergedSegments(fs);
    LOG.info("There are: " + mergedSegments.size() + " merged Segments");
    // calculate difference 
    Set<Long> segmentsToProcess = Sets.difference(validSegments, mergedSegments);
    LOG.info("There are: " + segmentsToProcess.size() + " Segments that need to be merged");
    // ok we are ready to go .. 
    int iteration = 0;
    for (long segmentId : segmentsToProcess) {
        LOG.info("Queueing Segment:" + segmentId + " for Merge");
        queue(fs, conf, segmentId);
    }
    // queue shutdown items 
    for (int i = 0; i < MAX_SIMULTANEOUS_JOBS; ++i) {
        _queue.put(new QueueItem());
    }
}

From source file: org.commoncrawl.mapred.ec2.postprocess.linkCollector.LinkGraphDataEmitterJob.java

License: Open Source License

public LinkGraphDataEmitterJob(Configuration conf) throws Exception {
    FileSystem fs = FileSystem.get(new URI("s3n://aws-publicdatasets"), conf);
    LOG.info("FileSystem is:" + fs.getUri() + " Scanning for valid segments");
    SortedSet<Long> validSegments = scanForValidSegments(fs);
    LOG.info("There are: " + validSegments.size() + " valid segments. Scanning for Merged Segments");
    SortedSet<Long> mergedSegments = scanForMergedSegments(fs);
    LOG.info("There are: " + mergedSegments.size() + " merged Segments");
    // calculate difference 
    Set<Long> segmentsToProcess = Sets.difference(validSegments, mergedSegments);
    LOG.info("There are: " + segmentsToProcess.size() + " Segments that need to be merged");
    // ok we are ready to go .. 
    int iteration = 0;
    for (long segmentId : segmentsToProcess) {
        LOG.info("Queueing Segment:" + segmentId + " for Merge");
        queue(fs, conf, segmentId);
    }
    // queue shutdown items 
    for (int i = 0; i < MAX_SIMULTANEOUS_JOBS; ++i) {
        _queue.put(new QueueItem());
    }
}

From source file: org.conan.myhadoop02.mr.yarntest.Client.java

License: Apache License

/**
 * Main run function for the client
 * @return true if application completed successfully
 * @throws IOException
 * @throws YarnException
 */
public boolean run() throws IOException, YarnException {

    LOG.info("Running Client");
    yarnClient.start();

    YarnClusterMetrics clusterMetrics = yarnClient.getYarnClusterMetrics();
    LOG.info("Got Cluster metric info from ASM" + ", numNodeManagers=" + clusterMetrics.getNumNodeManagers());

    List<NodeReport> clusterNodeReports = yarnClient.getNodeReports(NodeState.RUNNING);
    LOG.info("Got Cluster node info from ASM");
    for (NodeReport node : clusterNodeReports) {
        LOG.info("Got node report from ASM for" + ", nodeId=" + node.getNodeId() + ", nodeAddress="
                + node.getHttpAddress() + ", nodeRackName=" + node.getRackName() + ", nodeNumContainers="
                + node.getNumContainers());
    }

    QueueInfo queueInfo = yarnClient.getQueueInfo(this.amQueue);
    LOG.info("Queue info" + ", queueName=" + queueInfo.getQueueName() + ", queueCurrentCapacity="
            + queueInfo.getCurrentCapacity() + ", queueMaxCapacity=" + queueInfo.getMaximumCapacity()
            + ", queueApplicationCount=" + queueInfo.getApplications().size() + ", queueChildQueueCount="
            + queueInfo.getChildQueues().size());

    List<QueueUserACLInfo> listAclInfo = yarnClient.getQueueAclsInfo();
    for (QueueUserACLInfo aclInfo : listAclInfo) {
        for (QueueACL userAcl : aclInfo.getUserAcls()) {
            LOG.info("User ACL Info for Queue" + ", queueName=" + aclInfo.getQueueName() + ", userAcl="
                    + userAcl.name());
        }
    }

    if (domainId != null && domainId.length() > 0 && toCreateDomain) {
        prepareTimelineDomain();
    }

    // Get a new application id
    YarnClientApplication app = yarnClient.createApplication();
    GetNewApplicationResponse appResponse = app.getNewApplicationResponse();
    // TODO get min/max resource capabilities from RM and change memory ask if needed
    // If we do not have min/max, we may not be able to correctly request
    // the required resources from the RM for the app master
    // Memory ask has to be a multiple of min and less than max.
    // Dump out information about cluster capability as seen by the resource manager
    int maxMem = appResponse.getMaximumResourceCapability().getMemory();
    LOG.info("Max mem capability of resources in this cluster " + maxMem);

    // A resource ask cannot exceed the max.
    if (amMemory > maxMem) {
        LOG.info("AM memory specified above max threshold of cluster. Using max value." + ", specified="
                + amMemory + ", max=" + maxMem);
        amMemory = maxMem;
    }

    int maxVCores = appResponse.getMaximumResourceCapability().getVirtualCores();
    LOG.info("Max virtual cores capability of resources in this cluster " + maxVCores);

    if (amVCores > maxVCores) {
        LOG.info("AM virtual cores specified above max threshold of cluster. " + "Using max value."
                + ", specified=" + amVCores + ", max=" + maxVCores);
        amVCores = maxVCores;
    }

    // set the application name
    ApplicationSubmissionContext appContext = app.getApplicationSubmissionContext();
    ApplicationId appId = appContext.getApplicationId();

    appContext.setKeepContainersAcrossApplicationAttempts(keepContainers);
    appContext.setApplicationName(appName);

    if (attemptFailuresValidityInterval >= 0) {
        appContext.setAttemptFailuresValidityInterval(attemptFailuresValidityInterval);
    }

    // set local resources for the application master
    // local files or archives as needed
    // In this scenario, the jar file for the application master is part of the local resources
    Map<String, LocalResource> localResources = new HashMap<String, LocalResource>();

    LOG.info("Copy App Master jar from local filesystem and add to local environment");
    // Copy the application master jar to the filesystem
    // Create a local resource to point to the destination jar path
    FileSystem fs = FileSystem.get(conf);
    addToLocalResources(fs, appMasterJar, appMasterJarPath, appId.toString(), localResources, null);

    // Set the log4j properties if needed
    if (!log4jPropFile.isEmpty()) {
        addToLocalResources(fs, log4jPropFile, log4jPath, appId.toString(), localResources, null);
    }

    // The shell script has to be made available on the final container(s)
    // where it will be executed.
    // To do this, we need to first copy into the filesystem that is visible
    // to the yarn framework.
    // We do not need to set this as a local resource for the application
    // master as the application master does not need it.
    String hdfsShellScriptLocation = "";
    long hdfsShellScriptLen = 0;
    long hdfsShellScriptTimestamp = 0;
    if (!shellScriptPath.isEmpty()) {
        Path shellSrc = new Path(shellScriptPath);
        String shellPathSuffix = appName + "/" + appId.toString() + "/" + SCRIPT_PATH;
        Path shellDst = new Path(fs.getHomeDirectory(), shellPathSuffix);
        fs.copyFromLocalFile(false, true, shellSrc, shellDst);
        hdfsShellScriptLocation = shellDst.toUri().toString();
        FileStatus shellFileStatus = fs.getFileStatus(shellDst);
        hdfsShellScriptLen = shellFileStatus.getLen();
        hdfsShellScriptTimestamp = shellFileStatus.getModificationTime();
    }

    if (!shellCommand.isEmpty()) {
        addToLocalResources(fs, null, shellCommandPath, appId.toString(), localResources, shellCommand);
    }

    if (shellArgs.length > 0) {
        addToLocalResources(fs, null, shellArgsPath, appId.toString(), localResources,
                StringUtils.join(shellArgs, " "));
    }

    // Set the necessary security tokens as needed
    //amContainer.setContainerTokens(containerToken);

    // Set the env variables to be setup in the env where the application master will be run
    LOG.info("Set the environment for the application master");
    Map<String, String> env = new HashMap<String, String>();

    // put location of shell script into env
    // using the env info, the application master will create the correct local resource for the
    // eventual containers that will be launched to execute the shell scripts
    env.put(DSConstants.DISTRIBUTEDSHELLSCRIPTLOCATION, hdfsShellScriptLocation);
    env.put(DSConstants.DISTRIBUTEDSHELLSCRIPTTIMESTAMP, Long.toString(hdfsShellScriptTimestamp));
    env.put(DSConstants.DISTRIBUTEDSHELLSCRIPTLEN, Long.toString(hdfsShellScriptLen));
    if (domainId != null && domainId.length() > 0) {
        env.put(DSConstants.DISTRIBUTEDSHELLTIMELINEDOMAIN, domainId);
    }

    // Add AppMaster.jar location to classpath
    // At some point we should not be required to add
    // the hadoop specific classpaths to the env.
    // It should be provided out of the box.
    // For now setting all required classpaths including
    // the classpath to "." for the application jar
    StringBuilder classPathEnv = new StringBuilder(Environment.CLASSPATH.$$())
            .append(ApplicationConstants.CLASS_PATH_SEPARATOR).append("./*");
    for (String c : conf.getStrings(YarnConfiguration.YARN_APPLICATION_CLASSPATH,
            YarnConfiguration.DEFAULT_YARN_CROSS_PLATFORM_APPLICATION_CLASSPATH)) {
        classPathEnv.append(ApplicationConstants.CLASS_PATH_SEPARATOR);
        classPathEnv.append(c.trim());
    }
    classPathEnv.append(ApplicationConstants.CLASS_PATH_SEPARATOR).append("./log4j.properties");

    // add the runtime classpath needed for tests to work
    if (conf.getBoolean(YarnConfiguration.IS_MINI_YARN_CLUSTER, false)) {
        classPathEnv.append(':');
        classPathEnv.append(System.getProperty("java.class.path"));
    }

    env.put("CLASSPATH", classPathEnv.toString());

    // Set the necessary command to execute the application master
    Vector<CharSequence> vargs = new Vector<CharSequence>(30);

    // Set java executable command
    LOG.info("Setting up app master command");
    vargs.add(Environment.JAVA_HOME.$$() + "/bin/java");
    // Set Xmx based on am memory size
    vargs.add("-Xmx" + amMemory + "m");
    // Set class name
    vargs.add(appMasterMainClass);
    // Set params for Application Master
    vargs.add("--container_memory " + String.valueOf(containerMemory));
    vargs.add("--container_vcores " + String.valueOf(containerVirtualCores));
    vargs.add("--num_containers " + String.valueOf(numContainers));
    if (null != nodeLabelExpression) {
        appContext.setNodeLabelExpression(nodeLabelExpression);
    }
    vargs.add("--priority " + String.valueOf(shellCmdPriority));

    for (Map.Entry<String, String> entry : shellEnv.entrySet()) {
        vargs.add("--shell_env " + entry.getKey() + "=" + entry.getValue());
    }
    if (debugFlag) {
        vargs.add("--debug");
    }

    vargs.add("1>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/AppMaster.stdout");
    vargs.add("2>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/AppMaster.stderr");

    // Get final command
    StringBuilder command = new StringBuilder();
    for (CharSequence str : vargs) {
        command.append(str).append(" ");
    }

    LOG.info("Completed setting up app master command " + command.toString());
    List<String> commands = new ArrayList<String>();
    commands.add(command.toString());

    // Set up the container launch context for the application master
    ContainerLaunchContext amContainer = ContainerLaunchContext.newInstance(localResources, env, commands, null,
            null, null);

    // Set up resource type requirements
    // For now, both memory and vcores are supported, so we set memory and
    // vcores requirements
    Resource capability = Resource.newInstance(amMemory, amVCores);
    appContext.setResource(capability);

    // Service data is a binary blob that can be passed to the application
    // Not needed in this scenario
    // amContainer.setServiceData(serviceData);

    // Setup security tokens
    if (UserGroupInformation.isSecurityEnabled()) {
        // Note: Credentials class is marked as LimitedPrivate for HDFS and MapReduce
        Credentials credentials = new Credentials();
        String tokenRenewer = conf.get(YarnConfiguration.RM_PRINCIPAL);
        if (tokenRenewer == null || tokenRenewer.length() == 0) {
            throw new IOException("Can't get Master Kerberos principal for the RM to use as renewer");
        }

        // For now, only getting tokens for the default file-system.
        final Token<?> tokens[] = fs.addDelegationTokens(tokenRenewer, credentials);
        if (tokens != null) {
            for (Token<?> token : tokens) {
                LOG.info("Got dt for " + fs.getUri() + "; " + token);
            }
        }
        DataOutputBuffer dob = new DataOutputBuffer();
        credentials.writeTokenStorageToStream(dob);
        ByteBuffer fsTokens = ByteBuffer.wrap(dob.getData(), 0, dob.getLength());
        amContainer.setTokens(fsTokens);
    }

    appContext.setAMContainerSpec(amContainer);

    // Set the priority for the application master
    // TODO - what is the range for priority? how to decide?
    Priority pri = Priority.newInstance(amPriority);
    appContext.setPriority(pri);

    // Set the queue to which this application is to be submitted in the RM
    appContext.setQueue(amQueue);

    // Submit the application to the applications manager
    // SubmitApplicationResponse submitResp = applicationsManager.submitApplication(appRequest);
    // Ignore the response as either a valid response object is returned on success
    // or an exception thrown to denote some form of a failure
    LOG.info("Submitting application to ASM");

    yarnClient.submitApplication(appContext);

    // TODO
    // Try submitting the same request again
    // app submission failure?

    // Monitor the application
    return monitorApplication(appId);

}

From source file: org.datanucleus.test.BaseTest.java

License: Open Source License

@BeforeClass
public static void beforeClass() throws Exception {
    conf = new HBaseConfiguration();
    dfsCluster = new MiniDFSCluster(conf, 2, true, (String[]) null);

    // mangle the conf so that the fs parameter points to the minidfs we
    // just started up
    final FileSystem filesystem = dfsCluster.getFileSystem();
    conf.set("fs.default.name", filesystem.getUri().toString());
    final Path parentdir = filesystem.getHomeDirectory();
    conf.set(HConstants.HBASE_DIR, parentdir.toString());
    filesystem.mkdirs(parentdir);
    FSUtils.setVersion(filesystem, parentdir);

    preHBaseClusterSetup();
    hBaseClusterSetup();
    postHBaseClusterSetup();

    pm.set(PMF.get().getPersistenceManager());

}

From source file: org.deeplearning4j.iterativereduce.runtime.yarn.client.Client.java

License: Apache License

/**
 * TODO: consider the scenarios where we don't get enough containers;
 * we need to re-submit the job until we get the containers allocated.
 */
@Override
public int run(String[] args) throws Exception {

    //System.out.println("IR: Client.run() [start]");

    if (args.length < 1)
        LOG.info("No configuration file specified, using default (" + ConfigFields.DEFAULT_CONFIG_FILE + ")");

    long startTime = System.currentTimeMillis();
    String configFile = (args.length < 1) ? ConfigFields.DEFAULT_CONFIG_FILE : args[0];
    Properties props = new Properties();
    Configuration conf = getConf();

    try {
        FileInputStream fis = new FileInputStream(configFile);
        props.load(fis);
    } catch (FileNotFoundException ex) {
        throw ex; // TODO: be nice
    } catch (IOException ex) {
        throw ex; // TODO: be nice
    }

    // Make sure we have some bare minimums
    ConfigFields.validateConfig(props);

    if (LOG.isDebugEnabled()) {
        LOG.debug("Loaded configuration: ");
        for (Map.Entry<Object, Object> entry : props.entrySet()) {
            LOG.debug(entry.getKey() + "=" + entry.getValue());
        }
    }

    // TODO: make sure input file(s), libs, etc. actually exist!
    // Ensure our input path exists

    Path p = new Path(props.getProperty(ConfigFields.APP_INPUT_PATH));
    FileSystem fs = FileSystem.get(conf);

    if (!fs.exists(p))
        throw new FileNotFoundException("Input path not found: " + p.toString() + " (in " + fs.getUri() + ")");

    LOG.info("Using input path: " + p.toString());

    // Connect
    ResourceManagerHandler rmHandler = new ResourceManagerHandler(conf, null);
    rmHandler.getClientResourceManager();

    // Create an Application request/ID
    ApplicationId appId = rmHandler.getApplicationId(); // Our AppId
    String appName = props.getProperty(ConfigFields.APP_NAME, ConfigFields.DEFAULT_APP_NAME).replace(' ', '_');

    LOG.info("Got an application, id=" + appId + ", appName=" + appName);

    // Copy resources to [HD]FS
    LOG.debug("Copying resources to filesystem");
    Utils.copyLocalResourcesToFs(props, conf, appId, appName); // Local resources
    Utils.copyLocalResourceToFs(configFile, ConfigFields.APP_CONFIG_FILE, conf, appId, appName); // Config file

    try {
        Utils.copyLocalResourceToFs("log4j.properties", "log4j.properties", conf, appId, appName); // Log4j
    } catch (FileNotFoundException ex) {
        LOG.warn("log4j.properties file not found");
    }

    // Create our context
    List<String> commands = Utils.getMasterCommand(conf, props);
    Map<String, LocalResource> localResources = Utils.getLocalResourcesForApplication(conf, appId, appName,
            props, LocalResourceVisibility.APPLICATION);

    // Submit app
    rmHandler.submitApplication(appId, appName, Utils.getEnvironment(conf, props), localResources, commands,
            Integer.parseInt(props.getProperty(ConfigFields.YARN_MEMORY, "512")));

    /*
     * TODO:
     * - look at updating this code region to make sure job is submitted!
     * 
     */

    StopWatch watch = new StopWatch();
    watch.start();

    // Wait for app to complete
    while (true) {
        Thread.sleep(2000);

        ApplicationReport report = rmHandler.getApplicationReport(appId);
        LOG.info("IterativeReduce report: " + " appId=" + appId.getId() + ", state: "
                + report.getYarnApplicationState().toString() + ", Running Time: " + watch.toString());

        //report.getDiagnostics()

        if (YarnApplicationState.FINISHED == report.getYarnApplicationState()) {
            LOG.info("Application finished in " + (System.currentTimeMillis() - startTime) + "ms");

            if (FinalApplicationStatus.SUCCEEDED == report.getFinalApplicationStatus()) {
                LOG.info("Application completed successfully.");
                return 0;
            } else {
                LOG.info("Application completed with an error: " + report.getDiagnostics());
                return -1;
            }
        } else if (YarnApplicationState.FAILED == report.getYarnApplicationState()
                || YarnApplicationState.KILLED == report.getYarnApplicationState()) {

            LOG.info("Application completed with a failed or killed state: " + report.getDiagnostics());
            return -1;
        }

    }

}

From source file: org.dknight.app.Client.java

License: Apache License

/**
 * Main run function for the client
 * @return true if application completed successfully
 * @throws IOException
 * @throws YarnException
 */
public boolean run() throws IOException, YarnException {

    LOG.info("Running Client");
    yarnClient.start();

    YarnClusterMetrics clusterMetrics = yarnClient.getYarnClusterMetrics();
    LOG.info("Got Cluster metric info from ASM" + ", numNodeManagers=" + clusterMetrics.getNumNodeManagers());

    List<NodeReport> clusterNodeReports = yarnClient.getNodeReports(NodeState.RUNNING);
    LOG.info("Got Cluster node info from ASM");
    for (NodeReport node : clusterNodeReports) {
        LOG.info("Got node report from ASM for" + ", nodeId=" + node.getNodeId() + ", nodeAddress="
                + node.getHttpAddress() + ", nodeRackName=" + node.getRackName() + ", nodeNumContainers="
                + node.getNumContainers());
    }

    QueueInfo queueInfo = yarnClient.getQueueInfo(this.amQueue);
    LOG.info("Queue info" + ", queueName=" + queueInfo.getQueueName() + ", queueCurrentCapacity="
            + queueInfo.getCurrentCapacity() + ", queueMaxCapacity=" + queueInfo.getMaximumCapacity()
            + ", queueApplicationCount=" + queueInfo.getApplications().size() + ", queueChildQueueCount="
            + queueInfo.getChildQueues().size());

    List<QueueUserACLInfo> listAclInfo = yarnClient.getQueueAclsInfo();
    for (QueueUserACLInfo aclInfo : listAclInfo) {
        for (QueueACL userAcl : aclInfo.getUserAcls()) {
            LOG.info("User ACL Info for Queue" + ", queueName=" + aclInfo.getQueueName() + ", userAcl="
                    + userAcl.name());
        }
    }

    // Get a new application id
    YarnClientApplication app = yarnClient.createApplication();
    GetNewApplicationResponse appResponse = app.getNewApplicationResponse();
    // TODO get min/max resource capabilities from RM and change memory ask if needed
    // If we do not have min/max, we may not be able to correctly request 
    // the required resources from the RM for the app master
    // Memory ask has to be a multiple of min and less than max. 
    // Dump out information about cluster capability as seen by the resource manager
    int maxMem = appResponse.getMaximumResourceCapability().getMemory();
    LOG.info("Max mem capability of resources in this cluster " + maxMem);

    // A resource ask cannot exceed the max. 
    if (amMemory > maxMem) {
        LOG.info("AM memory specified above max threshold of cluster. Using max value." + ", specified="
                + amMemory + ", max=" + maxMem);
        amMemory = maxMem;
    }

    // set the application name
    ApplicationSubmissionContext appContext = app.getApplicationSubmissionContext();
    ApplicationId appId = appContext.getApplicationId();
    appContext.setApplicationName(appName);

    // Set up the container launch context for the application master
    ContainerLaunchContext amContainer = Records.newRecord(ContainerLaunchContext.class);

    // set local resources for the application master
    // local files or archives as needed
    // In this scenario, the jar file for the application master is part of the local resources         
    Map<String, LocalResource> localResources = new HashMap<String, LocalResource>();

    LOG.info("Copy App Master jar from local filesystem and add to local environment");
    // Copy the application master jar to the filesystem 
    // Create a local resource to point to the destination jar path 
    FileSystem fs = FileSystem.get(conf);
    Path src = new Path(appMasterJar);
    String pathSuffix = appName + "/" + appId.getId() + "/AppMaster.jar";
    Path dst = new Path(fs.getHomeDirectory(), pathSuffix);
    fs.copyFromLocalFile(false, true, src, dst);
    FileStatus destStatus = fs.getFileStatus(dst);
    LocalResource amJarRsrc = Records.newRecord(LocalResource.class);

    // Set the type of resource - file or archive
    // archives are untarred at destination
    // we don't need the jar file to be untarred for now
    amJarRsrc.setType(LocalResourceType.FILE);
    // Set visibility of the resource 
    // Setting to most private option
    amJarRsrc.setVisibility(LocalResourceVisibility.APPLICATION);
    // Set the resource to be copied over
    amJarRsrc.setResource(ConverterUtils.getYarnUrlFromPath(dst));
    // Set timestamp and length of file so that the framework 
    // can do basic sanity checks for the local resource 
    // after it has been copied over to ensure it is the same 
    // resource the client intended to use with the application
    amJarRsrc.setTimestamp(destStatus.getModificationTime());
    amJarRsrc.setSize(destStatus.getLen());
    localResources.put("AppMaster.jar", amJarRsrc);

    String confXMLFSPath = "";
    {
        File clusterConfXML = new File("cluster-conf.xml");
        conf.writeXml(new FileOutputStream(clusterConfXML));
        Path confSrc = new Path(clusterConfXML.getAbsolutePath());
        String confPathSuffix = appName + "/" + appId.getId() + "/cluster-conf.xml";
        Path confDst = new Path(fs.getHomeDirectory(), confPathSuffix);
        fs.copyFromLocalFile(false, true, confSrc, confDst);
        FileStatus confFileStatus = fs.getFileStatus(confDst);
        LocalResource confRsrc = Records.newRecord(LocalResource.class);
        confRsrc.setType(LocalResourceType.FILE);
        confRsrc.setVisibility(LocalResourceVisibility.APPLICATION);
        confRsrc.setResource(ConverterUtils.getYarnUrlFromURI(confDst.toUri()));
        confRsrc.setSize(confFileStatus.getLen());
        confRsrc.setTimestamp(confFileStatus.getModificationTime());
        localResources.put("cluster-conf.xml", confRsrc);
        confXMLFSPath = confDst.toUri().getPath();
    }

    // Set the log4j properties if needed 
    if (!log4jPropFile.isEmpty()) {
        Path log4jSrc = new Path(log4jPropFile);
        Path log4jDst = new Path(fs.getHomeDirectory(), "log4j.props");
        fs.copyFromLocalFile(false, true, log4jSrc, log4jDst);
        FileStatus log4jFileStatus = fs.getFileStatus(log4jDst);
        LocalResource log4jRsrc = Records.newRecord(LocalResource.class);
        log4jRsrc.setType(LocalResourceType.FILE);
        log4jRsrc.setVisibility(LocalResourceVisibility.APPLICATION);
        log4jRsrc.setResource(ConverterUtils.getYarnUrlFromURI(log4jDst.toUri()));
        log4jRsrc.setTimestamp(log4jFileStatus.getModificationTime());
        log4jRsrc.setSize(log4jFileStatus.getLen());
        localResources.put("log4j.properties", log4jRsrc);
    }

    // The shell script has to be made available on the final container(s)
    // where it will be executed. 
    // To do this, we need to first copy into the filesystem that is visible 
    // to the yarn framework. 
    // We do not need to set this as a local resource for the application 
    // master as the application master does not need it.       
    String hdfsShellScriptLocation = "";
    long hdfsShellScriptLen = 0;
    long hdfsShellScriptTimestamp = 0;
    if (!shellScriptPath.isEmpty()) {
        Path shellSrc = new Path(shellScriptPath);
        String shellPathSuffix = appName + "/" + appId.getId() + "/ExecShellScript.sh";
        Path shellDst = new Path(fs.getHomeDirectory(), shellPathSuffix);
        fs.copyFromLocalFile(false, true, shellSrc, shellDst);
        hdfsShellScriptLocation = shellDst.toUri().toString();
        FileStatus shellFileStatus = fs.getFileStatus(shellDst);
        hdfsShellScriptLen = shellFileStatus.getLen();
        hdfsShellScriptTimestamp = shellFileStatus.getModificationTime();
    }

    // Set local resource info into app master container launch context
    amContainer.setLocalResources(localResources);

    // Set the necessary security tokens as needed
    //amContainer.setContainerTokens(containerToken);

    // Set the env variables to be setup in the env where the application master will be run
    LOG.info("Set the environment for the application master");
    Map<String, String> env = new HashMap<String, String>();

    // put location of shell script into env
    // using the env info, the application master will create the correct local resource for the 
    // eventual containers that will be launched to execute the shell scripts
    env.put(DSConstants.DISTRIBUTEDSHELLSCRIPTLOCATION, hdfsShellScriptLocation);
    env.put(DSConstants.DISTRIBUTEDSHELLSCRIPTTIMESTAMP, Long.toString(hdfsShellScriptTimestamp));
    env.put(DSConstants.DISTRIBUTEDSHELLSCRIPTLEN, Long.toString(hdfsShellScriptLen));
    env.put(DSConstants.CLUSTER_CONF_XML_PATH, confXMLFSPath);

    // Add AppMaster.jar location to classpath       
    // At some point we should not be required to add 
    // the hadoop specific classpaths to the env. 
    // It should be provided out of the box. 
    // For now setting all required classpaths including
    // the classpath to "." for the application jar
    StringBuilder classPathEnv = new StringBuilder(Environment.CLASSPATH.$()).append(File.pathSeparatorChar)
            .append("./*");
    for (String c : conf.getStrings(YarnConfiguration.YARN_APPLICATION_CLASSPATH,
            YarnConfiguration.DEFAULT_YARN_APPLICATION_CLASSPATH)) {
        classPathEnv.append(File.pathSeparatorChar);
        classPathEnv.append(c.trim());
    }
    classPathEnv.append(File.pathSeparatorChar).append("./log4j.properties");

    // add the runtime classpath needed for tests to work
    if (conf.getBoolean(YarnConfiguration.IS_MINI_YARN_CLUSTER, false)) {
        classPathEnv.append(':');
        classPathEnv.append(System.getProperty("java.class.path"));
    }

    env.put("CLASSPATH", classPathEnv.toString());

    amContainer.setEnvironment(env);

    // Set the necessary command to execute the application master 
    Vector<CharSequence> vargs = new Vector<CharSequence>(30);

    // Set java executable command 
    LOG.info("Setting up app master command");
    vargs.add(Environment.JAVA_HOME.$() + "/bin/java");
    // Set Xmx based on am memory size
    vargs.add("-Xmx" + amMemory + "m");
    //      if (debugFlag) {
    //          vargs.add("-Xdebug -Xrunjdwp:transport=dt_socket,address=9998,server=y,suspend=y");
    //      }
    // Set class name 
    vargs.add(appMasterMainClass);
    // Set params for Application Master
    vargs.add("--container_memory " + String.valueOf(containerMemory));
    vargs.add("--num_containers " + String.valueOf(numContainers));
    vargs.add("--priority " + String.valueOf(shellCmdPriority));
    if (!shellCommand.isEmpty()) {
        vargs.add("--shell_command " + shellCommand + "");
    }
    if (!shellArgs.isEmpty()) {
        vargs.add("--shell_args " + shellArgs + "");
    }
    for (Map.Entry<String, String> entry : shellEnv.entrySet()) {
        vargs.add("--shell_env " + entry.getKey() + "=" + entry.getValue());
    }
    if (debugFlag) {
        vargs.add("--debug");
    }

    vargs.add("1>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/AppMaster.stdout");
    vargs.add("2>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/AppMaster.stderr");

    // Get final command
    StringBuilder command = new StringBuilder();
    for (CharSequence str : vargs) {
        command.append(str).append(" ");
    }

    LOG.info("Completed setting up app master command " + command.toString());
    List<String> commands = new ArrayList<String>();
    commands.add(command.toString());
    amContainer.setCommands(commands);

    // Set up resource type requirements
    // For now, only memory is supported so we set memory requirements
    Resource capability = Records.newRecord(Resource.class);
    capability.setMemory(amMemory);
    appContext.setResource(capability);

    // Service data is a binary blob that can be passed to the application
    // Not needed in this scenario
    // amContainer.setServiceData(serviceData);

    // Setup security tokens
    if (UserGroupInformation.isSecurityEnabled()) {
        Credentials credentials = new Credentials();
        String tokenRenewer = conf.get(YarnConfiguration.RM_PRINCIPAL);
        if (tokenRenewer == null || tokenRenewer.length() == 0) {
            throw new IOException("Can't get Master Kerberos principal for the RM to use as renewer");
        }

        // For now, only getting tokens for the default file-system.
        final Token<?> tokens[] = fs.addDelegationTokens(tokenRenewer, credentials);
        if (tokens != null) {
            for (Token<?> token : tokens) {
                LOG.info("Got dt for " + fs.getUri() + "; " + token);
            }
        }
        DataOutputBuffer dob = new DataOutputBuffer();
        credentials.writeTokenStorageToStream(dob);
        ByteBuffer fsTokens = ByteBuffer.wrap(dob.getData(), 0, dob.getLength());
        amContainer.setTokens(fsTokens);
    }

    appContext.setAMContainerSpec(amContainer);

    // Set the priority for the application master
    Priority pri = Records.newRecord(Priority.class);
    // TODO - what is the range for priority? how to decide? 
    pri.setPriority(amPriority);
    appContext.setPriority(pri);

    // Set the queue to which this application is to be submitted in the RM
    appContext.setQueue(amQueue);

    // Submit the application to the applications manager
    // SubmitApplicationResponse submitResp = applicationsManager.submitApplication(appRequest);
    // Ignore the response as either a valid response object is returned on success 
    // or an exception thrown to denote some form of a failure
    LOG.info("Submitting application to ASM");

    yarnClient.submitApplication(appContext);

    // TODO
    // Try submitting the same request again
    // app submission failure?

    // Monitor the application
    return monitorApplication(appId);

}

From source file: org.dutir.lucene.io.HadoopUtility.java

License: Mozilla Public License

protected static void saveApplicationSetupToJob(JobConf jobConf, boolean getFreshProperties) throws Exception {
    // Do we load a fresh properties File?
    //TODO fix, if necessary
    //if (getFreshProperties)
    //   loadApplicationSetup(new Path(ApplicationSetup.TERRIER_HOME));

    FileSystem remoteFS = FileSystem.get(jobConf);
    URI remoteFSURI = remoteFS.getUri();
    //make a copy of the current application setup properties, these may be amended
    //as some files are more globally accessible
    final Properties propertiesDuringJob = new Properties();
    Properties appProperties = ApplicationSetup.getProperties();
    for (Object _key : appProperties.keySet()) {
        String key = (String) _key;
        propertiesDuringJob.put(key, appProperties.get(key));
    }

    //the share folder is needed during indexing, save this on DFS
    if (Files.getFileSystemName(ApplicationSetup.LUCENE_SHARE).equals("local")) {
        Path tempTRShare = makeTemporaryFile(jobConf, "terrier.share");
        propertiesDuringJob.setProperty("terrier.share", remoteFSURI.resolve(tempTRShare.toUri()).toString());
        logger.info("Copying terrier share/ directory to shared storage area ("
                + remoteFSURI.resolve(tempTRShare.toUri()).toString() + ")");
        FileUtil.copy(FileSystem.getLocal(jobConf), new Path(ApplicationSetup.LUCENE_SHARE), remoteFS,
                tempTRShare, false, false, jobConf);
    }

    //copy the terrier.properties content over
    Path tempTRProperties = makeTemporaryFile(jobConf, "terrier.properties");
    logger.debug("Writing terrier properties out to DFS " + tempTRProperties.toString());
    OutputStream out = remoteFS.create(tempTRProperties);
    remoteFS.deleteOnExit(tempTRProperties);
    propertiesDuringJob.store(out, "Automatically generated by HadoopPlugin.saveApplicationSetupToJob()");
    out.close();
    out = null;
    DistributedCache.addCacheFile(tempTRProperties.toUri().resolve(new URI("#terrier.properties")), jobConf);
    DistributedCache.createSymlink(jobConf);

    //copy the non-JVM system properties over as well
    Path tempSysProperties = makeTemporaryFile(jobConf, "system.properties");
    DataOutputStream dos = FileSystem.get(jobConf).create(tempSysProperties);
    logger.debug("Writing system properties out to DFS " + tempSysProperties.toString());
    for (Object _propertyKey : System.getProperties().keySet()) {
        String propertyKey = (String) _propertyKey;
        if (!startsWithAny(propertyKey, checkSystemProperties)) {
            dos.writeUTF(propertyKey);
            dos.writeUTF(System.getProperty(propertyKey));
        }
    }
    dos.writeUTF("FIN");
    dos.close();
    dos = null;
    DistributedCache.addCacheFile(tempSysProperties.toUri().resolve(new URI("#system.properties")), jobConf);
}

From source file: org.elasticsearch.plugin.hadoop.hdfs.MiniHDFSCluster.java

License: Apache License

@SuppressForbidden(reason = "Hadoop is messy")
public static void main(String[] args) throws Exception {
    FileUtil.fullyDelete(new File(System.getProperty("test.build.data", "build/test/data"), "dfs/"));
    // MiniHadoopClusterManager.main(new String[] { "-nomr" });
    Configuration cfg = new Configuration();
    cfg.set(DataNode.DATA_DIR_PERMISSION_KEY, "666");
    cfg.set("dfs.replication", "0");
    MiniDFSCluster dfsCluster = new MiniDFSCluster(cfg, 1, true, null);
    FileSystem fs = dfsCluster.getFileSystem();
    System.out.println(fs.getClass());
    System.out.println(fs.getUri());
    System.out.println(dfsCluster.getHftpFileSystem().getClass());

    // dfsCluster.shutdown();
}