List of usage examples for org.apache.hadoop.fs FileSystem getUri
public abstract URI getUri();
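getUri() returns a URI whose scheme and authority identify this FileSystem instance (for example hdfs://namenode:8020 or s3n://bucket); the examples below use it mostly for logging and for resolving paths against the correct filesystem. A minimal, self-contained sketch of typical usage (the class name and printed values are illustrative, not taken from the examples below):

import java.io.IOException;
import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;

public class GetUriExample {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        // Resolves to whatever fs.defaultFS points at, e.g. hdfs://namenode:8020
        // on a cluster, or file:/// when running with default configuration
        FileSystem fs = FileSystem.get(conf);
        URI uri = fs.getUri();
        System.out.println("Default FileSystem URI: " + uri);
        // The scheme/authority pair identifies the filesystem instance;
        // authority may be null for the local filesystem
        System.out.println("scheme=" + uri.getScheme() + ", authority=" + uri.getAuthority());
    }
}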
From source file:org.commoncrawl.mapred.ec2.parser.OutputCommitter.java
License:Open Source License
@Override
public boolean needsTaskCommit(TaskAttemptContext context) throws IOException {
    LOG.info("COMMITTER- Needs Commit Called on:" + context.getTaskAttemptID().toString());
    try {
        Path taskOutputPath = getTempTaskOutputPath(context);
        if (taskOutputPath != null) {
            context.getProgressible().progress();
            FileSystem fs = FileSystem.get(context.getJobConf());
            LOG.info("COMMITTER - Default FS is:" + fs.getUri());
            // Get the file-system for the task output directory
            FileSystem fsFromPath = taskOutputPath.getFileSystem(context.getJobConf());
            // since task output path is created on demand,
            // if it exists, task needs a commit
            LOG.info("COMMITTER - Checking if outputPath Exists:" + taskOutputPath + " for task:"
                    + context.getTaskAttemptID().toString());
            if (fs.exists(taskOutputPath)) {
                LOG.info("Needs Commit Returning TRUE");
                return true;
            }
        }
    } catch (IOException ioe) {
        throw ioe;
    }
    LOG.info("COMMITTER Needs Commit Returning FALSE");
    return false;
}
From source file:org.commoncrawl.mapred.ec2.postprocess.crawldb.LinkGraphDataEmitterJob.java
License:Open Source License
public LinkGraphDataEmitterJob(Configuration conf) throws Exception {
    FileSystem fs = FileSystem.get(new URI("s3n://aws-publicdatasets"), conf);
    LOG.info("FileSystem is:" + fs.getUri() + " Scanning for valid segments");
    SortedSet<Long> validSegments = scanForValidSegments(fs);
    LOG.info("There are: " + validSegments.size() + " valid segments. Scanning for Merged Segments");
    SortedSet<Long> mergedSegments = scanForMergedSegments(fs);
    LOG.info("There are: " + mergedSegments.size() + " merged Segments");
    // calculate difference
    Set<Long> segmentsToProcess = Sets.difference(validSegments, mergedSegments);
    LOG.info("There are: " + segmentsToProcess.size() + " Segments that need to be merged");
    // ok we are ready to go ..
    //int iteration = 0;
    for (long segmentId : segmentsToProcess) {
        LOG.info("Queueing Segment:" + segmentId + " for Merge");
        queue(fs, conf, segmentId);
    }
    // queue shutdown items
    for (int i = 0; i < MAX_SIMULTANEOUS_JOBS; ++i) {
        _queue.put(new QueueItem());
    }
}
From source file:org.commoncrawl.mapred.ec2.postprocess.linkCollector.LinkCollectorJob.java
License:Open Source License
public LinkCollectorJob(Configuration conf) throws Exception {
    FileSystem fs = FileSystem.get(new URI("s3n://aws-publicdatasets"), conf);
    LOG.info("FileSystem is:" + fs.getUri() + " Scanning for valid segments");
    SortedSet<Long> validSegments = scanForValidSegments(fs);
    LOG.info("There are: " + validSegments.size() + " valid segments. Scanning for Merged Segments");
    SortedSet<Long> mergedSegments = scanForMergedSegments(fs);
    LOG.info("There are: " + mergedSegments.size() + " merged Segments");
    // calculate difference
    Set<Long> segmentsToProcess = Sets.difference(validSegments, mergedSegments);
    LOG.info("There are: " + segmentsToProcess.size() + " Segments that need to be merged");
    // ok we are ready to go ..
    int iteration = 0;
    for (long segmentId : segmentsToProcess) {
        LOG.info("Queueing Segment:" + segmentId + " for Merge");
        queue(fs, conf, segmentId);
    }
    // queue shutdown items
    for (int i = 0; i < MAX_SIMULTANEOUS_JOBS; ++i) {
        _queue.put(new QueueItem());
    }
}
From source file:org.commoncrawl.mapred.ec2.postprocess.linkCollector.LinkGraphDataEmitterJob.java
License:Open Source License
public LinkGraphDataEmitterJob(Configuration conf) throws Exception {
    FileSystem fs = FileSystem.get(new URI("s3n://aws-publicdatasets"), conf);
    LOG.info("FileSystem is:" + fs.getUri() + " Scanning for valid segments");
    SortedSet<Long> validSegments = scanForValidSegments(fs);
    LOG.info("There are: " + validSegments.size() + " valid segments. Scanning for Merged Segments");
    SortedSet<Long> mergedSegments = scanForMergedSegments(fs);
    LOG.info("There are: " + mergedSegments.size() + " merged Segments");
    // calculate difference
    Set<Long> segmentsToProcess = Sets.difference(validSegments, mergedSegments);
    LOG.info("There are: " + segmentsToProcess.size() + " Segments that need to be merged");
    // ok we are ready to go ..
    int iteration = 0;
    for (long segmentId : segmentsToProcess) {
        LOG.info("Queueing Segment:" + segmentId + " for Merge");
        queue(fs, conf, segmentId);
    }
    // queue shutdown items
    for (int i = 0; i < MAX_SIMULTANEOUS_JOBS; ++i) {
        _queue.put(new QueueItem());
    }
}
From source file:org.conan.myhadoop02.mr.yarntest.Client.java
License:Apache License
/**
 * Main run function for the client
 * @return true if application completed successfully
 * @throws IOException
 * @throws YarnException
 */
public boolean run() throws IOException, YarnException {
    LOG.info("Running Client");
    yarnClient.start();

    YarnClusterMetrics clusterMetrics = yarnClient.getYarnClusterMetrics();
    LOG.info("Got Cluster metric info from ASM" + ", numNodeManagers=" + clusterMetrics.getNumNodeManagers());

    List<NodeReport> clusterNodeReports = yarnClient.getNodeReports(NodeState.RUNNING);
    LOG.info("Got Cluster node info from ASM");
    for (NodeReport node : clusterNodeReports) {
        LOG.info("Got node report from ASM for" + ", nodeId=" + node.getNodeId() + ", nodeAddress"
                + node.getHttpAddress() + ", nodeRackName" + node.getRackName() + ", nodeNumContainers"
                + node.getNumContainers());
    }

    QueueInfo queueInfo = yarnClient.getQueueInfo(this.amQueue);
    LOG.info("Queue info" + ", queueName=" + queueInfo.getQueueName() + ", queueCurrentCapacity="
            + queueInfo.getCurrentCapacity() + ", queueMaxCapacity=" + queueInfo.getMaximumCapacity()
            + ", queueApplicationCount=" + queueInfo.getApplications().size() + ", queueChildQueueCount="
            + queueInfo.getChildQueues().size());

    List<QueueUserACLInfo> listAclInfo = yarnClient.getQueueAclsInfo();
    for (QueueUserACLInfo aclInfo : listAclInfo) {
        for (QueueACL userAcl : aclInfo.getUserAcls()) {
            LOG.info("User ACL Info for Queue" + ", queueName=" + aclInfo.getQueueName() + ", userAcl="
                    + userAcl.name());
        }
    }

    if (domainId != null && domainId.length() > 0 && toCreateDomain) {
        prepareTimelineDomain();
    }

    // Get a new application id
    YarnClientApplication app = yarnClient.createApplication();
    GetNewApplicationResponse appResponse = app.getNewApplicationResponse();
    // TODO get min/max resource capabilities from RM and change memory ask if needed
    // If we do not have min/max, we may not be able to correctly request
    // the required resources from the RM for the app master
    // Memory ask has to be a multiple of min and less than max.
    // Dump out information about cluster capability as seen by the resource manager
    int maxMem = appResponse.getMaximumResourceCapability().getMemory();
    LOG.info("Max mem capability of resources in this cluster " + maxMem);

    // A resource ask cannot exceed the max.
    if (amMemory > maxMem) {
        LOG.info("AM memory specified above max threshold of cluster. Using max value." + ", specified="
                + amMemory + ", max=" + maxMem);
        amMemory = maxMem;
    }

    int maxVCores = appResponse.getMaximumResourceCapability().getVirtualCores();
    LOG.info("Max virtual cores capability of resources in this cluster " + maxVCores);

    if (amVCores > maxVCores) {
        LOG.info("AM virtual cores specified above max threshold of cluster. " + "Using max value."
                + ", specified=" + amVCores + ", max=" + maxVCores);
        amVCores = maxVCores;
    }

    // set the application name
    ApplicationSubmissionContext appContext = app.getApplicationSubmissionContext();
    ApplicationId appId = appContext.getApplicationId();

    appContext.setKeepContainersAcrossApplicationAttempts(keepContainers);
    appContext.setApplicationName(appName);

    if (attemptFailuresValidityInterval >= 0) {
        appContext.setAttemptFailuresValidityInterval(attemptFailuresValidityInterval);
    }

    // set local resources for the application master
    // local files or archives as needed
    // In this scenario, the jar file for the application master is part of the local resources
    Map<String, LocalResource> localResources = new HashMap<String, LocalResource>();

    LOG.info("Copy App Master jar from local filesystem and add to local environment");
    // Copy the application master jar to the filesystem
    // Create a local resource to point to the destination jar path
    FileSystem fs = FileSystem.get(conf);
    addToLocalResources(fs, appMasterJar, appMasterJarPath, appId.toString(), localResources, null);

    // Set the log4j properties if needed
    if (!log4jPropFile.isEmpty()) {
        addToLocalResources(fs, log4jPropFile, log4jPath, appId.toString(), localResources, null);
    }

    // The shell script has to be made available on the final container(s)
    // where it will be executed.
    // To do this, we need to first copy into the filesystem that is visible
    // to the yarn framework.
    // We do not need to set this as a local resource for the application
    // master as the application master does not need it.
    String hdfsShellScriptLocation = "";
    long hdfsShellScriptLen = 0;
    long hdfsShellScriptTimestamp = 0;
    if (!shellScriptPath.isEmpty()) {
        Path shellSrc = new Path(shellScriptPath);
        String shellPathSuffix = appName + "/" + appId.toString() + "/" + SCRIPT_PATH;
        Path shellDst = new Path(fs.getHomeDirectory(), shellPathSuffix);
        fs.copyFromLocalFile(false, true, shellSrc, shellDst);
        hdfsShellScriptLocation = shellDst.toUri().toString();
        FileStatus shellFileStatus = fs.getFileStatus(shellDst);
        hdfsShellScriptLen = shellFileStatus.getLen();
        hdfsShellScriptTimestamp = shellFileStatus.getModificationTime();
    }

    if (!shellCommand.isEmpty()) {
        addToLocalResources(fs, null, shellCommandPath, appId.toString(), localResources, shellCommand);
    }

    if (shellArgs.length > 0) {
        addToLocalResources(fs, null, shellArgsPath, appId.toString(), localResources,
                StringUtils.join(shellArgs, " "));
    }

    // Set the necessary security tokens as needed
    //amContainer.setContainerTokens(containerToken);

    // Set the env variables to be setup in the env where the application master will be run
    LOG.info("Set the environment for the application master");
    Map<String, String> env = new HashMap<String, String>();

    // put location of shell script into env
    // using the env info, the application master will create the correct local resource for the
    // eventual containers that will be launched to execute the shell scripts
    env.put(DSConstants.DISTRIBUTEDSHELLSCRIPTLOCATION, hdfsShellScriptLocation);
    env.put(DSConstants.DISTRIBUTEDSHELLSCRIPTTIMESTAMP, Long.toString(hdfsShellScriptTimestamp));
    env.put(DSConstants.DISTRIBUTEDSHELLSCRIPTLEN, Long.toString(hdfsShellScriptLen));
    if (domainId != null && domainId.length() > 0) {
        env.put(DSConstants.DISTRIBUTEDSHELLTIMELINEDOMAIN, domainId);
    }

    // Add AppMaster.jar location to classpath
    // At some point we should not be required to add
    // the hadoop specific classpaths to the env.
    // It should be provided out of the box.
    // For now setting all required classpaths including
    // the classpath to "." for the application jar
    StringBuilder classPathEnv = new StringBuilder(Environment.CLASSPATH.$$())
            .append(ApplicationConstants.CLASS_PATH_SEPARATOR).append("./*");
    for (String c : conf.getStrings(YarnConfiguration.YARN_APPLICATION_CLASSPATH,
            YarnConfiguration.DEFAULT_YARN_CROSS_PLATFORM_APPLICATION_CLASSPATH)) {
        classPathEnv.append(ApplicationConstants.CLASS_PATH_SEPARATOR);
        classPathEnv.append(c.trim());
    }
    classPathEnv.append(ApplicationConstants.CLASS_PATH_SEPARATOR).append("./log4j.properties");

    // add the runtime classpath needed for tests to work
    if (conf.getBoolean(YarnConfiguration.IS_MINI_YARN_CLUSTER, false)) {
        classPathEnv.append(':');
        classPathEnv.append(System.getProperty("java.class.path"));
    }

    env.put("CLASSPATH", classPathEnv.toString());

    // Set the necessary command to execute the application master
    Vector<CharSequence> vargs = new Vector<CharSequence>(30);

    // Set java executable command
    LOG.info("Setting up app master command");
    vargs.add(Environment.JAVA_HOME.$$() + "/bin/java");
    // Set Xmx based on am memory size
    vargs.add("-Xmx" + amMemory + "m");
    // Set class name
    vargs.add(appMasterMainClass);
    // Set params for Application Master
    vargs.add("--container_memory " + String.valueOf(containerMemory));
    vargs.add("--container_vcores " + String.valueOf(containerVirtualCores));
    vargs.add("--num_containers " + String.valueOf(numContainers));
    if (null != nodeLabelExpression) {
        appContext.setNodeLabelExpression(nodeLabelExpression);
    }
    vargs.add("--priority " + String.valueOf(shellCmdPriority));

    for (Map.Entry<String, String> entry : shellEnv.entrySet()) {
        vargs.add("--shell_env " + entry.getKey() + "=" + entry.getValue());
    }
    if (debugFlag) {
        vargs.add("--debug");
    }

    vargs.add("1>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/AppMaster.stdout");
    vargs.add("2>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/AppMaster.stderr");

    // Get final command
    StringBuilder command = new StringBuilder();
    for (CharSequence str : vargs) {
        command.append(str).append(" ");
    }

    LOG.info("Completed setting up app master command " + command.toString());
    List<String> commands = new ArrayList<String>();
    commands.add(command.toString());

    // Set up the container launch context for the application master
    ContainerLaunchContext amContainer = ContainerLaunchContext.newInstance(localResources, env, commands,
            null, null, null);

    // Set up resource type requirements
    // For now, both memory and vcores are supported, so we set memory and
    // vcores requirements
    Resource capability = Resource.newInstance(amMemory, amVCores);
    appContext.setResource(capability);

    // Service data is a binary blob that can be passed to the application
    // Not needed in this scenario
    // amContainer.setServiceData(serviceData);

    // Setup security tokens
    if (UserGroupInformation.isSecurityEnabled()) {
        // Note: Credentials class is marked as LimitedPrivate for HDFS and MapReduce
        Credentials credentials = new Credentials();
        String tokenRenewer = conf.get(YarnConfiguration.RM_PRINCIPAL);
        if (tokenRenewer == null || tokenRenewer.length() == 0) {
            throw new IOException("Can't get Master Kerberos principal for the RM to use as renewer");
        }

        // For now, only getting tokens for the default file-system.
        final Token<?> tokens[] = fs.addDelegationTokens(tokenRenewer, credentials);
        if (tokens != null) {
            for (Token<?> token : tokens) {
                LOG.info("Got dt for " + fs.getUri() + "; " + token);
            }
        }
        DataOutputBuffer dob = new DataOutputBuffer();
        credentials.writeTokenStorageToStream(dob);
        ByteBuffer fsTokens = ByteBuffer.wrap(dob.getData(), 0, dob.getLength());
        amContainer.setTokens(fsTokens);
    }

    appContext.setAMContainerSpec(amContainer);

    // Set the priority for the application master
    // TODO - what is the range for priority? how to decide?
    Priority pri = Priority.newInstance(amPriority);
    appContext.setPriority(pri);

    // Set the queue to which this application is to be submitted in the RM
    appContext.setQueue(amQueue);

    // Submit the application to the applications manager
    // SubmitApplicationResponse submitResp = applicationsManager.submitApplication(appRequest);
    // Ignore the response as either a valid response object is returned on success
    // or an exception thrown to denote some form of a failure
    LOG.info("Submitting application to ASM");

    yarnClient.submitApplication(appContext);

    // TODO
    // Try submitting the same request again
    // app submission failure?

    // Monitor the application
    return monitorApplication(appId);
}
From source file:org.datanucleus.test.BaseTest.java
License:Open Source License
@BeforeClass
public static void beforeClass() throws Exception {
    conf = new HBaseConfiguration();
    dfsCluster = new MiniDFSCluster(conf, 2, true, (String[]) null);

    // mangle the conf so that the fs parameter points to the minidfs we
    // just started up
    final FileSystem filesystem = dfsCluster.getFileSystem();
    conf.set("fs.default.name", filesystem.getUri().toString());
    final Path parentdir = filesystem.getHomeDirectory();
    conf.set(HConstants.HBASE_DIR, parentdir.toString());
    filesystem.mkdirs(parentdir);
    FSUtils.setVersion(filesystem, parentdir);

    preHBaseClusterSetup();
    hBaseClusterSetup();
    postHBaseClusterSetup();

    pm.set(PMF.get().getPersistenceManager());
}
From source file:org.deeplearning4j.iterativereduce.runtime.yarn.client.Client.java
License:Apache License
/**
 * TODO: consider the scenarios where we don't get enough containers
 * - we need to re-submit the job till we get the containers alloc'd
 */
@Override
public int run(String[] args) throws Exception {
    //System.out.println("IR: Client.run() [start]");

    if (args.length < 1)
        LOG.info("No configuration file specified, using default (" + ConfigFields.DEFAULT_CONFIG_FILE + ")");

    long startTime = System.currentTimeMillis();
    String configFile = (args.length < 1) ? ConfigFields.DEFAULT_CONFIG_FILE : args[0];
    Properties props = new Properties();
    Configuration conf = getConf();

    try {
        FileInputStream fis = new FileInputStream(configFile);
        props.load(fis);
    } catch (FileNotFoundException ex) {
        throw ex; // TODO: be nice
    } catch (IOException ex) {
        throw ex; // TODO: be nice
    }

    // Make sure we have some bare minimums
    ConfigFields.validateConfig(props);

    if (LOG.isDebugEnabled()) {
        LOG.debug("Loaded configuration: ");
        for (Map.Entry<Object, Object> entry : props.entrySet()) {
            LOG.debug(entry.getKey() + "=" + entry.getValue());
        }
    }

    // TODO: make sure input file(s), libs, etc. actually exist!
    // Ensure our input path exists
    Path p = new Path(props.getProperty(ConfigFields.APP_INPUT_PATH));
    FileSystem fs = FileSystem.get(conf);
    if (!fs.exists(p))
        throw new FileNotFoundException("Input path not found: " + p.toString() + " (in " + fs.getUri() + ")");

    LOG.info("Using input path: " + p.toString());

    // Connect
    ResourceManagerHandler rmHandler = new ResourceManagerHandler(conf, null);
    rmHandler.getClientResourceManager();

    // Create an Application request/ID
    ApplicationId appId = rmHandler.getApplicationId(); // Our AppId
    String appName = props.getProperty(ConfigFields.APP_NAME, ConfigFields.DEFAULT_APP_NAME).replace(' ', '_');

    LOG.info("Got an application, id=" + appId + ", appName=" + appName);

    // Copy resources to [HD]FS
    LOG.debug("Copying resources to filesystem");
    Utils.copyLocalResourcesToFs(props, conf, appId, appName); // Local resources
    Utils.copyLocalResourceToFs(configFile, ConfigFields.APP_CONFIG_FILE, conf, appId, appName); // Config file

    try {
        Utils.copyLocalResourceToFs("log4j.properties", "log4j.properties", conf, appId, appName); // Log4j
    } catch (FileNotFoundException ex) {
        LOG.warn("log4j.properties file not found");
    }

    // Create our context
    List<String> commands = Utils.getMasterCommand(conf, props);
    Map<String, LocalResource> localResources = Utils.getLocalResourcesForApplication(conf, appId, appName,
            props, LocalResourceVisibility.APPLICATION);

    // Submit app
    rmHandler.submitApplication(appId, appName, Utils.getEnvironment(conf, props), localResources, commands,
            Integer.parseInt(props.getProperty(ConfigFields.YARN_MEMORY, "512")));

    /*
     * TODO:
     * - look at updating this code region to make sure job is submitted!
     */

    StopWatch watch = new StopWatch();
    watch.start();

    // Wait for app to complete
    while (true) {
        Thread.sleep(2000);

        ApplicationReport report = rmHandler.getApplicationReport(appId);
        LOG.info("IterativeReduce report: " + " appId=" + appId.getId() + ", state: "
                + report.getYarnApplicationState().toString() + ", Running Time: " + watch.toString());

        //report.getDiagnostics()

        if (YarnApplicationState.FINISHED == report.getYarnApplicationState()) {
            LOG.info("Application finished in " + (System.currentTimeMillis() - startTime) + "ms");

            if (FinalApplicationStatus.SUCCEEDED == report.getFinalApplicationStatus()) {
                LOG.info("Application completed successfully.");
                return 0;
            } else {
                LOG.info("Application completed with an error: " + report.getDiagnostics());
                return -1;
            }
        } else if (YarnApplicationState.FAILED == report.getYarnApplicationState()
                || YarnApplicationState.KILLED == report.getYarnApplicationState()) {
            LOG.info("Application completed with a failed or killed state: " + report.getDiagnostics());
            return -1;
        }
    }
}
From source file:org.dknight.app.Client.java
License:Apache License
/**
 * Main run function for the client
 * @return true if application completed successfully
 * @throws IOException
 * @throws YarnException
 */
public boolean run() throws IOException, YarnException {
    LOG.info("Running Client");
    yarnClient.start();

    YarnClusterMetrics clusterMetrics = yarnClient.getYarnClusterMetrics();
    LOG.info("Got Cluster metric info from ASM" + ", numNodeManagers=" + clusterMetrics.getNumNodeManagers());

    List<NodeReport> clusterNodeReports = yarnClient.getNodeReports(NodeState.RUNNING);
    LOG.info("Got Cluster node info from ASM");
    for (NodeReport node : clusterNodeReports) {
        LOG.info("Got node report from ASM for" + ", nodeId=" + node.getNodeId() + ", nodeAddress"
                + node.getHttpAddress() + ", nodeRackName" + node.getRackName() + ", nodeNumContainers"
                + node.getNumContainers());
    }

    QueueInfo queueInfo = yarnClient.getQueueInfo(this.amQueue);
    LOG.info("Queue info" + ", queueName=" + queueInfo.getQueueName() + ", queueCurrentCapacity="
            + queueInfo.getCurrentCapacity() + ", queueMaxCapacity=" + queueInfo.getMaximumCapacity()
            + ", queueApplicationCount=" + queueInfo.getApplications().size() + ", queueChildQueueCount="
            + queueInfo.getChildQueues().size());

    List<QueueUserACLInfo> listAclInfo = yarnClient.getQueueAclsInfo();
    for (QueueUserACLInfo aclInfo : listAclInfo) {
        for (QueueACL userAcl : aclInfo.getUserAcls()) {
            LOG.info("User ACL Info for Queue" + ", queueName=" + aclInfo.getQueueName() + ", userAcl="
                    + userAcl.name());
        }
    }

    // Get a new application id
    YarnClientApplication app = yarnClient.createApplication();
    GetNewApplicationResponse appResponse = app.getNewApplicationResponse();
    // TODO get min/max resource capabilities from RM and change memory ask if needed
    // If we do not have min/max, we may not be able to correctly request
    // the required resources from the RM for the app master
    // Memory ask has to be a multiple of min and less than max.
    // Dump out information about cluster capability as seen by the resource manager
    int maxMem = appResponse.getMaximumResourceCapability().getMemory();
    LOG.info("Max mem capability of resources in this cluster " + maxMem);

    // A resource ask cannot exceed the max.
    if (amMemory > maxMem) {
        LOG.info("AM memory specified above max threshold of cluster. Using max value." + ", specified="
                + amMemory + ", max=" + maxMem);
        amMemory = maxMem;
    }

    // set the application name
    ApplicationSubmissionContext appContext = app.getApplicationSubmissionContext();
    ApplicationId appId = appContext.getApplicationId();
    appContext.setApplicationName(appName);

    // Set up the container launch context for the application master
    ContainerLaunchContext amContainer = Records.newRecord(ContainerLaunchContext.class);

    // set local resources for the application master
    // local files or archives as needed
    // In this scenario, the jar file for the application master is part of the local resources
    Map<String, LocalResource> localResources = new HashMap<String, LocalResource>();

    LOG.info("Copy App Master jar from local filesystem and add to local environment");
    // Copy the application master jar to the filesystem
    // Create a local resource to point to the destination jar path
    FileSystem fs = FileSystem.get(conf);
    Path src = new Path(appMasterJar);
    String pathSuffix = appName + "/" + appId.getId() + "/AppMaster.jar";
    Path dst = new Path(fs.getHomeDirectory(), pathSuffix);
    fs.copyFromLocalFile(false, true, src, dst);
    FileStatus destStatus = fs.getFileStatus(dst);
    LocalResource amJarRsrc = Records.newRecord(LocalResource.class);

    // Set the type of resource - file or archive
    // archives are untarred at destination
    // we don't need the jar file to be untarred for now
    amJarRsrc.setType(LocalResourceType.FILE);
    // Set visibility of the resource
    // Setting to most private option
    amJarRsrc.setVisibility(LocalResourceVisibility.APPLICATION);
    // Set the resource to be copied over
    amJarRsrc.setResource(ConverterUtils.getYarnUrlFromPath(dst));
    // Set timestamp and length of file so that the framework
    // can do basic sanity checks for the local resource
    // after it has been copied over to ensure it is the same
    // resource the client intended to use with the application
    amJarRsrc.setTimestamp(destStatus.getModificationTime());
    amJarRsrc.setSize(destStatus.getLen());
    localResources.put("AppMaster.jar", amJarRsrc);

    String confXMLFSPath = "";
    {
        File clusterConfXML = new File("cluster-conf.xml");
        conf.writeXml(new FileOutputStream(clusterConfXML));
        Path confSrc = new Path(clusterConfXML.getAbsolutePath());
        String confPathSuffix = appName + "/" + appId.getId() + "/cluster-conf.xml";
        Path confDst = new Path(fs.getHomeDirectory(), confPathSuffix);
        fs.copyFromLocalFile(false, true, confSrc, confDst);
        FileStatus confFileStatus = fs.getFileStatus(confDst);
        LocalResource confRsrc = Records.newRecord(LocalResource.class);
        confRsrc.setType(LocalResourceType.FILE);
        confRsrc.setVisibility(LocalResourceVisibility.APPLICATION);
        confRsrc.setResource(ConverterUtils.getYarnUrlFromURI(confDst.toUri()));
        confRsrc.setSize(confFileStatus.getLen());
        confRsrc.setTimestamp(confFileStatus.getModificationTime());
        localResources.put("cluster-conf.xml", confRsrc);
        confXMLFSPath = confDst.toUri().getPath();
    }

    // Set the log4j properties if needed
    if (!log4jPropFile.isEmpty()) {
        Path log4jSrc = new Path(log4jPropFile);
        Path log4jDst = new Path(fs.getHomeDirectory(), "log4j.props");
        fs.copyFromLocalFile(false, true, log4jSrc, log4jDst);
        FileStatus log4jFileStatus = fs.getFileStatus(log4jDst);
        LocalResource log4jRsrc = Records.newRecord(LocalResource.class);
        log4jRsrc.setType(LocalResourceType.FILE);
        log4jRsrc.setVisibility(LocalResourceVisibility.APPLICATION);
        log4jRsrc.setResource(ConverterUtils.getYarnUrlFromURI(log4jDst.toUri()));
        log4jRsrc.setTimestamp(log4jFileStatus.getModificationTime());
        log4jRsrc.setSize(log4jFileStatus.getLen());
        localResources.put("log4j.properties", log4jRsrc);
    }

    // The shell script has to be made available on the final container(s)
    // where it will be executed.
    // To do this, we need to first copy into the filesystem that is visible
    // to the yarn framework.
    // We do not need to set this as a local resource for the application
    // master as the application master does not need it.
    String hdfsShellScriptLocation = "";
    long hdfsShellScriptLen = 0;
    long hdfsShellScriptTimestamp = 0;
    if (!shellScriptPath.isEmpty()) {
        Path shellSrc = new Path(shellScriptPath);
        String shellPathSuffix = appName + "/" + appId.getId() + "/ExecShellScript.sh";
        Path shellDst = new Path(fs.getHomeDirectory(), shellPathSuffix);
        fs.copyFromLocalFile(false, true, shellSrc, shellDst);
        hdfsShellScriptLocation = shellDst.toUri().toString();
        FileStatus shellFileStatus = fs.getFileStatus(shellDst);
        hdfsShellScriptLen = shellFileStatus.getLen();
        hdfsShellScriptTimestamp = shellFileStatus.getModificationTime();
    }

    // Set local resource info into app master container launch context
    amContainer.setLocalResources(localResources);

    // Set the necessary security tokens as needed
    //amContainer.setContainerTokens(containerToken);

    // Set the env variables to be setup in the env where the application master will be run
    LOG.info("Set the environment for the application master");
    Map<String, String> env = new HashMap<String, String>();

    // put location of shell script into env
    // using the env info, the application master will create the correct local resource for the
    // eventual containers that will be launched to execute the shell scripts
    env.put(DSConstants.DISTRIBUTEDSHELLSCRIPTLOCATION, hdfsShellScriptLocation);
    env.put(DSConstants.DISTRIBUTEDSHELLSCRIPTTIMESTAMP, Long.toString(hdfsShellScriptTimestamp));
    env.put(DSConstants.DISTRIBUTEDSHELLSCRIPTLEN, Long.toString(hdfsShellScriptLen));
    env.put(DSConstants.CLUSTER_CONF_XML_PATH, confXMLFSPath);

    // Add AppMaster.jar location to classpath
    // At some point we should not be required to add
    // the hadoop specific classpaths to the env.
    // It should be provided out of the box.
    // For now setting all required classpaths including
    // the classpath to "." for the application jar
    StringBuilder classPathEnv = new StringBuilder(Environment.CLASSPATH.$()).append(File.pathSeparatorChar)
            .append("./*");
    for (String c : conf.getStrings(YarnConfiguration.YARN_APPLICATION_CLASSPATH,
            YarnConfiguration.DEFAULT_YARN_APPLICATION_CLASSPATH)) {
        classPathEnv.append(File.pathSeparatorChar);
        classPathEnv.append(c.trim());
    }
    classPathEnv.append(File.pathSeparatorChar).append("./log4j.properties");

    // add the runtime classpath needed for tests to work
    if (conf.getBoolean(YarnConfiguration.IS_MINI_YARN_CLUSTER, false)) {
        classPathEnv.append(':');
        classPathEnv.append(System.getProperty("java.class.path"));
    }

    env.put("CLASSPATH", classPathEnv.toString());

    amContainer.setEnvironment(env);

    // Set the necessary command to execute the application master
    Vector<CharSequence> vargs = new Vector<CharSequence>(30);

    // Set java executable command
    LOG.info("Setting up app master command");
    vargs.add(Environment.JAVA_HOME.$() + "/bin/java");
    // Set Xmx based on am memory size
    vargs.add("-Xmx" + amMemory + "m");
    // if (debugFlag) {
    //     vargs.add("-Xdebug -Xrunjdwp:transport=dt_socket,address=9998,server=y,suspend=y");
    // }
    // Set class name
    vargs.add(appMasterMainClass);
    // Set params for Application Master
    vargs.add("--container_memory " + String.valueOf(containerMemory));
    vargs.add("--num_containers " + String.valueOf(numContainers));
    vargs.add("--priority " + String.valueOf(shellCmdPriority));
    if (!shellCommand.isEmpty()) {
        vargs.add("--shell_command " + shellCommand + "");
    }
    if (!shellArgs.isEmpty()) {
        vargs.add("--shell_args " + shellArgs + "");
    }
    for (Map.Entry<String, String> entry : shellEnv.entrySet()) {
        vargs.add("--shell_env " + entry.getKey() + "=" + entry.getValue());
    }
    if (debugFlag) {
        vargs.add("--debug");
    }

    vargs.add("1>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/AppMaster.stdout");
    vargs.add("2>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/AppMaster.stderr");

    // Get final command
    StringBuilder command = new StringBuilder();
    for (CharSequence str : vargs) {
        command.append(str).append(" ");
    }

    LOG.info("Completed setting up app master command " + command.toString());
    List<String> commands = new ArrayList<String>();
    commands.add(command.toString());
    amContainer.setCommands(commands);

    // Set up resource type requirements
    // For now, only memory is supported so we set memory requirements
    Resource capability = Records.newRecord(Resource.class);
    capability.setMemory(amMemory);
    appContext.setResource(capability);

    // Service data is a binary blob that can be passed to the application
    // Not needed in this scenario
    // amContainer.setServiceData(serviceData);

    // Setup security tokens
    if (UserGroupInformation.isSecurityEnabled()) {
        Credentials credentials = new Credentials();
        String tokenRenewer = conf.get(YarnConfiguration.RM_PRINCIPAL);
        if (tokenRenewer == null || tokenRenewer.length() == 0) {
            throw new IOException("Can't get Master Kerberos principal for the RM to use as renewer");
        }

        // For now, only getting tokens for the default file-system.
        final Token<?> tokens[] = fs.addDelegationTokens(tokenRenewer, credentials);
        if (tokens != null) {
            for (Token<?> token : tokens) {
                LOG.info("Got dt for " + fs.getUri() + "; " + token);
            }
        }
        DataOutputBuffer dob = new DataOutputBuffer();
        credentials.writeTokenStorageToStream(dob);
        ByteBuffer fsTokens = ByteBuffer.wrap(dob.getData(), 0, dob.getLength());
        amContainer.setTokens(fsTokens);
    }

    appContext.setAMContainerSpec(amContainer);

    // Set the priority for the application master
    Priority pri = Records.newRecord(Priority.class);
    // TODO - what is the range for priority? how to decide?
    pri.setPriority(amPriority);
    appContext.setPriority(pri);

    // Set the queue to which this application is to be submitted in the RM
    appContext.setQueue(amQueue);

    // Submit the application to the applications manager
    // SubmitApplicationResponse submitResp = applicationsManager.submitApplication(appRequest);
    // Ignore the response as either a valid response object is returned on success
    // or an exception thrown to denote some form of a failure
    LOG.info("Submitting application to ASM");

    yarnClient.submitApplication(appContext);

    // TODO
    // Try submitting the same request again
    // app submission failure?

    // Monitor the application
    return monitorApplication(appId);
}
From source file:org.dutir.lucene.io.HadoopUtility.java
License:Mozilla Public License
protected static void saveApplicationSetupToJob(JobConf jobConf, boolean getFreshProperties) throws Exception {
    // Do we load a fresh properties File?
    //TODO fix, if necessary
    //if (getFreshProperties)
    //    loadApplicationSetup(new Path(ApplicationSetup.TERRIER_HOME));

    FileSystem remoteFS = FileSystem.get(jobConf);
    URI remoteFSURI = remoteFS.getUri();
    //make a copy of the current application setup properties, these may be amended
    //as some files are more globally accessible
    final Properties propertiesDuringJob = new Properties();
    Properties appProperties = ApplicationSetup.getProperties();
    for (Object _key : appProperties.keySet()) {
        String key = (String) _key;
        propertiesDuringJob.put(key, appProperties.get(key));
    }

    //the share folder is needed during indexing, save this on DFS
    if (Files.getFileSystemName(ApplicationSetup.LUCENE_SHARE).equals("local")) {
        Path tempTRShare = makeTemporaryFile(jobConf, "terrier.share");
        propertiesDuringJob.setProperty("terrier.share", remoteFSURI.resolve(tempTRShare.toUri()).toString());
        logger.info("Copying terrier share/ directory to shared storage area ("
                + remoteFSURI.resolve(tempTRShare.toUri()).toString() + ")");
        FileUtil.copy(FileSystem.getLocal(jobConf), new Path(ApplicationSetup.LUCENE_SHARE), remoteFS,
                tempTRShare, false, false, jobConf);
    }

    //copy the terrier.properties content over
    Path tempTRProperties = makeTemporaryFile(jobConf, "terrier.properties");
    logger.debug("Writing terrier properties out to DFS " + tempTRProperties.toString());
    OutputStream out = remoteFS.create(tempTRProperties);
    remoteFS.deleteOnExit(tempTRProperties);
    propertiesDuringJob.store(out, "Automatically generated by HadoopPlugin.saveApplicationSetupToJob()");
    out.close();
    out = null;
    DistributedCache.addCacheFile(tempTRProperties.toUri().resolve(new URI("#terrier.properties")), jobConf);
    DistributedCache.createSymlink(jobConf);

    //copy the non-JVM system properties over as well
    Path tempSysProperties = makeTemporaryFile(jobConf, "system.properties");
    DataOutputStream dos = FileSystem.get(jobConf).create(tempSysProperties);
    logger.debug("Writing system properties out to DFS " + tempSysProperties.toString());
    for (Object _propertyKey : System.getProperties().keySet()) {
        String propertyKey = (String) _propertyKey;
        if (!startsWithAny(propertyKey, checkSystemProperties)) {
            dos.writeUTF(propertyKey);
            dos.writeUTF(System.getProperty(propertyKey));
        }
    }
    dos.writeUTF("FIN");
    dos.close();
    dos = null;
    DistributedCache.addCacheFile(tempSysProperties.toUri().resolve(new URI("#system.properties")), jobConf);
}
From source file:org.elasticsearch.plugin.hadoop.hdfs.MiniHDFSCluster.java
License:Apache License
@SuppressForbidden(reason = "Hadoop is messy")
public static void main(String[] args) throws Exception {
    FileUtil.fullyDelete(new File(System.getProperty("test.build.data", "build/test/data"), "dfs/"));
    // MiniHadoopClusterManager.main(new String[] { "-nomr" });
    Configuration cfg = new Configuration();
    cfg.set(DataNode.DATA_DIR_PERMISSION_KEY, "666");
    cfg.set("dfs.replication", "0");
    MiniDFSCluster dfsCluster = new MiniDFSCluster(cfg, 1, true, null);
    FileSystem fs = dfsCluster.getFileSystem();
    System.out.println(fs.getClass());
    System.out.println(fs.getUri());
    System.out.println(dfsCluster.getHftpFileSystem().getClass());
    // dfsCluster.shutdown();
}