List of usage examples for org.apache.hadoop.fs FileSystem copyFromLocalFile
public void copyFromLocalFile(boolean delSrc, boolean overwrite, Path src, Path dst) throws IOException
From source file:org.schedoscope.export.jdbc.JdbcExportJob.java
License:Apache License
private Job configure() throws Exception { Configuration conf = getConfiguration(); conf = configureHiveMetaStore(conf); conf = configureKerberos(conf);/*from w w w . j a v a 2 s .co m*/ conf = configureAnonFields(conf); Job job = Job.getInstance(conf, "JDBCExport: " + inputDatabase + "." + inputTable); job.setJarByClass(JdbcExportJob.class); job.setMapperClass(JdbcExportMapper.class); job.setReducerClass(Reducer.class); job.setNumReduceTasks(numReducer); if (inputFilter == null || inputFilter.trim().equals("")) { HCatInputFormat.setInput(job, inputDatabase, inputTable); } else { HCatInputFormat.setInput(job, inputDatabase, inputTable, inputFilter); } Schema outputSchema = SchemaFactory.getSchema(dbConnectionString, job.getConfiguration()); HCatSchema hcatInputSchema = HCatInputFormat.getTableSchema(job.getConfiguration()); String[] columnNames = SchemaUtils.getColumnNamesFromHcatSchema(hcatInputSchema, outputSchema); String[] columnTypes = SchemaUtils.getColumnTypesFromHcatSchema(hcatInputSchema, outputSchema, ImmutableSet.copyOf(anonFields)); String outputTable = inputDatabase + "_" + inputTable; JdbcOutputFormat.setOutput(job.getConfiguration(), dbConnectionString, dbUser, dbPassword, outputTable, inputFilter, numReducer, commitSize, storageEngine, distributeBy, columnNames, columnTypes); job.setInputFormatClass(HCatInputFormat.class); job.setOutputFormatClass(JdbcOutputFormat.class); job.setMapOutputKeyClass(LongWritable.class); job.setMapOutputValueClass(JdbcOutputWritable.class); job.setOutputKeyClass(LongWritable.class); job.setOutputValueClass(JdbcOutputWritable.class); Class<?> clazz = Class.forName(outputSchema.getDriverName()); String jarFile = ClassUtil.findContainingJar(clazz); String jarSelf = ClassUtil.findContainingJar(JdbcExportJob.class); FileSystem fs = FileSystem.get(job.getConfiguration()); String tmpDir = job.getConfiguration().get("hadoop.tmp.dir"); Path hdfsDir = new Path( tmpDir + "/" + new Path(jarFile).getName() + "." + RandomStringUtils.randomNumeric(20)); if (jarFile != null && jarSelf != null && tmpDir != null && !jarFile.equals(jarSelf)) { LOG.info("copy " + LOCAL_PATH_PREFIX + jarFile + " to " + tmpDir); fs.copyFromLocalFile(false, true, new Path(LOCAL_PATH_PREFIX + jarFile), hdfsDir); LOG.info("add " + hdfsDir + " to distributed cache"); job.addArchiveToClassPath(hdfsDir); } return job; }
From source file:org.smartfrog.services.hadoop.operations.utils.DfsUtils.java
License:Open Source License
/** * Copy a local file into HDFS//from ww w . ja v a 2s . c o m * * @param fileSystem filesystem for the destination * @param source source file * @param dest dest path * @param overwrite should there be an overwrite? * @throws SmartFrogRuntimeException if the copy failed */ public static void copyLocalFileIn(FileSystem fileSystem, File source, Path dest, boolean overwrite) throws SmartFrogRuntimeException { if (!source.exists()) { throw new SmartFrogRuntimeException(ERROR_MISSING_SOURCE_FILE + source); } Path localSource = new Path(source.toURI().toString()); try { fileSystem.copyFromLocalFile(false, overwrite, localSource, dest); } catch (IOException e) { throw new SmartFrogRuntimeException( FAILED_TO_COPY + source + " to " + dest + " on " + fileSystem.getUri(), e); } }
From source file:org.springframework.data.hadoop.fs.FsShell.java
License:Apache License
public void copyFromLocal(String src, String src2, String... dst) { Object[] va = parseVarargs(src, src2, dst); @SuppressWarnings("unchecked") List<Path> srcs = (List<Path>) va[0]; Path dstPath = (Path) va[1]; try {//from ww w .j av a2s . c om FileSystem dstFs = getFS(dstPath); dstFs.copyFromLocalFile(false, false, srcs.toArray(new Path[srcs.size()]), dstPath); } catch (IOException ex) { throw new HadoopException("Cannot copy resources " + ex.getMessage(), ex); } }
From source file:org.starschema.hadoop.yarn.applications.distributedshell.Client.java
License:Apache License
/** * Main run function for the client//w ww . ja va 2 s . com * @return true if application completed successfully * @throws IOException * @throws YarnException */ public boolean run() throws IOException, YarnException { LOG.info("Running Client"); yarnClient.start(); YarnClusterMetrics clusterMetrics = yarnClient.getYarnClusterMetrics(); LOG.info("Got Cluster metric info from ASM" + ", numNodeManagers=" + clusterMetrics.getNumNodeManagers()); List<NodeReport> clusterNodeReports = yarnClient.getNodeReports(NodeState.RUNNING); LOG.info("Got Cluster node info from ASM"); for (NodeReport node : clusterNodeReports) { LOG.info("Got node report from ASM for" + ", nodeId=" + node.getNodeId() + ", nodeAddress" + node.getHttpAddress() + ", nodeRackName" + node.getRackName() + ", nodeNumContainers" + node.getNumContainers()); } QueueInfo queueInfo = yarnClient.getQueueInfo(this.amQueue); LOG.info("Queue info" + ", queueName=" + queueInfo.getQueueName() + ", queueCurrentCapacity=" + queueInfo.getCurrentCapacity() + ", queueMaxCapacity=" + queueInfo.getMaximumCapacity() + ", queueApplicationCount=" + queueInfo.getApplications().size() + ", queueChildQueueCount=" + queueInfo.getChildQueues().size()); List<QueueUserACLInfo> listAclInfo = yarnClient.getQueueAclsInfo(); for (QueueUserACLInfo aclInfo : listAclInfo) { for (QueueACL userAcl : aclInfo.getUserAcls()) { LOG.info("User ACL Info for Queue" + ", queueName=" + aclInfo.getQueueName() + ", userAcl=" + userAcl.name()); } } if (domainId != null && domainId.length() > 0 && toCreateDomain) { prepareTimelineDomain(); } // Get a new application id YarnClientApplication app = yarnClient.createApplication(); GetNewApplicationResponse appResponse = app.getNewApplicationResponse(); // TODO get min/max resource capabilities from RM and change memory ask if needed // If we do not have min/max, we may not be able to correctly request // the required resources from the RM for the app master // Memory ask has to be a multiple of min and less than max. // Dump out information about cluster capability as seen by the resource manager int maxMem = appResponse.getMaximumResourceCapability().getMemory(); LOG.info("Max mem capabililty of resources in this cluster " + maxMem); // A resource ask cannot exceed the max. if (amMemory > maxMem) { LOG.info("AM memory specified above max threshold of cluster. Using max value." + ", specified=" + amMemory + ", max=" + maxMem); amMemory = maxMem; } int maxVCores = appResponse.getMaximumResourceCapability().getVirtualCores(); LOG.info("Max virtual cores capabililty of resources in this cluster " + maxVCores); if (amVCores > maxVCores) { LOG.info("AM virtual cores specified above max threshold of cluster. " + "Using max value." + ", specified=" + amVCores + ", max=" + maxVCores); amVCores = maxVCores; } // set the application name ApplicationSubmissionContext appContext = app.getApplicationSubmissionContext(); ApplicationId appId = appContext.getApplicationId(); appContext.setKeepContainersAcrossApplicationAttempts(keepContainers); appContext.setApplicationName(appName); if (attemptFailuresValidityInterval >= 0) { appContext.setAttemptFailuresValidityInterval(attemptFailuresValidityInterval); } // set local resources for the application master // local files or archives as needed // In this scenario, the jar file for the application master is part of the local resources Map<String, LocalResource> localResources = new HashMap<String, LocalResource>(); LOG.info("Copy App Master jar from local filesystem and add to local environment"); // Copy the application master jar to the filesystem // Create a local resource to point to the destination jar path FileSystem fs = FileSystem.get(conf); addToLocalResources(fs, appMasterJar, appMasterJarPath, appId.toString(), localResources, null); // Set the log4j properties if needed if (!log4jPropFile.isEmpty()) { addToLocalResources(fs, log4jPropFile, log4jPath, appId.toString(), localResources, null); } // The shell script has to be made available on the final container(s) // where it will be executed. // To do this, we need to first copy into the filesystem that is visible // to the yarn framework. // We do not need to set this as a local resource for the application // master as the application master does not need it. String hdfsShellScriptLocation = ""; long hdfsShellScriptLen = 0; long hdfsShellScriptTimestamp = 0; if (!shellScriptPath.isEmpty()) { Path shellSrc = new Path(shellScriptPath); String shellPathSuffix = appName + "/" + appId.toString() + "/" + SCRIPT_PATH; Path shellDst = new Path(fs.getHomeDirectory(), shellPathSuffix); fs.copyFromLocalFile(false, true, shellSrc, shellDst); hdfsShellScriptLocation = shellDst.toUri().toString(); FileStatus shellFileStatus = fs.getFileStatus(shellDst); hdfsShellScriptLen = shellFileStatus.getLen(); hdfsShellScriptTimestamp = shellFileStatus.getModificationTime(); } LOG.info("Copy Hazelcast zip from local filesystem and add to local environment"); String hdfsHazelLocation = ""; long hdfsHazelLen = 0; long hdfsHazelTimestamp = 0; if (!hazelcastZip.isEmpty()) { Path hazelSrc = new Path(hazelcastZip); String hazelPathSuffix = appName + "/" + appId.toString() + "/" + HAZELCAST_PATH; Path hazelDst = new Path(fs.getHomeDirectory(), hazelPathSuffix); fs.copyFromLocalFile(false, true, hazelSrc, hazelDst); hdfsHazelLocation = hazelDst.toUri().toString(); LOG.info("Hazelcast zip location: " + hdfsHazelLocation); FileStatus hazelFileStatus = fs.getFileStatus(hazelDst); hdfsHazelLen = hazelFileStatus.getLen(); hdfsHazelTimestamp = hazelFileStatus.getModificationTime(); } if (!shellCommand.isEmpty()) { addToLocalResources(fs, null, shellCommandPath, appId.toString(), localResources, shellCommand); } if (shellArgs.length > 0) { addToLocalResources(fs, null, shellArgsPath, appId.toString(), localResources, StringUtils.join(shellArgs, " ")); } // Set the necessary security tokens as needed //amContainer.setContainerTokens(containerToken); // Set the env variables to be setup in the env where the application master will be run LOG.info("Set the environment for the application master"); Map<String, String> env = new HashMap<String, String>(); // put location of shell script into env // using the env info, the application master will create the correct local resource for the // eventual containers that will be launched to execute the shell scripts env.put(DSConstants.DISTRIBUTEDSHELLSCRIPTLOCATION, hdfsShellScriptLocation); env.put(DSConstants.DISTRIBUTEDSHELLSCRIPTTIMESTAMP, Long.toString(hdfsShellScriptTimestamp)); env.put(DSConstants.DISTRIBUTEDSHELLSCRIPTLEN, Long.toString(hdfsShellScriptLen)); env.put(DSConstants.HAZELLOCATION, hdfsHazelLocation); env.put(DSConstants.HAZELTIMESTAMP, Long.toString(hdfsHazelTimestamp)); env.put(DSConstants.HAZELLEN, Long.toString(hdfsHazelLen)); if (domainId != null && domainId.length() > 0) { env.put(DSConstants.DISTRIBUTEDSHELLTIMELINEDOMAIN, domainId); } // Add AppMaster.jar location to classpath // At some point we should not be required to add // the hadoop specific classpaths to the env. // It should be provided out of the box. // For now setting all required classpaths including // the classpath to "." for the application jar StringBuilder classPathEnv = new StringBuilder(Environment.CLASSPATH.$$()) .append(ApplicationConstants.CLASS_PATH_SEPARATOR).append("./*"); for (String c : conf.getStrings(YarnConfiguration.YARN_APPLICATION_CLASSPATH, YarnConfiguration.DEFAULT_YARN_CROSS_PLATFORM_APPLICATION_CLASSPATH)) { classPathEnv.append(ApplicationConstants.CLASS_PATH_SEPARATOR); classPathEnv.append(c.trim()); } classPathEnv.append(ApplicationConstants.CLASS_PATH_SEPARATOR).append("./log4j.properties"); // add the runtime classpath needed for tests to work if (conf.getBoolean(YarnConfiguration.IS_MINI_YARN_CLUSTER, false)) { classPathEnv.append(':'); classPathEnv.append(System.getProperty("java.class.path")); } env.put("CLASSPATH", classPathEnv.toString()); // Set the necessary command to execute the application master Vector<CharSequence> vargs = new Vector<CharSequence>(30); // Set java executable command LOG.info("Setting up app master command"); vargs.add(Environment.JAVA_HOME.$$() + "/bin/java"); // Set Xmx based on am memory size vargs.add("-Xmx" + amMemory + "m"); // Set class name vargs.add(appMasterMainClass); // Set params for Application Master vargs.add("--container_memory " + String.valueOf(containerMemory)); vargs.add("--container_vcores " + String.valueOf(containerVirtualCores)); vargs.add("--num_containers " + String.valueOf(numContainers)); if (null != nodeLabelExpression) { appContext.setNodeLabelExpression(nodeLabelExpression); } vargs.add("--priority " + String.valueOf(shellCmdPriority)); for (Map.Entry<String, String> entry : shellEnv.entrySet()) { vargs.add("--shell_env " + entry.getKey() + "=" + entry.getValue()); } if (debugFlag) { vargs.add("--debug"); } vargs.add("1>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/AppMaster.stdout"); vargs.add("2>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/AppMaster.stderr"); // Get final commmand StringBuilder command = new StringBuilder(); for (CharSequence str : vargs) { command.append(str).append(" "); } LOG.info("Completed setting up app master command " + command.toString()); List<String> commands = new ArrayList<String>(); commands.add(command.toString()); // Set up the container launch context for the application master ContainerLaunchContext amContainer = ContainerLaunchContext.newInstance(localResources, env, commands, null, null, null); // Set up resource type requirements // For now, both memory and vcores are supported, so we set memory and // vcores requirements Resource capability = Resource.newInstance(amMemory, amVCores); appContext.setResource(capability); // Service data is a binary blob that can be passed to the application // Not needed in this scenario // amContainer.setServiceData(serviceData); // Setup security tokens if (UserGroupInformation.isSecurityEnabled()) { // Note: Credentials class is marked as LimitedPrivate for HDFS and MapReduce Credentials credentials = new Credentials(); String tokenRenewer = conf.get(YarnConfiguration.RM_PRINCIPAL); if (tokenRenewer == null || tokenRenewer.length() == 0) { throw new IOException("Can't get Master Kerberos principal for the RM to use as renewer"); } // For now, only getting tokens for the default file-system. final Token<?> tokens[] = fs.addDelegationTokens(tokenRenewer, credentials); if (tokens != null) { for (Token<?> token : tokens) { LOG.info("Got dt for " + fs.getUri() + "; " + token); } } DataOutputBuffer dob = new DataOutputBuffer(); credentials.writeTokenStorageToStream(dob); ByteBuffer fsTokens = ByteBuffer.wrap(dob.getData(), 0, dob.getLength()); amContainer.setTokens(fsTokens); } appContext.setAMContainerSpec(amContainer); // Set the priority for the application master // TODO - what is the range for priority? how to decide? Priority pri = Priority.newInstance(amPriority); appContext.setPriority(pri); // Set the queue to which this application is to be submitted in the RM appContext.setQueue(amQueue); // Submit the application to the applications manager // SubmitApplicationResponse submitResp = applicationsManager.submitApplication(appRequest); // Ignore the response as either a valid response object is returned on success // or an exception thrown to denote some form of a failure LOG.info("Submitting application to ASM"); yarnClient.submitApplication(appContext); // TODO // Try submitting the same request again // app submission failure? // Monitor the application return monitorApplication(appId); }
From source file:org.wonderbee.hadoop.util.HadoopUtils.java
License:Apache License
/** Upload a local file to the cluster// www.j a v a 2 s . co m */ public static void uploadLocalFile(Path localsrc, Path hdfsdest, Configuration conf) throws IOException { FileSystem fs = FileSystem.get(conf); if (fs.exists(hdfsdest) && fs.getFileStatus(hdfsdest).isDir()) { fs.delete(hdfsdest, true); } fs.copyFromLocalFile(false, true, localsrc, hdfsdest); }
From source file:origin.hadoop.yarn.distributedshell.Client.java
License:Apache License
/** * Main run function for the client/*from w w w . j a v a 2s .c o m*/ * @return true if application completed successfully * @throws IOException * @throws YarnException */ public boolean run() throws IOException, YarnException { LOG.info("Running Client"); yarnClient.start(); YarnClusterMetrics clusterMetrics = yarnClient.getYarnClusterMetrics(); LOG.info("Got Cluster metric info from ASM" + ", numNodeManagers=" + clusterMetrics.getNumNodeManagers()); List<NodeReport> clusterNodeReports = yarnClient.getNodeReports(NodeState.RUNNING); LOG.info("Got Cluster node info from ASM"); for (NodeReport node : clusterNodeReports) { LOG.info("Got node report from ASM for" + ", nodeId=" + node.getNodeId() + ", nodeAddress" + node.getHttpAddress() + ", nodeRackName" + node.getRackName() + ", nodeNumContainers" + node.getNumContainers()); } QueueInfo queueInfo = yarnClient.getQueueInfo(this.amQueue); LOG.info("Queue info" + ", queueName=" + queueInfo.getQueueName() + ", queueCurrentCapacity=" + queueInfo.getCurrentCapacity() + ", queueMaxCapacity=" + queueInfo.getMaximumCapacity() + ", queueApplicationCount=" + queueInfo.getApplications().size() + ", queueChildQueueCount=" + queueInfo.getChildQueues().size()); List<QueueUserACLInfo> listAclInfo = yarnClient.getQueueAclsInfo(); for (QueueUserACLInfo aclInfo : listAclInfo) { for (QueueACL userAcl : aclInfo.getUserAcls()) { LOG.info("User ACL Info for Queue" + ", queueName=" + aclInfo.getQueueName() + ", userAcl=" + userAcl.name()); } } // Get a new application id YarnClientApplication app = yarnClient.createApplication(); GetNewApplicationResponse appResponse = app.getNewApplicationResponse(); // TODO get min/max resource capabilities from RM and change memory ask if needed // If we do not have min/max, we may not be able to correctly request // the required resources from the RM for the app master // Memory ask has to be a multiple of min and less than max. // Dump out information about cluster capability as seen by the resource manager int maxMem = appResponse.getMaximumResourceCapability().getMemory(); LOG.info("Max mem capabililty of resources in this cluster " + maxMem); // A resource ask cannot exceed the max. if (amMemory > maxMem) { LOG.info("AM memory specified above max threshold of cluster. Using max value." + ", specified=" + amMemory + ", max=" + maxMem); amMemory = maxMem; } int maxVCores = appResponse.getMaximumResourceCapability().getVirtualCores(); LOG.info("Max virtual cores capabililty of resources in this cluster " + maxVCores); if (amVCores > maxVCores) { LOG.info("AM virtual cores specified above max threshold of cluster. " + "Using max value." + ", specified=" + amVCores + ", max=" + maxVCores); amVCores = maxVCores; } // set the application name ApplicationSubmissionContext appContext = app.getApplicationSubmissionContext(); ApplicationId appId = appContext.getApplicationId(); appContext.setKeepContainersAcrossApplicationAttempts(keepContainers); appContext.setApplicationName(appName); // set local resources for the application master // local files or archives as needed // In this scenario, the jar file for the application master is part of the local resources Map<String, LocalResource> localResources = new HashMap<String, LocalResource>(); LOG.info("Copy App Master jar from local filesystem and add to local environment"); // Copy the application master jar to the filesystem // Create a local resource to point to the destination jar path FileSystem fs = FileSystem.get(conf); addToLocalResources(fs, appMasterJar, appMasterJarPath, appId.toString(), localResources, null); // Set the log4j properties if needed if (!log4jPropFile.isEmpty()) { addToLocalResources(fs, log4jPropFile, log4jPath, appId.toString(), localResources, null); } // The shell script has to be made available on the final container(s) // where it will be executed. // To do this, we need to first copy into the filesystem that is visible // to the yarn framework. // We do not need to set this as a local resource for the application // master as the application master does not need it. String hdfsShellScriptLocation = ""; long hdfsShellScriptLen = 0; long hdfsShellScriptTimestamp = 0; if (!shellScriptPath.isEmpty()) { Path shellSrc = new Path(shellScriptPath); String shellPathSuffix = appName + "/" + appId.toString() + "/" + SCRIPT_PATH; Path shellDst = new Path(fs.getHomeDirectory(), shellPathSuffix); fs.copyFromLocalFile(false, true, shellSrc, shellDst); hdfsShellScriptLocation = shellDst.toUri().toString(); FileStatus shellFileStatus = fs.getFileStatus(shellDst); hdfsShellScriptLen = shellFileStatus.getLen(); hdfsShellScriptTimestamp = shellFileStatus.getModificationTime(); } if (!shellCommand.isEmpty()) { addToLocalResources(fs, null, shellCommandPath, appId.toString(), localResources, shellCommand); } if (shellArgs.length > 0) { addToLocalResources(fs, null, shellArgsPath, appId.toString(), localResources, StringUtils.join(shellArgs, " ")); } // Set the necessary security tokens as needed //amContainer.setContainerTokens(containerToken); // Set the env variables to be setup in the env where the application master will be run LOG.info("Set the environment for the application master"); Map<String, String> env = new HashMap<String, String>(); // put location of shell script into env // using the env info, the application master will create the correct local resource for the // eventual containers that will be launched to execute the shell scripts env.put(DSConstants.DISTRIBUTEDSHELLSCRIPTLOCATION, hdfsShellScriptLocation); env.put(DSConstants.DISTRIBUTEDSHELLSCRIPTTIMESTAMP, Long.toString(hdfsShellScriptTimestamp)); env.put(DSConstants.DISTRIBUTEDSHELLSCRIPTLEN, Long.toString(hdfsShellScriptLen)); // Add AppMaster.jar location to classpath // At some point we should not be required to add // the hadoop specific classpaths to the env. // It should be provided out of the box. // For now setting all required classpaths including // the classpath to "." for the application jar StringBuilder classPathEnv = new StringBuilder(Environment.CLASSPATH.$$()) .append(ApplicationConstants.CLASS_PATH_SEPARATOR).append("./*"); for (String c : conf.getStrings(YarnConfiguration.YARN_APPLICATION_CLASSPATH, YarnConfiguration.DEFAULT_YARN_CROSS_PLATFORM_APPLICATION_CLASSPATH)) { classPathEnv.append(ApplicationConstants.CLASS_PATH_SEPARATOR); classPathEnv.append(c.trim()); } classPathEnv.append(ApplicationConstants.CLASS_PATH_SEPARATOR).append("./log4j.properties"); // add the runtime classpath needed for tests to work if (conf.getBoolean(YarnConfiguration.IS_MINI_YARN_CLUSTER, false)) { classPathEnv.append(':'); classPathEnv.append(System.getProperty("java.class.path")); } env.put("CLASSPATH", classPathEnv.toString()); // Set the necessary command to execute the application master Vector<CharSequence> vargs = new Vector<CharSequence>(30); // Set java executable command LOG.info("Setting up app master command"); vargs.add(Environment.JAVA_HOME.$$() + "/bin/java"); // Set Xmx based on am memory size vargs.add("-Xmx" + amMemory + "m"); // Set class name vargs.add(appMasterMainClass); // Set params for Application Master vargs.add("--container_memory " + String.valueOf(containerMemory)); vargs.add("--container_vcores " + String.valueOf(containerVirtualCores)); vargs.add("--num_containers " + String.valueOf(numContainers)); vargs.add("--priority " + String.valueOf(shellCmdPriority)); for (Map.Entry<String, String> entry : shellEnv.entrySet()) { vargs.add("--shell_env " + entry.getKey() + "=" + entry.getValue()); } if (debugFlag) { vargs.add("--debug"); } vargs.add("1>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/AppMaster.stdout"); vargs.add("2>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/AppMaster.stderr"); // Get final commmand StringBuilder command = new StringBuilder(); for (CharSequence str : vargs) { command.append(str).append(" "); } LOG.info("Completed setting up app master command " + command.toString()); List<String> commands = new ArrayList<String>(); commands.add(command.toString()); // Set up the container launch context for the application master ContainerLaunchContext amContainer = ContainerLaunchContext.newInstance(localResources, env, commands, null, null, null); // Set up resource type requirements // For now, both memory and vcores are supported, so we set memory and // vcores requirements Resource capability = Resource.newInstance(amMemory, amVCores); appContext.setResource(capability); // Service data is a binary blob that can be passed to the application // Not needed in this scenario // amContainer.setServiceData(serviceData); // Setup security tokens if (UserGroupInformation.isSecurityEnabled()) { // Note: Credentials class is marked as LimitedPrivate for HDFS and MapReduce Credentials credentials = new Credentials(); String tokenRenewer = conf.get(YarnConfiguration.RM_PRINCIPAL); if (tokenRenewer == null || tokenRenewer.length() == 0) { throw new IOException("Can't get Master Kerberos principal for the RM to use as renewer"); } // For now, only getting tokens for the default file-system. final Token<?> tokens[] = fs.addDelegationTokens(tokenRenewer, credentials); if (tokens != null) { for (Token<?> token : tokens) { LOG.info("Got dt for " + fs.getUri() + "; " + token); } } DataOutputBuffer dob = new DataOutputBuffer(); credentials.writeTokenStorageToStream(dob); ByteBuffer fsTokens = ByteBuffer.wrap(dob.getData(), 0, dob.getLength()); amContainer.setTokens(fsTokens); } appContext.setAMContainerSpec(amContainer); // Set the priority for the application master // TODO - what is the range for priority? how to decide? Priority pri = Priority.newInstance(amPriority); appContext.setPriority(pri); // Set the queue to which this application is to be submitted in the RM appContext.setQueue(amQueue); // Submit the application to the applications manager // SubmitApplicationResponse submitResp = applicationsManager.submitApplication(appRequest); // Ignore the response as either a valid response object is returned on success // or an exception thrown to denote some form of a failure LOG.info("Submitting application to ASM"); yarnClient.submitApplication(appContext); // TODO // Try submitting the same request again // app submission failure? // Monitor the application return monitorApplication(appId); }
From source file:proxyyarn.ProxyYarn.java
License:Apache License
public boolean run() throws Exception { Configuration conf = new YarnConfiguration(new Configuration()); YarnClient yarnClient = YarnClient.createYarnClient(); yarnClient.init(conf);//from ww w. ja v a 2 s . c o m yarnClient.start(); YarnClusterMetrics clusterMetrics = yarnClient.getYarnClusterMetrics(); log.info("Got Cluster metric info from ASM" + ", numNodeManagers=" + clusterMetrics.getNumNodeManagers()); List<NodeReport> clusterNodeReports = yarnClient.getNodeReports(NodeState.RUNNING); log.info("Got Cluster node info from ASM"); for (NodeReport node : clusterNodeReports) { log.info("Got node report from ASM for" + ", nodeId=" + node.getNodeId() + ", nodeAddress" + node.getHttpAddress() + ", nodeRackName" + node.getRackName() + ", nodeNumContainers" + node.getNumContainers()); } QueueInfo queueInfo = yarnClient.getQueueInfo("default"); log.info("Queue info" + ", queueName=" + queueInfo.getQueueName() + ", queueCurrentCapacity=" + queueInfo.getCurrentCapacity() + ", queueMaxCapacity=" + queueInfo.getMaximumCapacity() + ", queueApplicationCount=" + queueInfo.getApplications().size() + ", queueChildQueueCount=" + queueInfo.getChildQueues().size()); List<QueueUserACLInfo> listAclInfo = yarnClient.getQueueAclsInfo(); for (QueueUserACLInfo aclInfo : listAclInfo) { for (QueueACL userAcl : aclInfo.getUserAcls()) { log.info("User ACL Info for Queue" + ", queueName=" + aclInfo.getQueueName() + ", userAcl=" + userAcl.name()); } } FileSystem fs = FileSystem.get(conf); if (!fs.getClass().equals(DistributedFileSystem.class)) { log.error("Expected DistributedFileSystem, but was {}", fs.getClass().getSimpleName()); System.exit(1); } // ApplicationClientProtocol applicationsManager; // InetSocketAddress rmAddress = NetUtils.createSocketAddr(yarnConf.get(YarnConfiguration.RM_ADDRESS, YarnConfiguration.DEFAULT_RM_ADDRESS)); // log.info("Connecting to ResourceManager at {}", rmAddress); // Configuration appManagerServerConf = new Configuration(conf); // YarnRPC rpc = YarnRPC.create(appManagerServerConf); // ApplicationClientProtocol applicationManager = (ApplicationClientProtocol) rpc.getProxy(ApplicationClientProtocol.class, rmAddress, appManagerServerConf); String appName = "AccumuloProxyYarn"; YarnClientApplication app = yarnClient.createApplication(); // set the application name ApplicationSubmissionContext appContext = app.getApplicationSubmissionContext(); ApplicationId appId = appContext.getApplicationId(); appContext.setApplicationName(appName); // GetNewApplicationRequest request = Records.newRecord(GetNewApplicationRequest.class); // GetNewApplicationResponse response = applicationManager.getNewApplication(request); // log.info("Got new ApplicationId=" + response.getApplicationId()); // ApplicationId appId = response.getApplicationId(); // Create a new ApplicationSubmissionContext // ApplicationSubmissionContext appContext = Records.newRecord(ApplicationSubmissionContext.class); // set the ApplicationId // appContext.setApplicationId(appId); // set the application name // appContext.setApplicationName(appName); // Create a new container launch context for the AM's container ContainerLaunchContext amContainer = Records.newRecord(ContainerLaunchContext.class); // Define the local resources required Map<String, LocalResource> localResources = new HashMap<String, LocalResource>(); // Lets assume the jar we need for our ApplicationMaster is available in // HDFS at a certain known path to us and we want to make it available to // the ApplicationMaster in the launched container Path localJarPath = new Path( "file:///Users/jelser/projects/accumulo-proxy-yarn/target/accumulo-proxy-yarn-0.0.1-SNAPSHOT.jar"); Path jarPath = new Path("hdfs:///accumulo-proxy-yarn-0.0.1-SNAPSHOT.jar"); fs.copyFromLocalFile(false, true, localJarPath, jarPath); FileStatus jarStatus = fs.getFileStatus(jarPath); LocalResource amJarRsrc = Records.newRecord(LocalResource.class); // Set the type of resource - file or archive // archives are untarred at the destination by the framework amJarRsrc.setType(LocalResourceType.FILE); // Set visibility of the resource // Setting to most private option i.e. this file will only // be visible to this instance of the running application amJarRsrc.setVisibility(LocalResourceVisibility.APPLICATION); // Set the location of resource to be copied over into the // working directory amJarRsrc.setResource(ConverterUtils.getYarnUrlFromPath(jarPath)); // Set timestamp and length of file so that the framework // can do basic sanity checks for the local resource // after it has been copied over to ensure it is the same // resource the client intended to use with the application amJarRsrc.setTimestamp(jarStatus.getModificationTime()); amJarRsrc.setSize(jarStatus.getLen()); // The framework will create a symlink called AppMaster.jar in the // working directory that will be linked back to the actual file. // The ApplicationMaster, if needs to reference the jar file, would // need to use the symlink filename. localResources.put("AppMaster.jar", amJarRsrc); // Set the local resources into the launch context amContainer.setLocalResources(localResources); // Set up the environment needed for the launch context Map<String, String> env = new HashMap<String, String>(); // For example, we could setup the classpath needed. // Assuming our classes or jars are available as local resources in the // working directory from which the command will be run, we need to append // "." to the path. // By default, all the hadoop specific classpaths will already be available // in $CLASSPATH, so we should be careful not to overwrite it. String classPathEnv = "$CLASSPATH:./*:/Users/jelser/projects/accumulo-proxy-yarn/target/lib/*"; env.put("CLASSPATH", classPathEnv); amContainer.setEnvironment(env); // Construct the command to be executed on the launched container String command = "${JAVA_HOME}" + "/bin/java" + " proxyyarn.ProxyYarnAppMaster 1>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/stdout" + " 2>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/stderr"; List<String> commands = new ArrayList<String>(); commands.add(command); // add additional commands if needed // Set the command array into the container spec amContainer.setCommands(commands); // Define the resource requirements for the container // For now, YARN only supports memory so we set the memory // requirements. // If the process takes more than its allocated memory, it will // be killed by the framework. // Memory being requested for should be less than max capability // of the cluster and all asks should be a multiple of the min capability. Resource capability = Records.newRecord(Resource.class); capability.setMemory(256); appContext.setResource(capability); // Create the request to send to the ApplicationsManager // SubmitApplicationRequest appRequest = Records.newRecord(SubmitApplicationRequest.class); // appRequest.setApplicationSubmissionContext(appContext); // Submit the application to the ApplicationsManager // Ignore the response as either a valid response object is returned on // success or an exception thrown to denote the failure // applicationManager.submitApplication(appRequest); // Set the container launch content into the ApplicationSubmissionContext appContext.setAMContainerSpec(amContainer); // Set the priority for the application master Priority pri = Records.newRecord(Priority.class); // TODO - what is the range for priority? how to decide? pri.setPriority(0); appContext.setPriority(pri); // Set the queue to which this application is to be submitted in the RM appContext.setQueue("default"); // Submit the application to the applications manager // SubmitApplicationResponse submitResp = applicationsManager.submitApplication(appRequest); // Ignore the response as either a valid response object is returned on success // or an exception thrown to denote some form of a failure log.info("Submitting application to ASM"); yarnClient.submitApplication(appContext); return monitorApplication(yarnClient, appId); /* Thread.sleep(200); boolean running = false; while(true) { GetApplicationReportRequest reportRequest = Records.newRecord(GetApplicationReportRequest.class); reportRequest.setApplicationId(appId); GetApplicationReportResponse reportResponse = applicationManager.getApplicationReport(reportRequest); ApplicationReport report = reportResponse.getApplicationReport(); log.info(report.toString()); YarnApplicationState state = report.getYarnApplicationState(); switch (state) { case NEW: case NEW_SAVING: case SUBMITTED: case ACCEPTED: log.info("State: {}", state); break; case RUNNING: log.info("Running application"); running = true; break; case FINISHED: case FAILED: case KILLED: log.info("State: {}", state); return; default: log.info("Unknown state: {}", state); return; } if (!running) { Thread.sleep(1000); } }*/ }
From source file:runtime.starter.MPJYarnClient.java
License:Open Source License
public void run() throws Exception { Map<String, String> map = System.getenv(); try {// ww w. j av a2s .c o m mpjHomeDir = map.get("MPJ_HOME"); if (mpjHomeDir == null) { throw new Exception("[MPJRun.java]:MPJ_HOME environment found.."); } } catch (Exception exc) { System.out.println("[MPJRun.java]:" + exc.getMessage()); exc.printStackTrace(); return; } // Copy the application master jar to HDFS // Create a local resource to point to the destination jar path FileSystem fs = FileSystem.get(conf); /* Path dataset = new Path(fs.getHomeDirectory(),"/dataset"); FileStatus datasetFile = fs.getFileStatus(dataset); BlockLocation myBlocks [] = fs.getFileBlockLocations(datasetFile,0,datasetFile.getLen()); for(BlockLocation b : myBlocks){ System.out.println("\n--------------------"); System.out.println("Length "+b.getLength()); for(String host : b.getHosts()){ System.out.println("host "+host); } } */ Path source = new Path(mpjHomeDir + "/lib/mpj-app-master.jar"); String pathSuffix = hdfsFolder + "mpj-app-master.jar"; Path dest = new Path(fs.getHomeDirectory(), pathSuffix); if (debugYarn) { logger.info("Uploading mpj-app-master.jar to: " + dest.toString()); } fs.copyFromLocalFile(false, true, source, dest); FileStatus destStatus = fs.getFileStatus(dest); Path wrapperSource = new Path(mpjHomeDir + "/lib/mpj-yarn-wrapper.jar"); String wrapperSuffix = hdfsFolder + "mpj-yarn-wrapper.jar"; Path wrapperDest = new Path(fs.getHomeDirectory(), wrapperSuffix); if (debugYarn) { logger.info("Uploading mpj-yarn-wrapper.jar to: " + wrapperDest.toString()); } fs.copyFromLocalFile(false, true, wrapperSource, wrapperDest); Path userJar = new Path(jarPath); String userJarSuffix = hdfsFolder + "user-code.jar"; Path userJarDest = new Path(fs.getHomeDirectory(), userJarSuffix); if (debugYarn) { logger.info("Uploading user-code.jar to: " + userJarDest.toString()); } fs.copyFromLocalFile(false, true, userJar, userJarDest); YarnConfiguration conf = new YarnConfiguration(); YarnClient yarnClient = YarnClient.createYarnClient(); yarnClient.init(conf); yarnClient.start(); if (debugYarn) { YarnClusterMetrics metrics = yarnClient.getYarnClusterMetrics(); logger.info("\nNodes Information"); logger.info("Number of NM: " + metrics.getNumNodeManagers() + "\n"); List<NodeReport> nodeReports = yarnClient.getNodeReports(NodeState.RUNNING); for (NodeReport n : nodeReports) { logger.info("NodeId: " + n.getNodeId()); logger.info("RackName: " + n.getRackName()); logger.info("Total Memory: " + n.getCapability().getMemory()); logger.info("Used Memory: " + n.getUsed().getMemory()); logger.info("Total vCores: " + n.getCapability().getVirtualCores()); logger.info("Used vCores: " + n.getUsed().getVirtualCores() + "\n"); } } logger.info("Creating server socket at HOST " + serverName + " PORT " + serverPort + " \nWaiting for " + np + " processes to connect..."); // Creating a server socket for incoming connections try { servSock = new ServerSocket(serverPort); infoSock = new ServerSocket(); TEMP_PORT = findPort(infoSock); } catch (Exception e) { e.printStackTrace(); } // Create application via yarnClient YarnClientApplication app = yarnClient.createApplication(); GetNewApplicationResponse appResponse = app.getNewApplicationResponse(); int maxMem = appResponse.getMaximumResourceCapability().getMemory(); if (debugYarn) { logger.info("Max memory capability resources in cluster: " + maxMem); } if (amMem > maxMem) { amMem = maxMem; logger.info("AM memory specified above threshold of cluster " + "Using maximum memory for AM container: " + amMem); } int maxVcores = appResponse.getMaximumResourceCapability().getVirtualCores(); if (debugYarn) { logger.info("Max vCores capability resources in cluster: " + maxVcores); } if (amCores > maxVcores) { amCores = maxVcores; logger.info("AM virtual cores specified above threshold of cluster " + "Using maximum virtual cores for AM container: " + amCores); } // Set up the container launch context for the application master ContainerLaunchContext amContainer = Records.newRecord(ContainerLaunchContext.class); List<String> commands = new ArrayList<String>(); commands.add("$JAVA_HOME/bin/java"); commands.add("-Xmx" + amMem + "m"); commands.add("runtime.starter.MPJAppMaster"); commands.add("--np"); commands.add(String.valueOf(np)); commands.add("--serverName"); commands.add(serverName); //server name commands.add("--ioServerPort"); commands.add(Integer.toString(serverPort)); //server port commands.add("--deviceName"); commands.add(deviceName); //device name commands.add("--className"); commands.add(className); //class name commands.add("--wdir"); commands.add(workingDirectory); //wdir commands.add("--psl"); commands.add(Integer.toString(psl)); //protocol switch limit commands.add("--wireUpPort"); commands.add(String.valueOf(TEMP_PORT)); //for sharing ports & rank commands.add("--wrapperPath"); commands.add(wrapperDest.toString());//MPJYarnWrapper.jar HDFS path commands.add("--userJarPath"); commands.add(userJarDest.toString());//User Jar File HDFS path commands.add("--mpjContainerPriority"); commands.add(mpjContainerPriority);// priority for mpj containers commands.add("--containerMem"); commands.add(containerMem); commands.add("--containerCores"); commands.add(containerCores); if (debugYarn) { commands.add("--debugYarn"); } if (appArgs != null) { commands.add("--appArgs"); for (int i = 0; i < appArgs.length; i++) { commands.add(appArgs[i]); } } amContainer.setCommands(commands); //set commands // Setup local Resource for ApplicationMaster LocalResource appMasterJar = Records.newRecord(LocalResource.class); appMasterJar.setResource(ConverterUtils.getYarnUrlFromPath(dest)); appMasterJar.setSize(destStatus.getLen()); appMasterJar.setTimestamp(destStatus.getModificationTime()); appMasterJar.setType(LocalResourceType.ARCHIVE); appMasterJar.setVisibility(LocalResourceVisibility.APPLICATION); amContainer.setLocalResources(Collections.singletonMap("mpj-app-master.jar", appMasterJar)); // Setup CLASSPATH for ApplicationMaster // Setting up the environment Map<String, String> appMasterEnv = new HashMap<String, String>(); setupAppMasterEnv(appMasterEnv); amContainer.setEnvironment(appMasterEnv); // Set up resource type requirements for ApplicationMaster Resource capability = Records.newRecord(Resource.class); capability.setMemory(amMem); capability.setVirtualCores(amCores); // Finally, set-up ApplicationSubmissionContext for the application ApplicationSubmissionContext appContext = app.getApplicationSubmissionContext(); appContext.setApplicationName(appName); appContext.setAMContainerSpec(amContainer); appContext.setResource(capability); appContext.setQueue(yarnQueue); // queue Priority priority = Priority.newInstance(amPriority); appContext.setPriority(priority); ApplicationId appId = appContext.getApplicationId(); //Adding ShutDown Hook Runtime.getRuntime().addShutdownHook(new KillYarnApp(appId, yarnClient)); // Submit application System.out.println("Submitting Application: " + appContext.getApplicationName() + "\n"); try { isRunning = true; yarnClient.submitApplication(appContext); } catch (Exception exp) { System.err.println("Error Submitting Application"); exp.printStackTrace(); } // np = number of processes , + 1 for Application Master container IOMessagesThread[] ioThreads = new IOMessagesThread[np + 1]; peers = new String[np]; socketList = new Vector<Socket>(); int wport = 0; int rport = 0; int rank = 0; // np + 1 IOThreads for (int i = 0; i < (np + 1); i++) { try { sock = servSock.accept(); //start IO thread to read STDOUT and STDERR from wrappers IOMessagesThread io = new IOMessagesThread(sock); ioThreads[i] = io; ioThreads[i].start(); } catch (Exception e) { System.err.println("Error accepting connection from peer socket.."); e.printStackTrace(); } } // Loop to read port numbers from Wrapper.java processes // and to create WRAPPER_INFO (containing all IPs and ports) String WRAPPER_INFO = "#Peer Information"; for (int i = np; i > 0; i--) { try { sock = infoSock.accept(); DataOutputStream out = new DataOutputStream(sock.getOutputStream()); DataInputStream in = new DataInputStream(sock.getInputStream()); if (in.readUTF().startsWith("Sending Info")) { wport = in.readInt(); rport = in.readInt(); rank = in.readInt(); peers[rank] = ";" + sock.getInetAddress().getHostAddress() + "@" + rport + "@" + wport + "@" + rank; socketList.add(sock); } } catch (Exception e) { System.err.println("[MPJYarnClient.java]: Error accepting" + " connection from peer socket!"); e.printStackTrace(); } } for (int i = 0; i < np; i++) { WRAPPER_INFO += peers[i]; } // Loop to broadcast WRAPPER_INFO to all Wrappers for (int i = np; i > 0; i--) { try { sock = socketList.get(np - i); DataOutputStream out = new DataOutputStream(sock.getOutputStream()); out.writeUTF(WRAPPER_INFO); out.flush(); sock.close(); } catch (Exception e) { System.err.println("[MPJYarnClient.java]: Error closing" + " connection from peer socket.."); e.printStackTrace(); } } try { infoSock.close(); } catch (IOException exp) { exp.printStackTrace(); } // wait for all IO Threads to complete for (int i = 0; i < (np + 1); i++) { ioThreads[i].join(); } isRunning = true; System.out.println("\nApplication Statistics!"); while (true) { appReport = yarnClient.getApplicationReport(appId); appState = appReport.getYarnApplicationState(); fStatus = appReport.getFinalApplicationStatus(); if (appState == YarnApplicationState.FINISHED) { isRunning = false; if (fStatus == FinalApplicationStatus.SUCCEEDED) { System.out.println("State: " + fStatus); } else { System.out.println("State: " + fStatus); } break; } else if (appState == YarnApplicationState.KILLED) { isRunning = false; System.out.println("State: " + appState); break; } else if (appState == YarnApplicationState.FAILED) { isRunning = false; System.out.println("State: " + appState); break; } Thread.sleep(100); } try { if (debugYarn) { logger.info("Cleaning the files from hdfs: "); logger.info("1) " + dest.toString()); logger.info("2) " + wrapperDest.toString()); logger.info("3) " + userJarDest.toString()); } fs.delete(dest); fs.delete(wrapperDest); fs.delete(userJarDest); } catch (IOException exp) { exp.printStackTrace(); } System.out.println("Application ID: " + appId + "\n" + "Application User: " + appReport.getUser() + "\n" + "RM Queue: " + appReport.getQueue() + "\n" + "Start Time: " + appReport.getStartTime() + "\n" + "Finish Time: " + appReport.getFinishTime()); }
From source file:simsql.runtime.MRLoader.java
License:Apache License
public long run(String inputPath, String outputPath, short typeCode, Relation r, int sortAtt) { // make a directory for the relation Configuration conf = new Configuration(); FileSystem dfs = null; try {/*from w w w . ja va 2 s .c om*/ dfs = FileSystem.get(conf); } catch (Exception e) { throw new RuntimeException("Cannot access HDFS!", e); } try { // if it exists, destroy it. Path path = new Path(outputPath); if (dfs.exists(path)) { dfs.delete(path, true); } } catch (Exception e) { throw new RuntimeException("Could not create the file to bulk load to!", e); } // find a file name String tempPath = null; if (inputPath.startsWith("hdfs:")) { tempPath = inputPath.replace("hdfs:", ""); } else { tempPath = "/tempDataFile_" + r.getName(); try { dfs.delete(new Path(tempPath), true); } catch (Exception e) { // ignore this. } // upload the text file try { dfs.copyFromLocalFile(false, true, new Path(inputPath), new Path(tempPath)); dfs.deleteOnExit(new Path(tempPath)); } catch (Exception e) { throw new RuntimeException("Failed to upload text file " + inputPath + " to HDFS!", e); } } // set up the new job's parameters. conf.setBoolean("mapred.compress.map.output", true); conf.set("mapred.map.output.compression.codec", RecordCompression.getCodecClass()); conf.set("io.serializations", "simsql.runtime.RecordSerialization,simsql.runtime.RecordKeySerialization,org.apache.hadoop.io.serializer.WritableSerialization"); conf.setInt("simsql.loader.numAtts", r.getAttributes().size()); conf.setInt("simsql.loader.typeCode", (int) typeCode); conf.setInt("simsql.loader.sortAtt", sortAtt); String[] myStrings = new String[r.getAttributes().size()]; int j = 0; for (simsql.compiler.Attribute a : r.getAttributes()) { myStrings[j++] = a.getPhysicalRealization().getClass().getName(); } conf.setStrings("simsql.loader.types", myStrings); // create a job Job job; try { job = new Job(conf); } catch (Exception e) { throw new RuntimeException("Unable to create bulk loading job!", e); } // set the split size (number of mappers) long fSize = 0; if (inputPath.startsWith("hdfs")) { fSize = RelOp.getPathsTotalSize(new String[] { tempPath }); } else { fSize = new File(inputPath).length(); } FileInputFormat.setMinInputSplitSize(job, fSize / (long) numTasks); FileInputFormat.setMaxInputSplitSize(job, fSize / (long) numTasks); // and the number of reducers job.setNumReduceTasks(numTasks); // the mapper/reducer/jar job.setMapperClass(MRLoaderMapper.class); job.setReducerClass(MRLoaderReducer.class); job.setJarByClass(MRLoader.class); // I/O settings. job.setOutputFormatClass(RecordOutputFormat.class); job.setMapOutputKeyClass(RecordKey.class); job.setMapOutputValueClass(RecordWrapper.class); job.setOutputKeyClass(Nothing.class); job.setOutputValueClass(Record.class); try { FileInputFormat.setInputPaths(job, new Path(tempPath)); FileOutputFormat.setOutputPath(job, new Path(outputPath)); } catch (Exception e) { throw new RuntimeException("Could not set job inputs/outputs", e); } job.setGroupingComparatorClass(RecordKeyGroupingComparator.class); job.setPartitionerClass(RecordPartitioner.class); job.setSortComparatorClass(RecordKeySortComparator.class); job.setJobName("MRLoader: " + inputPath + " ==> " + outputPath); // run it Counters counters; try { job.waitForCompletion(true); counters = job.getCounters(); } catch (Exception e) { throw new RuntimeException("Could not set up bulk loader job!", e); } // now, delete all the empty part files try { // get a filesystem FileSystem ddfs = FileSystem.get(conf); Path outPath = new Path(outputPath); if (ddfs.exists(outPath) && ddfs.isDirectory(outPath)) { FileStatus fstatus[] = ddfs.listStatus(outPath, new TableFileFilter()); for (FileStatus ff : fstatus) { if (ddfs.getContentSummary(ff.getPath()).getLength() <= 4) { // snappy leaves 4-byte long files around... ddfs.delete(ff.getPath(), true); } } } } catch (Exception e) { // this isn't disastrous } // get the counter for the output of the mapper. Counter bytesCounter = counters.findCounter(OutputFileSerializer.Counters.BYTES_WRITTEN); return bytesCounter.getValue(); }
From source file:test.anna.dshell.Client.java
License:Apache License
/** * Main run function for the client//from ww w. ja va2 s. co m * @return true if application completed successfully * @throws IOException * @throws YarnException */ public boolean run() throws IOException, YarnException { LOG.info("Running Client"); yarnClient.start(); YarnClusterMetrics clusterMetrics = yarnClient.getYarnClusterMetrics(); LOG.info("Got Cluster metric info from ASM" + ", numNodeManagers=" + clusterMetrics.getNumNodeManagers()); List<NodeReport> clusterNodeReports = yarnClient.getNodeReports(NodeState.RUNNING); LOG.info("Got Cluster node info from ASM"); for (NodeReport node : clusterNodeReports) { LOG.info("Got node report from ASM for" + ", nodeId=" + node.getNodeId() + ", nodeAddress" + node.getHttpAddress() + ", nodeRackName" + node.getRackName() + ", nodeNumContainers" + node.getNumContainers()); } QueueInfo queueInfo = yarnClient.getQueueInfo(this.amQueue); LOG.info("Queue info" + ", queueName=" + queueInfo.getQueueName() + ", queueCurrentCapacity=" + queueInfo.getCurrentCapacity() + ", queueMaxCapacity=" + queueInfo.getMaximumCapacity() + ", queueApplicationCount=" + queueInfo.getApplications().size() + ", queueChildQueueCount=" + queueInfo.getChildQueues().size()); List<QueueUserACLInfo> listAclInfo = yarnClient.getQueueAclsInfo(); for (QueueUserACLInfo aclInfo : listAclInfo) { for (QueueACL userAcl : aclInfo.getUserAcls()) { LOG.info("User ACL Info for Queue" + ", queueName=" + aclInfo.getQueueName() + ", userAcl=" + userAcl.name()); } } // Get a new application id YarnClientApplication app = yarnClient.createApplication(); GetNewApplicationResponse appResponse = app.getNewApplicationResponse(); // TODO get min/max resource capabilities from RM and change memory ask if needed // If we do not have min/max, we may not be able to correctly request // the required resources from the RM for the app master // Memory ask has to be a multiple of min and less than max. // Dump out information about cluster capability as seen by the resource manager int maxMem = appResponse.getMaximumResourceCapability().getMemory(); LOG.info("Max mem capabililty of resources in this cluster " + maxMem); // A resource ask cannot exceed the max. if (amMemory > maxMem) { LOG.info("AM memory specified above max threshold of cluster. Using max value." + ", specified=" + amMemory + ", max=" + maxMem); amMemory = maxMem; } int maxVCores = appResponse.getMaximumResourceCapability().getVirtualCores(); LOG.info("Max virtual cores capabililty of resources in this cluster " + maxVCores); if (amVCores > maxVCores) { LOG.info("AM virtual cores specified above max threshold of cluster. " + "Using max value." + ", specified=" + amVCores + ", max=" + maxVCores); amVCores = maxVCores; } // set the application name ApplicationSubmissionContext appContext = app.getApplicationSubmissionContext(); ApplicationId appId = appContext.getApplicationId(); appContext.setKeepContainersAcrossApplicationAttempts(keepContainers); appContext.setApplicationName(appName); // set local resources for the application master // local files or archives as needed // In this scenario, the jar file for the application master is part of the local resources Map<String, LocalResource> localResources = new HashMap<String, LocalResource>(); LOG.info("Copy App Master jar from local filesystem and add to local environment"); // Copy the application master jar to the filesystem // Create a local resource to point to the destination jar path FileSystem fs = FileSystem.get(conf); addToLocalResources(fs, appMasterJar, appMasterJarPath, appId.toString(), localResources, null); System.out.println("CLIENT>>> copy the " + appMasterJar + " to " + appMasterJarPath + " in container 0."); // Set the log4j properties if needed if (!log4jPropFile.isEmpty()) { addToLocalResources(fs, log4jPropFile, log4jPath, appId.toString(), localResources, null); } // The shell script has to be made available on the final container(s) // where it will be executed. // To do this, we need to first copy into the filesystem that is visible // to the yarn framework. // We do not need to set this as a local resource for the application // master as the application master does not need it. String hdfsShellScriptLocation = ""; long hdfsShellScriptLen = 0; long hdfsShellScriptTimestamp = 0; if (!shellScriptPath.isEmpty()) { Path shellSrc = new Path(shellScriptPath); String shellPathSuffix = appName + "/" + appId.toString() + "/" + SCRIPT_PATH; Path shellDst = new Path(fs.getHomeDirectory(), shellPathSuffix); fs.copyFromLocalFile(false, true, shellSrc, shellDst); hdfsShellScriptLocation = shellDst.toUri().toString(); FileStatus shellFileStatus = fs.getFileStatus(shellDst); hdfsShellScriptLen = shellFileStatus.getLen(); hdfsShellScriptTimestamp = shellFileStatus.getModificationTime(); } if (!shellCommand.isEmpty()) { addToLocalResources(fs, null, shellCommandPath, appId.toString(), localResources, shellCommand); } if (shellArgs.length > 0) { addToLocalResources(fs, null, shellArgsPath, appId.toString(), localResources, StringUtils.join(shellArgs, " ")); } // Set the necessary security tokens as needed //amContainer.setContainerTokens(containerToken); // Set the env variables to be setup in the env where the application master will be run LOG.info("Set the environment for the application master"); Map<String, String> env = new HashMap<String, String>(); // put location of shell script into env // using the env info, the application master will create the correct local resource for the // eventual containers that will be launched to execute the shell scripts env.put(DSConstants.DISTRIBUTEDSHELLSCRIPTLOCATION, hdfsShellScriptLocation); env.put(DSConstants.DISTRIBUTEDSHELLSCRIPTTIMESTAMP, Long.toString(hdfsShellScriptTimestamp)); env.put(DSConstants.DISTRIBUTEDSHELLSCRIPTLEN, Long.toString(hdfsShellScriptLen)); // Add AppMaster.jar location to classpath // At some point we should not be required to add // the hadoop specific classpaths to the env. // It should be provided out of the box. // For now setting all required classpaths including // the classpath to "." for the application jar StringBuilder classPathEnv = new StringBuilder(Environment.CLASSPATH.$$()) .append(ApplicationConstants.CLASS_PATH_SEPARATOR).append("./*"); for (String c : conf.getStrings(YarnConfiguration.YARN_APPLICATION_CLASSPATH, YarnConfiguration.DEFAULT_YARN_CROSS_PLATFORM_APPLICATION_CLASSPATH)) { classPathEnv.append(ApplicationConstants.CLASS_PATH_SEPARATOR); classPathEnv.append(c.trim()); } classPathEnv.append(ApplicationConstants.CLASS_PATH_SEPARATOR).append("./log4j.properties"); // add the runtime classpath needed for tests to work if (conf.getBoolean(YarnConfiguration.IS_MINI_YARN_CLUSTER, false)) { classPathEnv.append(':'); classPathEnv.append(System.getProperty("java.class.path")); } env.put("CLASSPATH", classPathEnv.toString()); // Set the necessary command to execute the application master Vector<CharSequence> vargs = new Vector<CharSequence>(30); // Set java executable command LOG.info("Setting up app master command"); vargs.add(Environment.JAVA_HOME.$$() + "/bin/java"); // Set Xmx based on am memory size vargs.add("-Xmx" + amMemory + "m"); // Set class name vargs.add(appMasterMainClass); // Set params for Application Master vargs.add("--container_memory " + String.valueOf(containerMemory)); vargs.add("--container_vcores " + String.valueOf(containerVirtualCores)); vargs.add("--num_containers " + String.valueOf(numContainers)); vargs.add("--priority " + String.valueOf(shellCmdPriority)); for (Map.Entry<String, String> entry : shellEnv.entrySet()) { vargs.add("--shell_env " + entry.getKey() + "=" + entry.getValue()); } if (debugFlag) { vargs.add("--debug"); } vargs.add("1>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/AppMaster.stdout"); vargs.add("2>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/AppMaster.stderr"); // Get final commmand StringBuilder command = new StringBuilder(); for (CharSequence str : vargs) { command.append(str).append(" "); } LOG.info("Completed setting up app master command " + command.toString()); List<String> commands = new ArrayList<String>(); commands.add(command.toString()); // Set up the container launch context for the application master ContainerLaunchContext amContainer = ContainerLaunchContext.newInstance(localResources, env, commands, null, null, null); // Set up resource type requirements // For now, both memory and vcores are supported, so we set memory and // vcores requirements Resource capability = Resource.newInstance(amMemory, amVCores); appContext.setResource(capability); // Service data is a binary blob that can be passed to the application // Not needed in this scenario // amContainer.setServiceData(serviceData); // Setup security tokens if (UserGroupInformation.isSecurityEnabled()) { // Note: Credentials class is marked as LimitedPrivate for HDFS and MapReduce Credentials credentials = new Credentials(); String tokenRenewer = conf.get(YarnConfiguration.RM_PRINCIPAL); if (tokenRenewer == null || tokenRenewer.length() == 0) { throw new IOException("Can't get Master Kerberos principal for the RM to use as renewer"); } // For now, only getting tokens for the default file-system. final Token<?> tokens[] = fs.addDelegationTokens(tokenRenewer, credentials); if (tokens != null) { for (Token<?> token : tokens) { LOG.info("Got dt for " + fs.getUri() + "; " + token); } } DataOutputBuffer dob = new DataOutputBuffer(); credentials.writeTokenStorageToStream(dob); ByteBuffer fsTokens = ByteBuffer.wrap(dob.getData(), 0, dob.getLength()); amContainer.setTokens(fsTokens); } appContext.setAMContainerSpec(amContainer); // Set the priority for the application master // TODO - what is the range for priority? how to decide? Priority pri = Priority.newInstance(amPriority); appContext.setPriority(pri); // Set the queue to which this application is to be submitted in the RM appContext.setQueue(amQueue); // Submit the application to the applications manager // SubmitApplicationResponse submitResp = applicationsManager.submitApplication(appRequest); // Ignore the response as either a valid response object is returned on success // or an exception thrown to denote some form of a failure LOG.info("Submitting application to ASM"); yarnClient.submitApplication(appContext); // TODO // Try submitting the same request again // app submission failure? // Monitor the application return monitorApplication(appId); }