List of usage examples for org.apache.hadoop.fs FileSystem copyFromLocalFile
public void copyFromLocalFile(boolean delSrc, boolean overwrite, Path src, Path dst) throws IOException
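For orientation, here is a minimal sketch of calling this overload directly; the class name and both paths below are placeholders for illustration, not taken from the examples that follow. The first flag (delSrc=false) keeps the local source; the second (overwrite=true) replaces the destination if it already exists.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class CopyFromLocalSketch {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        // delSrc=false: keep the local file; overwrite=true: replace dst if present.
        // Both paths are illustrative placeholders.
        fs.copyFromLocalFile(false, true,
                new Path("/tmp/input.txt"),          // source on the local filesystem
                new Path("/user/hadoop/input.txt")); // destination on the default filesystem
        fs.close();
    }
}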
From source file: org.apache.sqoop.util.FileUploader.java
License: Apache License

public static void uploadFilesToDFS(String srcBasePath, String src, String destBasePath, String dest,
        Configuration conf) throws IOException {
    FileSystem fs = FileSystem.get(conf);
    Path srcPath = new Path(srcBasePath, src);
    if (destBasePath == null || destBasePath.length() == 0) {
        destBasePath = ".";
    }
    Path targetPath = new Path(destBasePath, dest);
    if (!fs.exists(targetPath)) {
        fs.mkdirs(targetPath);
    }

    Path targetPath2 = new Path(targetPath, src);
    fs.delete(targetPath2, true);

    try {
        LOG.info("Copying " + srcPath + " to " + targetPath);
        // Copy srcPath (on local FS) to targetPath on DFS.
        // The first boolean arg instructs not to delete source and the second
        // boolean arg instructs to overwrite dest if exists.
        fs.copyFromLocalFile(false, true, srcPath, targetPath);
    } catch (IOException ioe) {
        LOG.warn("Unable to copy " + srcPath + " to " + targetPath);
    }
}
From source file: org.bitsofinfo.util.address.usps.ais.loader.hadoop.HadoopDataLoader.java
License: Apache License

@Override
public void load(LoaderJob theJob) throws Exception {
    // big error
    if (!(theJob instanceof HadoopLoaderJob)) {
        throw new Exception("Cannot pass non-HadoopLoaderJob instance to HadoopDataLoader! Get your job"
                + " via the createJob() factory method on a HadoopDataLoader instance");
    }
    HadoopLoaderJob loaderJob = (HadoopLoaderJob) theJob;

    // create our internal job tracker which will actually run the job in hadoop
    LoaderJobTracker jobTracker = new LoaderJobTracker(loaderJob, hadoopConfResource, hdfsJobOutputRootDir);

    // get the file system, so we can push the source files into HDFS
    Configuration conf = new Configuration();
    conf.addResource(hadoopConfResource.getURL());
    FileSystem hdfs = FileSystem.get(conf);

    // for each source data file, let's get it into HDFS
    for (FileHandle sourceData : loaderJob.getSourceData()) {
        String sourceURI = sourceData.getUri().toString();

        // we currently only support local files; reject any URI that is not a file:// URI
        if (sourceURI.indexOf(URIScheme.FILE.getName()) == -1) {
            throw new Exception(
                    "HadoopDataLoader currently only supports source data URIs that are from the local file system (file://)");
        }

        // get the product type of this source file, we need this to determine the record length
        USPSProductType uspsProdType = (USPSProductType) sourceData
                .getProperty(FILE_HANDLE_PROP_USPS_PRODUCT_TYPE);
        if (uspsProdType == null) {
            throw new Exception(
                    "Source data FileHandles passed to HadoopDataLoader must have the FILE_HANDLE_PROP_USPS_PRODUCT_TYPE property set.");
        }

        // copy the local file into the HDFS filesystem; the target path in HDFS is
        // /[hdfsSourceDataRootDir]/loader_job-[id]/[local source URI with file:// stripped]
        Path sourceFileLocalPath = new Path(new File(sourceData.getUri()).getAbsolutePath());
        String localPathStripped = sourceData.getUri().toString();
        localPathStripped = StringUtils.replace(localPathStripped, URIScheme.FILE.getName() + ":/", "");
        Path sourceFileTargetHDFSPath = new Path(this.hdfsSourceDataRootDir + "/loader_job-"
                + loaderJob.getIdentifier() + "/" + localPathStripped);

        // copy... if not already there
        if (!hdfs.exists(sourceFileTargetHDFSPath)) {
            hdfs.copyFromLocalFile(false, true, sourceFileLocalPath, sourceFileTargetHDFSPath);
        }

        // add the source file in HDFS to the job tracker, keyed by the USPSProductType
        jobTracker.addSourceDataFile(uspsProdType, sourceFileTargetHDFSPath);
    }

    // have the jobTracker fire off the job
    jobTracker.startHadoopJob();

    // add to running jobs
    this.runningJobs.put(loaderJob.getIdentifier(), jobTracker);

    // make sure to wake it
    synchronized (this) {
        notify();
    }
}
From source file: org.carbondata.integration.spark.load.CarbonLoaderUtil.java
License: Apache License

public static void copyMergeToHDFS(String schemaName, String cubeName, String factTable, String hdfsLocation,
        String localStore, int currentRestructNumber, String mergedLoadName) {
    try {
        // If the hdfs store and the local store are configured differently, then copy
        if (hdfsLocation != null && !hdfsLocation.equals(localStore)) {
            // Identify the Load_X folder from the local store folder
            String currentloadedStore = localStore;
            currentloadedStore = currentloadedStore + File.separator + schemaName + File.separator + cubeName;

            int rsCounter = currentRestructNumber;
            if (rsCounter == -1) {
                LOGGER.info("Unable to find the local store details (RS_-1) " + currentloadedStore);
                return;
            }
            String localLoadedTable = currentloadedStore + File.separator
                    + CarbonCommonConstants.RESTRUCTRE_FOLDER + rsCounter + File.separator + factTable;
            localLoadedTable = localLoadedTable.replace("\\", "/");

            int loadCounter = CarbonUtil.checkAndReturnCurrentLoadFolderNumber(localLoadedTable);
            if (loadCounter == -1) {
                LOGGER.info("Unable to find the local store details (Load_-1) " + currentloadedStore);
                return;
            }
            String localLoadName = CarbonCommonConstants.LOAD_FOLDER + mergedLoadName;
            String localLoadFolder = localLoadedTable + File.separator + CarbonCommonConstants.LOAD_FOLDER
                    + mergedLoadName;
            LOGGER.info("Local data loaded folder ... = " + localLoadFolder);

            // Identify the Load_X folder in the HDFS store
            String hdfsStoreLocation = hdfsLocation;
            hdfsStoreLocation = hdfsStoreLocation + File.separator + schemaName + File.separator + cubeName;

            rsCounter = currentRestructNumber;
            if (rsCounter == -1) {
                rsCounter = 0;
            }
            String hdfsLoadedTable = hdfsStoreLocation + File.separator
                    + CarbonCommonConstants.RESTRUCTRE_FOLDER + rsCounter + File.separator + factTable;
            hdfsLoadedTable = hdfsLoadedTable.replace("\\", "/");

            String hdfsStoreLoadFolder = hdfsLoadedTable + File.separator + localLoadName;
            LOGGER.info("HDFS data load folder ... = " + hdfsStoreLoadFolder);

            // Copy the data created by the latest ETL run to the HDFS store
            LOGGER.info("Copying " + localLoadFolder + " --> " + hdfsStoreLoadFolder);
            hdfsStoreLoadFolder = hdfsStoreLoadFolder.replace("\\", "/");
            Path path = new Path(hdfsStoreLocation);
            FileSystem fs = path.getFileSystem(FileFactory.getConfiguration());
            fs.copyFromLocalFile(true, true, new Path(localLoadFolder), new Path(hdfsStoreLoadFolder));

            LOGGER.info("Copying sliceMetaData from " + localLoadedTable + " --> " + hdfsLoadedTable);
        } else {
            LOGGER.info("Separate carbon.storelocation.hdfs is not configured for hdfs store path");
        }
    } catch (Exception e) {
        LOGGER.info(e.getMessage());
    }
}
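A note on the call above: the first argument is delSrc=true, so copyFromLocalFile deletes the local load folder once the copy succeeds; in effect the merged load is moved, not copied, into the HDFS store. The two Carbon examples that follow use the same flag.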
From source file: org.carbondata.processing.store.CarbonFactDataHandlerColumnarMerger.java
License: Apache License

public void copyToHDFS(String loadPath) throws CarbonDataWriterException {
    Path path = new Path(loadPath);
    FileSystem fs;
    try {
        fs = path.getFileSystem(FileFactory.getConfiguration());
        // delSrc=true, overwrite=true: the local destLocation folder is removed
        // after the copy, so this effectively moves it to loadPath
        fs.copyFromLocalFile(true, true, new Path(destLocation), new Path(loadPath));
    } catch (IOException e) {
        throw new CarbonDataWriterException(e.getLocalizedMessage());
    }
}
From source file: org.carbondata.spark.load.CarbonLoaderUtil.java
License: Apache License

public static void copyMergeToHDFS(String schemaName, String cubeName, String factTable, String hdfsLocation,
        String localStore, int currentRestructNumber, String mergedLoadName) {
    try {
        // If the hdfs store and the local store are configured differently, then copy
        if (hdfsLocation != null && !hdfsLocation.equals(localStore)) {
            // Identify the Load_X folder from the local store folder
            String currentloadedStore = localStore;
            currentloadedStore = currentloadedStore + File.separator + schemaName + File.separator + cubeName;

            int rsCounter = currentRestructNumber;
            if (rsCounter == -1) {
                LOGGER.info("Unable to find the local store details (RS_-1) " + currentloadedStore);
                return;
            }
            String localLoadedTable = currentloadedStore + File.separator
                    + CarbonCommonConstants.RESTRUCTRE_FOLDER + rsCounter + File.separator + factTable;
            localLoadedTable = localLoadedTable.replace("\\", "/");

            int loadCounter = CarbonUtil.checkAndReturnCurrentLoadFolderNumber(localLoadedTable);
            if (loadCounter == -1) {
                LOGGER.info("Unable to find the local store details (Load_-1) " + currentloadedStore);
                return;
            }
            String localLoadName = CarbonCommonConstants.LOAD_FOLDER + mergedLoadName;
            String localLoadFolder = localLoadedTable + File.separator + CarbonCommonConstants.LOAD_FOLDER
                    + mergedLoadName;
            LOGGER.info("Local data loaded folder ... = " + localLoadFolder);

            // Identify the Load_X folder in the HDFS store
            String hdfsStoreLocation = hdfsLocation;
            hdfsStoreLocation = hdfsStoreLocation + File.separator + schemaName + File.separator + cubeName;

            rsCounter = currentRestructNumber;
            if (rsCounter == -1) {
                rsCounter = 0;
            }
            String hdfsLoadedTable = hdfsStoreLocation + File.separator
                    + CarbonCommonConstants.RESTRUCTRE_FOLDER + rsCounter + File.separator + factTable;
            hdfsLoadedTable = hdfsLoadedTable.replace("\\", "/");

            String hdfsStoreLoadFolder = hdfsLoadedTable + File.separator + localLoadName;
            LOGGER.info("HDFS data load folder ... = " + hdfsStoreLoadFolder);

            // Copy the data created by the latest ETL run to the HDFS store
            LOGGER.info("Copying " + localLoadFolder + " --> " + hdfsStoreLoadFolder);
            hdfsStoreLoadFolder = hdfsStoreLoadFolder.replace("\\", "/");
            Path path = new Path(hdfsStoreLocation);
            FileSystem fs = path.getFileSystem(FileFactory.getConfiguration());
            fs.copyFromLocalFile(true, true, new Path(localLoadFolder), new Path(hdfsStoreLoadFolder));

            LOGGER.info("Copying sliceMetaData from " + localLoadedTable + " --> " + hdfsLoadedTable);
        } else {
            LOGGER.info("Separate carbon.storelocation.hdfs is not configured for hdfs store path");
        }
    } catch (Exception e) {
        // a single catch suffices here; RuntimeException is already an Exception
        LOGGER.info(e.getMessage());
    }
}
From source file: org.conan.myhadoop02.mr.yarntest.Client.java
License: Apache License

/**
 * Main run function for the client
 * @return true if application completed successfully
 * @throws IOException
 * @throws YarnException
 */
public boolean run() throws IOException, YarnException {
    LOG.info("Running Client");
    yarnClient.start();

    YarnClusterMetrics clusterMetrics = yarnClient.getYarnClusterMetrics();
    LOG.info("Got Cluster metric info from ASM" + ", numNodeManagers=" + clusterMetrics.getNumNodeManagers());

    List<NodeReport> clusterNodeReports = yarnClient.getNodeReports(NodeState.RUNNING);
    LOG.info("Got Cluster node info from ASM");
    for (NodeReport node : clusterNodeReports) {
        LOG.info("Got node report from ASM for" + ", nodeId=" + node.getNodeId() + ", nodeAddress"
                + node.getHttpAddress() + ", nodeRackName" + node.getRackName() + ", nodeNumContainers"
                + node.getNumContainers());
    }

    QueueInfo queueInfo = yarnClient.getQueueInfo(this.amQueue);
    LOG.info("Queue info" + ", queueName=" + queueInfo.getQueueName() + ", queueCurrentCapacity="
            + queueInfo.getCurrentCapacity() + ", queueMaxCapacity=" + queueInfo.getMaximumCapacity()
            + ", queueApplicationCount=" + queueInfo.getApplications().size() + ", queueChildQueueCount="
            + queueInfo.getChildQueues().size());

    List<QueueUserACLInfo> listAclInfo = yarnClient.getQueueAclsInfo();
    for (QueueUserACLInfo aclInfo : listAclInfo) {
        for (QueueACL userAcl : aclInfo.getUserAcls()) {
            LOG.info("User ACL Info for Queue" + ", queueName=" + aclInfo.getQueueName() + ", userAcl="
                    + userAcl.name());
        }
    }

    if (domainId != null && domainId.length() > 0 && toCreateDomain) {
        prepareTimelineDomain();
    }

    // Get a new application id
    YarnClientApplication app = yarnClient.createApplication();
    GetNewApplicationResponse appResponse = app.getNewApplicationResponse();
    // TODO get min/max resource capabilities from RM and change memory ask if needed
    // If we do not have min/max, we may not be able to correctly request
    // the required resources from the RM for the app master
    // Memory ask has to be a multiple of min and less than max.
    // Dump out information about cluster capability as seen by the resource manager
    int maxMem = appResponse.getMaximumResourceCapability().getMemory();
    LOG.info("Max mem capability of resources in this cluster " + maxMem);

    // A resource ask cannot exceed the max.
    if (amMemory > maxMem) {
        LOG.info("AM memory specified above max threshold of cluster. Using max value." + ", specified="
                + amMemory + ", max=" + maxMem);
        amMemory = maxMem;
    }

    int maxVCores = appResponse.getMaximumResourceCapability().getVirtualCores();
    LOG.info("Max virtual cores capability of resources in this cluster " + maxVCores);

    if (amVCores > maxVCores) {
        LOG.info("AM virtual cores specified above max threshold of cluster. " + "Using max value."
                + ", specified=" + amVCores + ", max=" + maxVCores);
        amVCores = maxVCores;
    }

    // set the application name
    ApplicationSubmissionContext appContext = app.getApplicationSubmissionContext();
    ApplicationId appId = appContext.getApplicationId();

    appContext.setKeepContainersAcrossApplicationAttempts(keepContainers);
    appContext.setApplicationName(appName);

    if (attemptFailuresValidityInterval >= 0) {
        appContext.setAttemptFailuresValidityInterval(attemptFailuresValidityInterval);
    }

    // set local resources for the application master
    // local files or archives as needed
    // In this scenario, the jar file for the application master is part of the local resources
    Map<String, LocalResource> localResources = new HashMap<String, LocalResource>();

    LOG.info("Copy App Master jar from local filesystem and add to local environment");
    // Copy the application master jar to the filesystem
    // Create a local resource to point to the destination jar path
    FileSystem fs = FileSystem.get(conf);
    addToLocalResources(fs, appMasterJar, appMasterJarPath, appId.toString(), localResources, null);

    // Set the log4j properties if needed
    if (!log4jPropFile.isEmpty()) {
        addToLocalResources(fs, log4jPropFile, log4jPath, appId.toString(), localResources, null);
    }

    // The shell script has to be made available on the final container(s)
    // where it will be executed.
    // To do this, we need to first copy it into the filesystem that is visible
    // to the yarn framework.
    // We do not need to set this as a local resource for the application
    // master as the application master does not need it.
    String hdfsShellScriptLocation = "";
    long hdfsShellScriptLen = 0;
    long hdfsShellScriptTimestamp = 0;
    if (!shellScriptPath.isEmpty()) {
        Path shellSrc = new Path(shellScriptPath);
        String shellPathSuffix = appName + "/" + appId.toString() + "/" + SCRIPT_PATH;
        Path shellDst = new Path(fs.getHomeDirectory(), shellPathSuffix);
        fs.copyFromLocalFile(false, true, shellSrc, shellDst);
        hdfsShellScriptLocation = shellDst.toUri().toString();
        FileStatus shellFileStatus = fs.getFileStatus(shellDst);
        hdfsShellScriptLen = shellFileStatus.getLen();
        hdfsShellScriptTimestamp = shellFileStatus.getModificationTime();
    }

    if (!shellCommand.isEmpty()) {
        addToLocalResources(fs, null, shellCommandPath, appId.toString(), localResources, shellCommand);
    }

    if (shellArgs.length > 0) {
        addToLocalResources(fs, null, shellArgsPath, appId.toString(), localResources,
                StringUtils.join(shellArgs, " "));
    }

    // Set the necessary security tokens as needed
    //amContainer.setContainerTokens(containerToken);

    // Set the env variables to be setup in the env where the application master will be run
    LOG.info("Set the environment for the application master");
    Map<String, String> env = new HashMap<String, String>();

    // put location of shell script into env
    // using the env info, the application master will create the correct local resource for the
    // eventual containers that will be launched to execute the shell scripts
    env.put(DSConstants.DISTRIBUTEDSHELLSCRIPTLOCATION, hdfsShellScriptLocation);
    env.put(DSConstants.DISTRIBUTEDSHELLSCRIPTTIMESTAMP, Long.toString(hdfsShellScriptTimestamp));
    env.put(DSConstants.DISTRIBUTEDSHELLSCRIPTLEN, Long.toString(hdfsShellScriptLen));
    if (domainId != null && domainId.length() > 0) {
        env.put(DSConstants.DISTRIBUTEDSHELLTIMELINEDOMAIN, domainId);
    }

    // Add AppMaster.jar location to classpath
    // At some point we should not be required to add
    // the hadoop specific classpaths to the env.
    // It should be provided out of the box.
    // For now setting all required classpaths including
    // the classpath to "." for the application jar
    StringBuilder classPathEnv = new StringBuilder(Environment.CLASSPATH.$$())
            .append(ApplicationConstants.CLASS_PATH_SEPARATOR).append("./*");
    for (String c : conf.getStrings(YarnConfiguration.YARN_APPLICATION_CLASSPATH,
            YarnConfiguration.DEFAULT_YARN_CROSS_PLATFORM_APPLICATION_CLASSPATH)) {
        classPathEnv.append(ApplicationConstants.CLASS_PATH_SEPARATOR);
        classPathEnv.append(c.trim());
    }
    classPathEnv.append(ApplicationConstants.CLASS_PATH_SEPARATOR).append("./log4j.properties");

    // add the runtime classpath needed for tests to work
    if (conf.getBoolean(YarnConfiguration.IS_MINI_YARN_CLUSTER, false)) {
        classPathEnv.append(':');
        classPathEnv.append(System.getProperty("java.class.path"));
    }

    env.put("CLASSPATH", classPathEnv.toString());

    // Set the necessary command to execute the application master
    Vector<CharSequence> vargs = new Vector<CharSequence>(30);

    // Set java executable command
    LOG.info("Setting up app master command");
    vargs.add(Environment.JAVA_HOME.$$() + "/bin/java");
    // Set Xmx based on am memory size
    vargs.add("-Xmx" + amMemory + "m");
    // Set class name
    vargs.add(appMasterMainClass);
    // Set params for Application Master
    vargs.add("--container_memory " + String.valueOf(containerMemory));
    vargs.add("--container_vcores " + String.valueOf(containerVirtualCores));
    vargs.add("--num_containers " + String.valueOf(numContainers));
    if (null != nodeLabelExpression) {
        appContext.setNodeLabelExpression(nodeLabelExpression);
    }
    vargs.add("--priority " + String.valueOf(shellCmdPriority));

    for (Map.Entry<String, String> entry : shellEnv.entrySet()) {
        vargs.add("--shell_env " + entry.getKey() + "=" + entry.getValue());
    }
    if (debugFlag) {
        vargs.add("--debug");
    }

    vargs.add("1>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/AppMaster.stdout");
    vargs.add("2>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/AppMaster.stderr");

    // Get final command
    StringBuilder command = new StringBuilder();
    for (CharSequence str : vargs) {
        command.append(str).append(" ");
    }

    LOG.info("Completed setting up app master command " + command.toString());
    List<String> commands = new ArrayList<String>();
    commands.add(command.toString());

    // Set up the container launch context for the application master
    ContainerLaunchContext amContainer = ContainerLaunchContext.newInstance(localResources, env, commands,
            null, null, null);

    // Set up resource type requirements
    // For now, both memory and vcores are supported, so we set memory and
    // vcores requirements
    Resource capability = Resource.newInstance(amMemory, amVCores);
    appContext.setResource(capability);

    // Service data is a binary blob that can be passed to the application
    // Not needed in this scenario
    // amContainer.setServiceData(serviceData);

    // Setup security tokens
    if (UserGroupInformation.isSecurityEnabled()) {
        // Note: Credentials class is marked as LimitedPrivate for HDFS and MapReduce
        Credentials credentials = new Credentials();
        String tokenRenewer = conf.get(YarnConfiguration.RM_PRINCIPAL);
        if (tokenRenewer == null || tokenRenewer.length() == 0) {
            throw new IOException("Can't get Master Kerberos principal for the RM to use as renewer");
        }

        // For now, only getting tokens for the default file-system.
        final Token<?> tokens[] = fs.addDelegationTokens(tokenRenewer, credentials);
        if (tokens != null) {
            for (Token<?> token : tokens) {
                LOG.info("Got dt for " + fs.getUri() + "; " + token);
            }
        }
        DataOutputBuffer dob = new DataOutputBuffer();
        credentials.writeTokenStorageToStream(dob);
        ByteBuffer fsTokens = ByteBuffer.wrap(dob.getData(), 0, dob.getLength());
        amContainer.setTokens(fsTokens);
    }

    appContext.setAMContainerSpec(amContainer);

    // Set the priority for the application master
    // TODO - what is the range for priority? how to decide?
    Priority pri = Priority.newInstance(amPriority);
    appContext.setPriority(pri);

    // Set the queue to which this application is to be submitted in the RM
    appContext.setQueue(amQueue);

    // Submit the application to the applications manager
    // SubmitApplicationResponse submitResp = applicationsManager.submitApplication(appRequest);
    // Ignore the response as either a valid response object is returned on success
    // or an exception thrown to denote some form of a failure
    LOG.info("Submitting application to ASM");

    yarnClient.submitApplication(appContext);

    // TODO
    // Try submitting the same request again
    // app submission failure?

    // Monitor the application
    return monitorApplication(appId);
}
From source file: org.deeplearning4j.iterativereduce.runtime.Utils.java
License: Apache License

private static void copyToFs(Configuration conf, String local, String remote) throws IOException {
    FileSystem fs = FileSystem.get(conf);
    Path src = new Path(local);
    Path dst = fs.makeQualified(new Path(remote));

    LOG.debug("Copying to filesystem, src=" + src.toString() + ", dst=" + dst);
    fs.copyFromLocalFile(false, true, src, dst);
}
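One detail worth noting here: fs.makeQualified resolves the remote path against the filesystem's default URI and working directory, so even a relative destination is turned into an absolute, scheme-qualified path before the copy, which is also what the debug log records.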
From source file: org.dknight.app.Client.java
License: Apache License

/**
 * Main run function for the client
 * @return true if application completed successfully
 * @throws IOException
 * @throws YarnException
 */
public boolean run() throws IOException, YarnException {
    LOG.info("Running Client");
    yarnClient.start();

    YarnClusterMetrics clusterMetrics = yarnClient.getYarnClusterMetrics();
    LOG.info("Got Cluster metric info from ASM" + ", numNodeManagers=" + clusterMetrics.getNumNodeManagers());

    List<NodeReport> clusterNodeReports = yarnClient.getNodeReports(NodeState.RUNNING);
    LOG.info("Got Cluster node info from ASM");
    for (NodeReport node : clusterNodeReports) {
        LOG.info("Got node report from ASM for" + ", nodeId=" + node.getNodeId() + ", nodeAddress"
                + node.getHttpAddress() + ", nodeRackName" + node.getRackName() + ", nodeNumContainers"
                + node.getNumContainers());
    }

    QueueInfo queueInfo = yarnClient.getQueueInfo(this.amQueue);
    LOG.info("Queue info" + ", queueName=" + queueInfo.getQueueName() + ", queueCurrentCapacity="
            + queueInfo.getCurrentCapacity() + ", queueMaxCapacity=" + queueInfo.getMaximumCapacity()
            + ", queueApplicationCount=" + queueInfo.getApplications().size() + ", queueChildQueueCount="
            + queueInfo.getChildQueues().size());

    List<QueueUserACLInfo> listAclInfo = yarnClient.getQueueAclsInfo();
    for (QueueUserACLInfo aclInfo : listAclInfo) {
        for (QueueACL userAcl : aclInfo.getUserAcls()) {
            LOG.info("User ACL Info for Queue" + ", queueName=" + aclInfo.getQueueName() + ", userAcl="
                    + userAcl.name());
        }
    }

    // Get a new application id
    YarnClientApplication app = yarnClient.createApplication();
    GetNewApplicationResponse appResponse = app.getNewApplicationResponse();
    // TODO get min/max resource capabilities from RM and change memory ask if needed
    // If we do not have min/max, we may not be able to correctly request
    // the required resources from the RM for the app master
    // Memory ask has to be a multiple of min and less than max.
    // Dump out information about cluster capability as seen by the resource manager
    int maxMem = appResponse.getMaximumResourceCapability().getMemory();
    LOG.info("Max mem capability of resources in this cluster " + maxMem);

    // A resource ask cannot exceed the max.
    if (amMemory > maxMem) {
        LOG.info("AM memory specified above max threshold of cluster. Using max value." + ", specified="
                + amMemory + ", max=" + maxMem);
        amMemory = maxMem;
    }

    // set the application name
    ApplicationSubmissionContext appContext = app.getApplicationSubmissionContext();
    ApplicationId appId = appContext.getApplicationId();
    appContext.setApplicationName(appName);

    // Set up the container launch context for the application master
    ContainerLaunchContext amContainer = Records.newRecord(ContainerLaunchContext.class);

    // set local resources for the application master
    // local files or archives as needed
    // In this scenario, the jar file for the application master is part of the local resources
    Map<String, LocalResource> localResources = new HashMap<String, LocalResource>();

    LOG.info("Copy App Master jar from local filesystem and add to local environment");
    // Copy the application master jar to the filesystem
    // Create a local resource to point to the destination jar path
    FileSystem fs = FileSystem.get(conf);
    Path src = new Path(appMasterJar);
    String pathSuffix = appName + "/" + appId.getId() + "/AppMaster.jar";
    Path dst = new Path(fs.getHomeDirectory(), pathSuffix);
    fs.copyFromLocalFile(false, true, src, dst);
    FileStatus destStatus = fs.getFileStatus(dst);
    LocalResource amJarRsrc = Records.newRecord(LocalResource.class);

    // Set the type of resource - file or archive
    // archives are untarred at destination
    // we don't need the jar file to be untarred for now
    amJarRsrc.setType(LocalResourceType.FILE);
    // Set visibility of the resource
    // Setting to most private option
    amJarRsrc.setVisibility(LocalResourceVisibility.APPLICATION);
    // Set the resource to be copied over
    amJarRsrc.setResource(ConverterUtils.getYarnUrlFromPath(dst));
    // Set timestamp and length of file so that the framework
    // can do basic sanity checks for the local resource
    // after it has been copied over to ensure it is the same
    // resource the client intended to use with the application
    amJarRsrc.setTimestamp(destStatus.getModificationTime());
    amJarRsrc.setSize(destStatus.getLen());
    localResources.put("AppMaster.jar", amJarRsrc);

    String confXMLFSPath = "";
    {
        File clusterConfXML = new File("cluster-conf.xml");
        conf.writeXml(new FileOutputStream(clusterConfXML));
        Path confSrc = new Path(clusterConfXML.getAbsolutePath());
        String confPathSuffix = appName + "/" + appId.getId() + "/cluster-conf.xml";
        Path confDst = new Path(fs.getHomeDirectory(), confPathSuffix);
        fs.copyFromLocalFile(false, true, confSrc, confDst);
        FileStatus confFileStatus = fs.getFileStatus(confDst);
        LocalResource confRsrc = Records.newRecord(LocalResource.class);
        confRsrc.setType(LocalResourceType.FILE);
        confRsrc.setVisibility(LocalResourceVisibility.APPLICATION);
        confRsrc.setResource(ConverterUtils.getYarnUrlFromURI(confDst.toUri()));
        confRsrc.setSize(confFileStatus.getLen());
        confRsrc.setTimestamp(confFileStatus.getModificationTime());
        localResources.put("cluster-conf.xml", confRsrc);
        confXMLFSPath = confDst.toUri().getPath();
    }

    // Set the log4j properties if needed
    if (!log4jPropFile.isEmpty()) {
        Path log4jSrc = new Path(log4jPropFile);
        Path log4jDst = new Path(fs.getHomeDirectory(), "log4j.props");
        fs.copyFromLocalFile(false, true, log4jSrc, log4jDst);
        FileStatus log4jFileStatus = fs.getFileStatus(log4jDst);
        LocalResource log4jRsrc = Records.newRecord(LocalResource.class);
        log4jRsrc.setType(LocalResourceType.FILE);
        log4jRsrc.setVisibility(LocalResourceVisibility.APPLICATION);
        log4jRsrc.setResource(ConverterUtils.getYarnUrlFromURI(log4jDst.toUri()));
        log4jRsrc.setTimestamp(log4jFileStatus.getModificationTime());
        log4jRsrc.setSize(log4jFileStatus.getLen());
        localResources.put("log4j.properties", log4jRsrc);
    }

    // The shell script has to be made available on the final container(s)
    // where it will be executed.
    // To do this, we need to first copy it into the filesystem that is visible
    // to the yarn framework.
    // We do not need to set this as a local resource for the application
    // master as the application master does not need it.
    String hdfsShellScriptLocation = "";
    long hdfsShellScriptLen = 0;
    long hdfsShellScriptTimestamp = 0;
    if (!shellScriptPath.isEmpty()) {
        Path shellSrc = new Path(shellScriptPath);
        String shellPathSuffix = appName + "/" + appId.getId() + "/ExecShellScript.sh";
        Path shellDst = new Path(fs.getHomeDirectory(), shellPathSuffix);
        fs.copyFromLocalFile(false, true, shellSrc, shellDst);
        hdfsShellScriptLocation = shellDst.toUri().toString();
        FileStatus shellFileStatus = fs.getFileStatus(shellDst);
        hdfsShellScriptLen = shellFileStatus.getLen();
        hdfsShellScriptTimestamp = shellFileStatus.getModificationTime();
    }

    // Set local resource info into app master container launch context
    amContainer.setLocalResources(localResources);

    // Set the necessary security tokens as needed
    //amContainer.setContainerTokens(containerToken);

    // Set the env variables to be setup in the env where the application master will be run
    LOG.info("Set the environment for the application master");
    Map<String, String> env = new HashMap<String, String>();

    // put location of shell script into env
    // using the env info, the application master will create the correct local resource for the
    // eventual containers that will be launched to execute the shell scripts
    env.put(DSConstants.DISTRIBUTEDSHELLSCRIPTLOCATION, hdfsShellScriptLocation);
    env.put(DSConstants.DISTRIBUTEDSHELLSCRIPTTIMESTAMP, Long.toString(hdfsShellScriptTimestamp));
    env.put(DSConstants.DISTRIBUTEDSHELLSCRIPTLEN, Long.toString(hdfsShellScriptLen));
    env.put(DSConstants.CLUSTER_CONF_XML_PATH, confXMLFSPath);

    // Add AppMaster.jar location to classpath
    // At some point we should not be required to add
    // the hadoop specific classpaths to the env.
    // It should be provided out of the box.
    // For now setting all required classpaths including
    // the classpath to "." for the application jar
    StringBuilder classPathEnv = new StringBuilder(Environment.CLASSPATH.$()).append(File.pathSeparatorChar)
            .append("./*");
    for (String c : conf.getStrings(YarnConfiguration.YARN_APPLICATION_CLASSPATH,
            YarnConfiguration.DEFAULT_YARN_APPLICATION_CLASSPATH)) {
        classPathEnv.append(File.pathSeparatorChar);
        classPathEnv.append(c.trim());
    }
    classPathEnv.append(File.pathSeparatorChar).append("./log4j.properties");

    // add the runtime classpath needed for tests to work
    if (conf.getBoolean(YarnConfiguration.IS_MINI_YARN_CLUSTER, false)) {
        classPathEnv.append(':');
        classPathEnv.append(System.getProperty("java.class.path"));
    }

    env.put("CLASSPATH", classPathEnv.toString());
    amContainer.setEnvironment(env);

    // Set the necessary command to execute the application master
    Vector<CharSequence> vargs = new Vector<CharSequence>(30);

    // Set java executable command
    LOG.info("Setting up app master command");
    vargs.add(Environment.JAVA_HOME.$() + "/bin/java");
    // Set Xmx based on am memory size
    vargs.add("-Xmx" + amMemory + "m");
    // if (debugFlag) {
    //     vargs.add("-Xdebug -Xrunjdwp:transport=dt_socket,address=9998,server=y,suspend=y");
    // }
    // Set class name
    vargs.add(appMasterMainClass);
    // Set params for Application Master
    vargs.add("--container_memory " + String.valueOf(containerMemory));
    vargs.add("--num_containers " + String.valueOf(numContainers));
    vargs.add("--priority " + String.valueOf(shellCmdPriority));
    if (!shellCommand.isEmpty()) {
        vargs.add("--shell_command " + shellCommand + "");
    }
    if (!shellArgs.isEmpty()) {
        vargs.add("--shell_args " + shellArgs + "");
    }
    for (Map.Entry<String, String> entry : shellEnv.entrySet()) {
        vargs.add("--shell_env " + entry.getKey() + "=" + entry.getValue());
    }
    if (debugFlag) {
        vargs.add("--debug");
    }

    vargs.add("1>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/AppMaster.stdout");
    vargs.add("2>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/AppMaster.stderr");

    // Get final command
    StringBuilder command = new StringBuilder();
    for (CharSequence str : vargs) {
        command.append(str).append(" ");
    }

    LOG.info("Completed setting up app master command " + command.toString());
    List<String> commands = new ArrayList<String>();
    commands.add(command.toString());
    amContainer.setCommands(commands);

    // Set up resource type requirements
    // For now, only memory is supported so we set memory requirements
    Resource capability = Records.newRecord(Resource.class);
    capability.setMemory(amMemory);
    appContext.setResource(capability);

    // Service data is a binary blob that can be passed to the application
    // Not needed in this scenario
    // amContainer.setServiceData(serviceData);

    // Setup security tokens
    if (UserGroupInformation.isSecurityEnabled()) {
        Credentials credentials = new Credentials();
        String tokenRenewer = conf.get(YarnConfiguration.RM_PRINCIPAL);
        if (tokenRenewer == null || tokenRenewer.length() == 0) {
            throw new IOException("Can't get Master Kerberos principal for the RM to use as renewer");
        }

        // For now, only getting tokens for the default file-system.
        final Token<?> tokens[] = fs.addDelegationTokens(tokenRenewer, credentials);
        if (tokens != null) {
            for (Token<?> token : tokens) {
                LOG.info("Got dt for " + fs.getUri() + "; " + token);
            }
        }
        DataOutputBuffer dob = new DataOutputBuffer();
        credentials.writeTokenStorageToStream(dob);
        ByteBuffer fsTokens = ByteBuffer.wrap(dob.getData(), 0, dob.getLength());
        amContainer.setTokens(fsTokens);
    }

    appContext.setAMContainerSpec(amContainer);

    // Set the priority for the application master
    Priority pri = Records.newRecord(Priority.class);
    // TODO - what is the range for priority? how to decide?
    pri.setPriority(amPriority);
    appContext.setPriority(pri);

    // Set the queue to which this application is to be submitted in the RM
    appContext.setQueue(amQueue);

    // Submit the application to the applications manager
    // SubmitApplicationResponse submitResp = applicationsManager.submitApplication(appRequest);
    // Ignore the response as either a valid response object is returned on success
    // or an exception thrown to denote some form of a failure
    LOG.info("Submitting application to ASM");

    yarnClient.submitApplication(appContext);

    // TODO
    // Try submitting the same request again
    // app submission failure?

    // Monitor the application
    return monitorApplication(appId);
}
From source file: org.elasticsearch.hadoop.HdfsUtils.java
License: Apache License

public static void copyFromLocal(String localPath, String destination) {
    try {
        JobConf hadoopConfig = HdpBootstrap.hadoopConfig();
        FileSystem fs = FileSystem.get(hadoopConfig);
        if (!(fs instanceof LocalFileSystem)) {
            Path src = new Path(localPath);
            Path dst = new Path(destination);
            fs.copyFromLocalFile(false, true, src, dst);
            System.out.println(String.format("Copying [%s] to [%s]", src, dst));
        }
    } catch (Exception ex) {
        throw new RuntimeException(ex);
    }
}
From source file: org.elasticsearch.hadoop.integration.HdfsUtils.java
License: Apache License

public static void copyFromLocal(String localPath, String destination) {
    try {
        JobConf hadoopConfig = HdpBootstrap.hadoopConfig();
        FileSystem fs = FileSystem.get(hadoopConfig);
        if (!(fs instanceof LocalFileSystem)) {
            Path src = new Path(localPath);
            Path dst = new Path(destination);
            fs.copyFromLocalFile(false, true, src, dst);
            System.out.println(String.format("Copying [%s] to [%s]", src, dst));
        }
    } catch (IOException ex) {
        throw new RuntimeException(ex);
    }
}