List of usage examples for org.apache.hadoop.fs FileSystem copyFromLocalFile
public void copyFromLocalFile(boolean delSrc, boolean overwrite, Path src, Path dst) throws IOException
From source file:com.flyhz.avengers.framework.AvengersClient.java
License:Apache License
/** * Main run function for the client//from w w w.j a v a 2 s . co m * * @return true if application completed successfully * @throws IOException * @throws YarnException */ private boolean run(String appName, List<String> commands) throws IOException, YarnException { LOG.info("Running Client"); yarnClient.start(); YarnClusterMetrics clusterMetrics = yarnClient.getYarnClusterMetrics(); LOG.info("Got Cluster metric info from ASM" + ", numNodeManagers=" + clusterMetrics.getNumNodeManagers()); List<NodeReport> clusterNodeReports = yarnClient.getNodeReports(NodeState.RUNNING); LOG.info("Got Cluster node info from ASM"); for (NodeReport node : clusterNodeReports) { LOG.info("Got node report from ASM for" + ", nodeId=" + node.getNodeId() + ", nodeAddress" + node.getHttpAddress() + ", nodeRackName" + node.getRackName() + ", nodeNumContainers" + node.getNumContainers()); } QueueInfo queueInfo = yarnClient.getQueueInfo(this.amQueue); LOG.info("Queue info" + ", queueName=" + queueInfo.getQueueName() + ", queueCurrentCapacity=" + queueInfo.getCurrentCapacity() + ", queueMaxCapacity=" + queueInfo.getMaximumCapacity() + ", queueApplicationCount=" + queueInfo.getApplications().size() + ", queueChildQueueCount=" + queueInfo.getChildQueues().size()); List<QueueUserACLInfo> listAclInfo = yarnClient.getQueueAclsInfo(); for (QueueUserACLInfo aclInfo : listAclInfo) { for (QueueACL userAcl : aclInfo.getUserAcls()) { LOG.info("User ACL Info for Queue" + ", queueName=" + aclInfo.getQueueName() + ", userAcl=" + userAcl.name()); } } // Get a new application id YarnClientApplication app = yarnClient.createApplication(); GetNewApplicationResponse appResponse = app.getNewApplicationResponse(); // if needed // If we do not have min/max, we may not be able to correctly request // the required resources from the RM for the app master // Memory ask has to be a multiple of min and less than max. // Dump out information about cluster capability as seen by the resource // manager int maxMem = appResponse.getMaximumResourceCapability().getMemory(); LOG.info("Max mem capabililty of resources in this cluster " + maxMem); // A resource ask cannot exceed the max. if (amMemory > maxMem) { LOG.info("AM memory specified above max threshold of cluster. Using max value." + ", specified=" + amMemory + ", max=" + maxMem); amMemory = maxMem; } // set the application name ApplicationSubmissionContext appContext = app.getApplicationSubmissionContext(); ApplicationId appId = appContext.getApplicationId(); appContext.setApplicationName(appName); // Set up the container launch context for the application master ContainerLaunchContext amContainer = Records.newRecord(ContainerLaunchContext.class); // set local resources for the application master // local files or archives as needed // In this scenario, the jar file for the application master is part of // the local resources Map<String, LocalResource> localResources = new HashMap<String, LocalResource>(); FileSystem fs = DistributedFileSystem.get(conf); Path src = new Path(appJar); Path dst = new Path(fs.getHomeDirectory(), "avengers/" + batchId + "/avengers.jar"); if (copy) { LOG.info("copy local jar to hdfs"); fs.copyFromLocalFile(false, true, src, dst); copy = false; } this.hdfsPath = dst.toUri().toString(); LOG.info("hdfs hdfsPath = {}", dst); FileStatus destStatus = fs.getFileStatus(dst); LocalResource amJarRsrc = Records.newRecord(LocalResource.class); amJarRsrc.setType(LocalResourceType.FILE); amJarRsrc.setVisibility(LocalResourceVisibility.APPLICATION); LOG.info("YarnURLFromPath ->{}", ConverterUtils.getYarnUrlFromPath(dst)); amJarRsrc.setResource(ConverterUtils.getYarnUrlFromPath(dst)); amJarRsrc.setTimestamp(destStatus.getModificationTime()); amJarRsrc.setSize(destStatus.getLen()); localResources.put("avengers.jar", amJarRsrc); // Set the log4j properties if needed if (!log4jPropFile.isEmpty()) { Path log4jSrc = new Path(log4jPropFile); Path log4jDst = new Path(fs.getHomeDirectory(), "log4j.props"); fs.copyFromLocalFile(false, true, log4jSrc, log4jDst); FileStatus log4jFileStatus = fs.getFileStatus(log4jDst); LocalResource log4jRsrc = Records.newRecord(LocalResource.class); log4jRsrc.setType(LocalResourceType.FILE); log4jRsrc.setVisibility(LocalResourceVisibility.APPLICATION); log4jRsrc.setResource(ConverterUtils.getYarnUrlFromURI(log4jDst.toUri())); log4jRsrc.setTimestamp(log4jFileStatus.getModificationTime()); log4jRsrc.setSize(log4jFileStatus.getLen()); localResources.put("log4j.properties", log4jRsrc); } // The shell script has to be made available on the final container(s) // where it will be executed. // To do this, we need to first copy into the filesystem that is visible // to the yarn framework. // We do not need to set this as a local resource for the application // master as the application master does not need it. // Set local resource info into app master container launch context amContainer.setLocalResources(localResources); // Set the necessary security tokens as needed // amContainer.setContainerTokens(containerToken); // Set the env variables to be setup in the env where the application // master will be run LOG.info("Set the environment for the application master"); Map<String, String> env = new HashMap<String, String>(); StringBuilder classPathEnv = new StringBuilder(Environment.CLASSPATH.$()).append(File.pathSeparatorChar); for (String c : conf.getStrings(YarnConfiguration.YARN_APPLICATION_CLASSPATH, YarnConfiguration.DEFAULT_YARN_APPLICATION_CLASSPATH)) { classPathEnv.append(File.pathSeparatorChar); classPathEnv.append(c.trim()); } classPathEnv.append(File.pathSeparatorChar).append("./log4j.properties"); // add the runtime classpath needed for tests to work if (conf.getBoolean(YarnConfiguration.IS_MINI_YARN_CLUSTER, false)) { classPathEnv.append(':'); classPathEnv.append(System.getProperty("java.class.path")); } LOG.info("CLASSPATH -> " + classPathEnv); env.put("CLASSPATH", classPathEnv.toString()); amContainer.setEnvironment(env); for (String cmd : commands) { LOG.info("run command {},appId {}", cmd, appId.getId()); } amContainer.setCommands(commands); // Set up resource type requirements // For now, only memory is supported so we set memory requirements Resource capability = Records.newRecord(Resource.class); capability.setMemory(amMemory); appContext.setResource(capability); // Service data is a binary blob that can be passed to the application // Not needed in this scenario // amContainer.setServiceData(serviceData); // Setup security tokens if (UserGroupInformation.isSecurityEnabled()) { Credentials credentials = new Credentials(); String tokenRenewer = conf.get(YarnConfiguration.RM_PRINCIPAL); if (tokenRenewer == null || tokenRenewer.length() == 0) { throw new IOException("Can't get Master Kerberos principal for the RM to use as renewer"); } // For now, only getting tokens for the default file-system. final Token<?> tokens[] = fs.addDelegationTokens(tokenRenewer, credentials); if (tokens != null) { for (Token<?> token : tokens) { LOG.info("Got dt for " + fs.getUri() + "; " + token); } } DataOutputBuffer dob = new DataOutputBuffer(); credentials.writeTokenStorageToStream(dob); ByteBuffer fsTokens = ByteBuffer.wrap(dob.getData(), 0, dob.getLength()); amContainer.setTokens(fsTokens); } appContext.setAMContainerSpec(amContainer); // Set the priority for the application master Priority pri = Records.newRecord(Priority.class); pri.setPriority(amPriority); appContext.setPriority(pri); // Set the queue to which this application is to be submitted in the RM appContext.setQueue(amQueue); // Submit the application to the applications manager // SubmitApplicationResponse submitResp = // applicationsManager.submitApplication(appRequest); // Ignore the response as either a valid response object is returned on // success // or an exception thrown to denote some form of a failure LOG.info("Submitting application to ASM"); yarnClient.submitApplication(appContext); // Try submitting the same request again // app submission failure? // Monitor the application return monitorApplication(appId); }
From source file:com.hazelcast.yarn.YarnUtil.java
License:Open Source License
public static Path createHDFSFile(FileSystem fs, String src, String dst) throws Exception { Path dstPath = new Path(dst); fs.copyFromLocalFile(false, true, new Path(src), dstPath); return dstPath; }
From source file:com.ikanow.infinit.e.processing.custom.utils.InfiniteHadoopUtils.java
License:Open Source License
public static Path cacheLocalFile(String localPath, String localName, Configuration config) throws IOException { FileSystem fs = FileSystem.get(config); Path toDir = new Path("cache"); Path destFile = new Path("cache/" + localName); File fromFile = new File(localPath + "/" + localName); if (!fromFile.exists()) { throw new IOException("Source file does not exist: " + fromFile.toString()); }/*from www. j a va 2s.com*/ boolean needToCopyFile = true; if (!fs.exists(toDir)) { // (ie relative to WD) fs.mkdirs(toDir); } else { // Now check if the file already exists if (fs.exists(destFile)) { FileStatus fsStat = fs.getFileStatus(destFile); if ((fsStat.getLen() == fromFile.length()) && (fromFile.lastModified() <= fsStat.getModificationTime())) { needToCopyFile = false; } } } if (needToCopyFile) { fs.copyFromLocalFile(false, true, new Path(localPath + "/" + localName), destFile); } return new Path(fs.getFileStatus(destFile).getPath().toUri().getPath()); // (apparently the path has to be in absolute format without even the hdfs:// at the front?!) }
From source file:com.inforefiner.hdata.SubmitClient.java
License:Apache License
/** * Main run function for the client/* w ww . j a v a 2 s. c o m*/ * * @return true if application completed successfully * @throws IOException * @throws YarnException */ public boolean run() throws IOException, YarnException { LOG.info("Running Client"); yarnClient.start(); YarnClusterMetrics clusterMetrics = yarnClient.getYarnClusterMetrics(); LOG.info("Got Cluster metric info from ASM" + ", numNodeManagers=" + clusterMetrics.getNumNodeManagers()); List<NodeReport> clusterNodeReports = yarnClient.getNodeReports(NodeState.RUNNING); LOG.info("Got Cluster node info from ASM"); for (NodeReport node : clusterNodeReports) { LOG.info("Got node report from ASM for" + ", nodeId=" + node.getNodeId() + ", nodeAddress" + node.getHttpAddress() + ", nodeRackName" + node.getRackName() + ", nodeNumContainers" + node.getNumContainers()); } QueueInfo queueInfo = yarnClient.getQueueInfo(this.amQueue); LOG.info("Queue info" + ", queueName=" + queueInfo.getQueueName() + ", queueCurrentCapacity=" + queueInfo.getCurrentCapacity() + ", queueMaxCapacity=" + queueInfo.getMaximumCapacity() + ", queueApplicationCount=" + queueInfo.getApplications().size() + ", queueChildQueueCount=" + queueInfo.getChildQueues().size()); List<QueueUserACLInfo> listAclInfo = yarnClient.getQueueAclsInfo(); for (QueueUserACLInfo aclInfo : listAclInfo) { for (QueueACL userAcl : aclInfo.getUserAcls()) { LOG.info("User ACL Info for Queue" + ", queueName=" + aclInfo.getQueueName() + ", userAcl=" + userAcl.name()); } } if (domainId != null && domainId.length() > 0 && toCreateDomain) { prepareTimelineDomain(); } // Get a new application id YarnClientApplication app = yarnClient.createApplication(); GetNewApplicationResponse appResponse = app.getNewApplicationResponse(); // TODO get min/max resource capabilities from RM and change memory ask if needed // If we do not have min/max, we may not be able to correctly request // the required resources from the RM for the app master // Memory ask has to be a multiple of min and less than max. // Dump out information about cluster capability as seen by the resource manager int maxMem = appResponse.getMaximumResourceCapability().getMemory(); LOG.info("Max mem capabililty of resources in this cluster " + maxMem); // A resource ask cannot exceed the max. if (amMemory > maxMem) { LOG.info("AM memory specified above max threshold of cluster. Using max value." + ", specified=" + amMemory + ", max=" + maxMem); amMemory = maxMem; } int maxVCores = appResponse.getMaximumResourceCapability().getVirtualCores(); LOG.info("Max virtual cores capabililty of resources in this cluster " + maxVCores); if (amVCores > maxVCores) { LOG.info("AM virtual cores specified above max threshold of cluster. " + "Using max value." + ", specified=" + amVCores + ", max=" + maxVCores); amVCores = maxVCores; } // set the application name ApplicationSubmissionContext appContext = app.getApplicationSubmissionContext(); ApplicationId appId = appContext.getApplicationId(); appContext.setKeepContainersAcrossApplicationAttempts(keepContainers); appContext.setApplicationName(appName); if (attemptFailuresValidityInterval >= 0) { appContext.setAttemptFailuresValidityInterval(attemptFailuresValidityInterval); } // set local resources for the application master // local files or archives as needed // In this scenario, the jar file for the application master is part of the local resources Map<String, LocalResource> localResources = new HashMap<String, LocalResource>(); LOG.info("Copy App Master jar from local filesystem and add to local environment"); // Copy the application master jar to the filesystem // Create a local resource to point to the destination jar path FileSystem fs = FileSystem.get(conf); addToLocalResources(fs, appMasterJar, appMasterJarPath, appId.toString(), localResources, null); // Set the log4j properties if needed if (!log4jPropFile.isEmpty()) { addToLocalResources(fs, log4jPropFile, log4jPath, appId.toString(), localResources, null); } // The shell script has to be made available on the final container(s) // where it will be executed. // To do this, we need to first copy into the filesystem that is visible // to the yarn framework. // We do not need to set this as a local resource for the application // master as the application master does not need it. String hdfsShellScriptLocation = ""; long hdfsShellScriptLen = 0; long hdfsShellScriptTimestamp = 0; if (!shellScriptPath.isEmpty()) { Path shellSrc = new Path(shellScriptPath); String shellPathSuffix = appName + "/" + appId.toString() + "/" + SCRIPT_PATH; Path shellDst = new Path(fs.getHomeDirectory(), shellPathSuffix); fs.copyFromLocalFile(false, true, shellSrc, shellDst); hdfsShellScriptLocation = shellDst.toUri().toString(); FileStatus shellFileStatus = fs.getFileStatus(shellDst); hdfsShellScriptLen = shellFileStatus.getLen(); hdfsShellScriptTimestamp = shellFileStatus.getModificationTime(); } if (!shellCommand.isEmpty()) { addToLocalResources(fs, null, shellCommandPath, appId.toString(), localResources, shellCommand); } if (shellArgs.length > 0) { addToLocalResources(fs, null, shellArgsPath, appId.toString(), localResources, StringUtils.join(shellArgs, " ")); } // Set the necessary security tokens as needed //amContainer.setContainerTokens(containerToken); // Set the env variables to be setup in the env where the application master will be run LOG.info("Set the environment for the application master"); Map<String, String> env = new HashMap<String, String>(); // put location of shell script into env // using the env info, the application master will create the correct local resource for the // eventual containers that will be launched to execute the shell scripts env.put(DSConstants.DISTRIBUTEDSHELLSCRIPTLOCATION, hdfsShellScriptLocation); env.put(DSConstants.DISTRIBUTEDSHELLSCRIPTTIMESTAMP, Long.toString(hdfsShellScriptTimestamp)); env.put(DSConstants.DISTRIBUTEDSHELLSCRIPTLEN, Long.toString(hdfsShellScriptLen)); if (domainId != null && domainId.length() > 0) { env.put(DSConstants.DISTRIBUTEDSHELLTIMELINEDOMAIN, domainId); } // Add AppMaster.jar location to classpath // At some point we should not be required to add // the hadoop specific classpaths to the env. // It should be provided out of the box. // For now setting all required classpaths including // the classpath to "." for the application jar StringBuilder classPathEnv = new StringBuilder(Environment.CLASSPATH.$$()) .append(ApplicationConstants.CLASS_PATH_SEPARATOR).append("./*"); for (String c : conf.getStrings(YarnConfiguration.YARN_APPLICATION_CLASSPATH, YarnConfiguration.DEFAULT_YARN_CROSS_PLATFORM_APPLICATION_CLASSPATH)) { classPathEnv.append(ApplicationConstants.CLASS_PATH_SEPARATOR); classPathEnv.append(c.trim()); } classPathEnv.append(ApplicationConstants.CLASS_PATH_SEPARATOR).append("./log4j.properties"); // add the runtime classpath needed for tests to work if (conf.getBoolean(YarnConfiguration.IS_MINI_YARN_CLUSTER, false)) { classPathEnv.append(':'); classPathEnv.append(System.getProperty("java.class.path")); } env.put("CLASSPATH", classPathEnv.toString()); // Set the necessary command to execute the application master Vector<CharSequence> vargs = new Vector<CharSequence>(30); // Set java executable command LOG.info("Setting up app master command"); vargs.add(Environment.JAVA_HOME.$$() + "/bin/java"); // Set Xmx based on am memory size vargs.add("-Xmx" + amMemory + "m"); // Set class name vargs.add(appMasterMainClass); // Set params for Application Master vargs.add("--container_memory " + String.valueOf(containerMemory)); vargs.add("--container_vcores " + String.valueOf(containerVirtualCores)); vargs.add("--num_containers " + String.valueOf(numContainers)); if (null != nodeLabelExpression) { appContext.setNodeLabelExpression(nodeLabelExpression); } vargs.add("--priority " + String.valueOf(shellCmdPriority)); for (Map.Entry<String, String> entry : shellEnv.entrySet()) { vargs.add("--shell_env " + entry.getKey() + "=" + entry.getValue()); } if (debugFlag) { vargs.add("--debug"); } vargs.add("1>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/AppMaster.stdout"); vargs.add("2>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/AppMaster.stderr"); // Get final commmand StringBuilder command = new StringBuilder(); for (CharSequence str : vargs) { command.append(str).append(" "); } LOG.info("Completed setting up app master command " + command.toString()); List<String> commands = new ArrayList<String>(); commands.add(command.toString()); // Set up the container launch context for the application master ContainerLaunchContext amContainer = ContainerLaunchContext.newInstance(localResources, env, commands, null, null, null); // Set up resource type requirements // For now, both memory and vcores are supported, so we set memory and // vcores requirements Resource capability = Resource.newInstance(amMemory, amVCores); appContext.setResource(capability); // Service data is a binary blob that can be passed to the application // Not needed in this scenario // amContainer.setServiceData(serviceData); // Setup security tokens if (UserGroupInformation.isSecurityEnabled()) { // Note: Credentials class is marked as LimitedPrivate for HDFS and MapReduce Credentials credentials = new Credentials(); String tokenRenewer = conf.get(YarnConfiguration.RM_PRINCIPAL); if (tokenRenewer == null || tokenRenewer.length() == 0) { throw new IOException("Can't get Master Kerberos principal for the RM to use as renewer"); } // For now, only getting tokens for the default file-system. final Token<?> tokens[] = fs.addDelegationTokens(tokenRenewer, credentials); if (tokens != null) { for (Token<?> token : tokens) { LOG.info("Got dt for " + fs.getUri() + "; " + token); } } DataOutputBuffer dob = new DataOutputBuffer(); credentials.writeTokenStorageToStream(dob); ByteBuffer fsTokens = ByteBuffer.wrap(dob.getData(), 0, dob.getLength()); amContainer.setTokens(fsTokens); } appContext.setAMContainerSpec(amContainer); // Set the priority for the application master // TODO - what is the range for priority? how to decide? Priority pri = Priority.newInstance(amPriority); appContext.setPriority(pri); // Set the queue to which this application is to be submitted in the RM appContext.setQueue(amQueue); // Submit the application to the applications manager // SubmitApplicationResponse submitResp = applicationsManager.submitApplication(appRequest); // Ignore the response as either a valid response object is returned on success // or an exception thrown to denote some form of a failure LOG.info("Submitting application to ASM"); yarnClient.submitApplication(appContext); // TODO // Try submitting the same request again // app submission failure? Thread t = new Thread(new LogReceiver()); t.start(); // Monitor the application return monitorApplication(appId); }
From source file:com.linkedin.pinot.hadoop.io.PinotRecordWriter.java
License:Apache License
@Override public void close(TaskAttemptContext context) throws IOException, InterruptedException { _pinotRecordSerialization.close();//ww w . j av a2s .com _handler.close(); File dir = new File(_baseDataDir); _segmentConfig.setSegmentName(PinotOutputFormat.getSegmentName(context)); SegmentIndexCreationDriver driver = new SegmentIndexCreationDriverImpl(); for (File f : dir.listFiles()) { createSegment(f.getPath(), driver); } final FileSystem fs = FileSystem.get(new Configuration()); String localSegmentPath = new File(_segmentConfig.getOutDir(), _segmentConfig.getSegmentName()) .getAbsolutePath(); String localTarPath = getLocalTarFile(PinotOutputFormat.getTempSegmentDir(context)); LOGGER.info("Trying to tar the segment to: {}", localTarPath); TarGzCompressionUtils.createTarGzOfDirectory(localSegmentPath, localTarPath); String hdfsTarPath = _workDir + "/segmentTar/" + _segmentConfig.getSegmentName() + JobConfigConstants.TARGZ; LOGGER.info("*********************************************************************"); LOGGER.info("Copy from : {} to {}", localTarPath, hdfsTarPath); LOGGER.info("*********************************************************************"); fs.copyFromLocalFile(true, true, new Path(localTarPath), new Path(hdfsTarPath)); clean(PinotOutputFormat.getTempSegmentDir(context)); }
From source file:com.redsqirl.workflow.server.OozieManager.java
License:Open Source License
/** * Run a job with A list of actions and a DataFlow * //from w w w.j a v a 2s . c o m * @param df * DataFlow to be run with * @param list * of actions to run * @return ID of the Job * @throws Exception */ public String run(DataFlow df, List<RunnableElement> list, Date startTime, Date endTime) throws Exception { logger.debug("run"); String jobId = null; String error = null; final String nameWF = df.getName(); String fileName = buildFileName(df); Type bundle = df.isSchedule() && (endTime == null || endTime.after(new Date())) ? Type.BUNDLE : Type.WORKFLOW; File parentDir = new File(WorkflowPrefManager.getPathooziejob() + "/" + fileName); String hdfsWfPath = WorkflowPrefManager.getHDFSPathJobs() + "/" + fileName; Map<String, String> props = null; if (!parentDir.exists()) { parentDir.mkdirs(); } else { try { LocalFileSystem.delete(parentDir); parentDir.mkdir(); } catch (IOException e) { error = e.getMessage(); } } if (error == null) { // Creating xml OozieXmlCreator xmlCreator = null; xmlCreator = new OozieXmlForkJoinPaired(); logger.debug("run df " + df); logger.debug("run list " + list); logger.debug("run parentDir " + parentDir); error = xmlCreator.createXml(df, list, parentDir, startTime, endTime); if (error == null) { try { logger.debug(df.getCoordinators().get(0).getName()); logger.debug(df.getCoordinators().get(0).getVariables().getKeyValues()); props = writeWorkflowProp(new File(parentDir, "job.properties"), hdfsWfPath, bundle, Type.WORKFLOW.equals(bundle) ? df.getCoordinators().get(0).getVariables().getKeyValues() : null); logger.debug(props); } catch (Exception e) { error = LanguageManagerWF.getText("ooziemanager.createproperties", new Object[] { e.getMessage() }); } } } if (error == null) { logger.debug("copy from " + parentDir.getAbsolutePath() + " to " + hdfsWfPath); FileSystem fs = null; try { fs = NameNodeVar.getFS(); Path wCur = new Path(hdfsWfPath); fs.copyFromLocalFile(false, true, new Path(parentDir.getAbsolutePath()), wCur); } catch (Exception e) { logger.error(e, e); error = LanguageManagerWF.getText("ooziemanager.copydependencies", new Object[] { e.getMessage() }); } try { LocalFileSystem.delete(parentDir); } catch (Exception e1) { logger.error("Fail to remove local directory: " + e1); } } if (error == null) { // create a workflow job configuration and set the workflow application path String wfPath = WorkflowPrefManager.getSysProperty(WorkflowPrefManager.sys_namenode) + hdfsWfPath; logger.debug("Workflow path: " + wfPath); Properties conf = addProperties(oc.createConfiguration(), props); try { jobId = oc.run(conf); logger.debug("Workflow job submitted succesfully"); } catch (OozieClientException e) { logger.error(e, e); error = LanguageManagerWF.getText("ooziemanager.launchjob", new Object[] { e.getMessage() }); } } if (error != null) { logger.debug(error); throw new Exception(error); } new Thread() { public void run() { try { cleanJobDirectory(nameWF); } catch (Exception e) { logger.warn("Fail clean oozie directory for job " + nameWF); } } }.start(); return jobId; }
From source file:com.scistor.dshell.ScistorClient.java
License:Apache License
/** * Main run function for the client/*from w w w .java2 s .c om*/ * * @return true if application completed successfully * @throws IOException * @throws YarnException */ public boolean run() throws IOException, YarnException { LOG.info("Running Client"); yarnClient.start(); YarnClusterMetrics clusterMetrics = yarnClient.getYarnClusterMetrics(); LOG.info("Got Cluster metric info from ASM" + ", numNodeManagers=" + clusterMetrics.getNumNodeManagers()); List<NodeReport> clusterNodeReports = yarnClient.getNodeReports(NodeState.RUNNING); LOG.info("Got Cluster node info from ASM"); for (NodeReport node : clusterNodeReports) { LOG.info("Got node report from ASM for" + ", nodeId=" + node.getNodeId() + ", nodeAddress" + node.getHttpAddress() + ", nodeRackName" + node.getRackName() + ", nodeNumContainers" + node.getNumContainers()); } QueueInfo queueInfo = yarnClient.getQueueInfo(this.amQueue); LOG.info("Queue info" + ", queueName=" + queueInfo.getQueueName() + ", queueCurrentCapacity=" + queueInfo.getCurrentCapacity() + ", queueMaxCapacity=" + queueInfo.getMaximumCapacity() + ", queueApplicationCount=" + queueInfo.getApplications().size() + ", queueChildQueueCount=" + queueInfo.getChildQueues().size()); List<QueueUserACLInfo> listAclInfo = yarnClient.getQueueAclsInfo(); for (QueueUserACLInfo aclInfo : listAclInfo) { for (QueueACL userAcl : aclInfo.getUserAcls()) { LOG.info("User ACL Info for Queue" + ", queueName=" + aclInfo.getQueueName() + ", userAcl=" + userAcl.name()); } } // Get a new application id YarnClientApplication app = yarnClient.createApplication(); GetNewApplicationResponse appResponse = app.getNewApplicationResponse(); // TODO get min/max resource capabilities from RM and change memory ask // if needed // If we do not have min/max, we may not be able to correctly request // the required resources from the RM for the app master // Memory ask has to be a multiple of min and less than max. // Dump out information about cluster capability as seen by the resource // manager int maxMem = appResponse.getMaximumResourceCapability().getMemory(); LOG.info("Max mem capabililty of resources in this cluster " + maxMem); // A resource ask cannot exceed the max. if (amMemory > maxMem) { LOG.info("AM memory specified above max threshold of cluster. Using max value." + ", specified=" + amMemory + ", max=" + maxMem); amMemory = maxMem; } int maxVCores = appResponse.getMaximumResourceCapability().getVirtualCores(); LOG.info("Max virtual cores capabililty of resources in this cluster " + maxVCores); if (amVCores > maxVCores) { LOG.info("AM virtual cores specified above max threshold of cluster. " + "Using max value." + ", specified=" + amVCores + ", max=" + maxVCores); amVCores = maxVCores; } // set the application name ApplicationSubmissionContext appContext = app.getApplicationSubmissionContext(); ApplicationId appId = appContext.getApplicationId(); appContext.setKeepContainersAcrossApplicationAttempts(keepContainers); appContext.setApplicationName(appName); // Set up the container launch context for the application master ContainerLaunchContext amContainer = Records.newRecord(ContainerLaunchContext.class); // set local resources for the application master // local files or archives as needed // In this scenario, the jar file for the application master is part of // the local resources Map<String, LocalResource> localResources = new HashMap<String, LocalResource>(); LOG.info("Copy App Master jar from local filesystem and add to local environment"); // Copy the application master jar to the filesystem // Create a local resource to point to the destination jar path FileSystem fs = FileSystem.get(conf); addToLocalResources(fs, appMasterJar, appMasterJarPath, appId.toString(), localResources, null); // Set the log4j properties if needed if (!log4jPropFile.isEmpty()) { addToLocalResources(fs, log4jPropFile, log4jPath, appId.toString(), localResources, null); } // The shell script has to be made available on the final container(s) // where it will be executed. // To do this, we need to first copy into the filesystem that is visible // to the yarn framework. // We do not need to set this as a local resource for the application // master as the application master does not need it. String hdfsShellScriptLocation = ""; long hdfsShellScriptLen = 0; long hdfsShellScriptTimestamp = 0; if (!shellScriptPath.isEmpty()) { Path shellSrc = new Path(shellScriptPath); String shellPathSuffix = appName + "/" + appId.toString() + "/" + SCRIPT_PATH; Path shellDst = new Path(fs.getHomeDirectory(), shellPathSuffix); fs.copyFromLocalFile(false, true, shellSrc, shellDst); hdfsShellScriptLocation = shellDst.toUri().toString(); FileStatus shellFileStatus = fs.getFileStatus(shellDst); hdfsShellScriptLen = shellFileStatus.getLen(); hdfsShellScriptTimestamp = shellFileStatus.getModificationTime(); } if (!shellCommand.isEmpty()) { addToLocalResources(fs, null, shellCommandPath, appId.toString(), localResources, shellCommand); } if (shellArgs.length > 0) { addToLocalResources(fs, null, shellArgsPath, appId.toString(), localResources, StringUtils.join(shellArgs, " ")); } // Set local resource info into app master container launch context amContainer.setLocalResources(localResources); // Set the necessary security tokens as needed // amContainer.setContainerTokens(containerToken); // Set the env variables to be setup in the env where the application // master will be run LOG.info("Set the environment for the application master"); Map<String, String> env = new HashMap<String, String>(); // put location of shell script into env // using the env info, the application master will create the correct // local resource for the // eventual containers that will be launched to execute the shell // scripts env.put(ScistorDSConstants.DISTRIBUTEDSHELLSCRIPTLOCATION, hdfsShellScriptLocation); env.put(ScistorDSConstants.DISTRIBUTEDSHELLSCRIPTTIMESTAMP, Long.toString(hdfsShellScriptTimestamp)); env.put(ScistorDSConstants.DISTRIBUTEDSHELLSCRIPTLEN, Long.toString(hdfsShellScriptLen)); // ========================================jar? if (containerJarPaths.length != 0) { for (int i = 0; i < containerJarPaths.length; i++) { String hdfsJarLocation = ""; String[] jarNameSplit = containerJarPaths[i].split("/"); String jarName = jarNameSplit[jarNameSplit.length - 1]; long hdfsJarLen = 0; long hdfsJarTimestamp = 0; if (!containerJarPaths[i].isEmpty()) { Path jarSrc = new Path(containerJarPaths[i]); String jarPathSuffix = appName + "/" + appId.toString() + "/" + jarName; Path jarDst = new Path(fs.getHomeDirectory(), jarPathSuffix); fs.copyFromLocalFile(false, true, jarSrc, jarDst); hdfsJarLocation = jarDst.toUri().toString(); FileStatus jarFileStatus = fs.getFileStatus(jarDst); hdfsJarLen = jarFileStatus.getLen(); hdfsJarTimestamp = jarFileStatus.getModificationTime(); env.put(ScistorDSConstants.DISTRIBUTEDJARLOCATION + i, hdfsJarLocation); env.put(ScistorDSConstants.DISTRIBUTEDJARTIMESTAMP + i, Long.toString(hdfsJarTimestamp)); env.put(ScistorDSConstants.DISTRIBUTEDJARLEN + i, Long.toString(hdfsJarLen)); } } } // ========================================jar? // Add AppMaster.jar location to classpath // At some point we should not be required to add // the hadoop specific classpaths to the env. // It should be provided out of the box. // For now setting all required classpaths including // the classpath to "." for the application jar StringBuilder classPathEnv = new StringBuilder(Environment.CLASSPATH.$$()) .append(ApplicationConstants.CLASS_PATH_SEPARATOR).append("./*"); for (String c : conf.getStrings(YarnConfiguration.YARN_APPLICATION_CLASSPATH, YarnConfiguration.DEFAULT_YARN_CROSS_PLATFORM_APPLICATION_CLASSPATH)) { classPathEnv.append(ApplicationConstants.CLASS_PATH_SEPARATOR); classPathEnv.append(c.trim()); } classPathEnv.append(ApplicationConstants.CLASS_PATH_SEPARATOR).append("./log4j.properties"); // add the runtime classpath needed for tests to work if (conf.getBoolean(YarnConfiguration.IS_MINI_YARN_CLUSTER, false)) { classPathEnv.append(':'); classPathEnv.append(System.getProperty("java.class.path")); } env.put("CLASSPATH", classPathEnv.toString()); amContainer.setEnvironment(env); // Set the necessary command to execute the application master Vector<CharSequence> vargs = new Vector<CharSequence>(30); // Set java executable command LOG.info("Setting up app master command"); vargs.add(Environment.JAVA_HOME.$$() + "/bin/java"); // Set Xmx based on am memory size vargs.add("-Xmx" + amMemory + "m"); // Set class name vargs.add(appMasterMainClass); // Set params for Application Master vargs.add("--container_memory " + String.valueOf(containerMemory)); vargs.add("--container_vcores " + String.valueOf(containerVirtualCores)); vargs.add("--num_containers " + String.valueOf(numContainers)); vargs.add("--priority " + String.valueOf(shellCmdPriority)); for (Map.Entry<String, String> entry : shellEnv.entrySet()) { vargs.add("--shell_env " + entry.getKey() + "=" + entry.getValue()); } if (debugFlag) { vargs.add("--debug"); } vargs.add("1>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/AppMaster.stdout"); vargs.add("2>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/AppMaster.stderr"); // Get final commmand StringBuilder command = new StringBuilder(); for (CharSequence str : vargs) { command.append(str).append(" "); } LOG.info("Completed setting up app master command " + command.toString()); List<String> commands = new ArrayList<String>(); commands.add(command.toString()); amContainer.setCommands(commands); // Set up resource type requirements // For now, both memory and vcores are supported, so we set memory and // vcores requirements Resource capability = Records.newRecord(Resource.class); capability.setMemory(amMemory); capability.setVirtualCores(amVCores); appContext.setResource(capability); // Service data is a binary blob that can be passed to the application // Not needed in this scenario // amContainer.setServiceData(serviceData); // Setup security tokens if (UserGroupInformation.isSecurityEnabled()) { Credentials credentials = new Credentials(); String tokenRenewer = conf.get(YarnConfiguration.RM_PRINCIPAL); if (tokenRenewer == null || tokenRenewer.length() == 0) { throw new IOException("Can't get Master Kerberos principal for the RM to use as renewer"); } // For now, only getting tokens for the default file-system. final Token<?> tokens[] = fs.addDelegationTokens(tokenRenewer, credentials); if (tokens != null) { for (Token<?> token : tokens) { LOG.info("Got dt for " + fs.getUri() + "; " + token); } } DataOutputBuffer dob = new DataOutputBuffer(); credentials.writeTokenStorageToStream(dob); ByteBuffer fsTokens = ByteBuffer.wrap(dob.getData(), 0, dob.getLength()); amContainer.setTokens(fsTokens); } appContext.setAMContainerSpec(amContainer); // Set the priority for the application master Priority pri = Records.newRecord(Priority.class); // TODO - what is the range for priority? how to decide? pri.setPriority(amPriority); appContext.setPriority(pri); // Set the queue to which this application is to be submitted in the RM appContext.setQueue(amQueue); // Submit the application to the applications manager // SubmitApplicationResponse submitResp = // applicationsManager.submitApplication(appRequest); // Ignore the response as either a valid response object is returned on // success // or an exception thrown to denote some form of a failure LOG.info("Submitting application to ASM"); yarnClient.submitApplication(appContext); // TODO // Try submitting the same request again // app submission failure? // Monitor the application return monitorApplication(appId); }
From source file:com.sirius.hadoop.job.InputUpload.java
License:Apache License
public static void main(String[] args) throws Exception { hdfs = new URI("hdfs://hadoop1:8020"); FileSystem fs = FileSystem.get(hdfs, configuration); if (fs.exists(input_path)) { fs.delete(input_path, true);//from w w w . j a va2 s.c o m } System.out.println(Arrays.toString(fs.listStatus(new Path("/")))); fs.copyFromLocalFile(false, true, new Path("/Users/pippo/Downloads/subscriber_status.statics.input"), input_path); }
From source file:com.srini.hadoopYarn.Client.java
License:Apache License
/** * Main run function for the client//from w ww . java 2 s . com * @return true if application completed successfully * @throws IOException * @throws YarnException */ public boolean run() throws IOException, YarnException { LOG.info("Running Client"); yarnClient.start(); YarnClusterMetrics clusterMetrics = yarnClient.getYarnClusterMetrics(); LOG.info("Got Cluster metric info from ASM" + ", numNodeManagers=" + clusterMetrics.getNumNodeManagers()); List<NodeReport> clusterNodeReports = yarnClient.getNodeReports(NodeState.RUNNING); LOG.info("Got Cluster node info from ASM"); for (NodeReport node : clusterNodeReports) { LOG.info("Got node report from ASM for" + ", nodeId=" + node.getNodeId() + ", nodeAddress" + node.getHttpAddress() + ", nodeRackName" + node.getRackName() + ", nodeNumContainers" + node.getNumContainers()); } QueueInfo queueInfo = yarnClient.getQueueInfo(this.amQueue); LOG.info("Queue info" + ", queueName=" + queueInfo.getQueueName() + ", queueCurrentCapacity=" + queueInfo.getCurrentCapacity() + ", queueMaxCapacity=" + queueInfo.getMaximumCapacity() + ", queueApplicationCount=" + queueInfo.getApplications().size() + ", queueChildQueueCount=" + queueInfo.getChildQueues().size()); List<QueueUserACLInfo> listAclInfo = yarnClient.getQueueAclsInfo(); for (QueueUserACLInfo aclInfo : listAclInfo) { for (QueueACL userAcl : aclInfo.getUserAcls()) { LOG.info("User ACL Info for Queue" + ", queueName=" + aclInfo.getQueueName() + ", userAcl=" + userAcl.name()); } } // Get a new application id YarnClientApplication app = yarnClient.createApplication(); GetNewApplicationResponse appResponse = app.getNewApplicationResponse(); // TODO get min/max resource capabilities from RM and change memory ask if needed // If we do not have min/max, we may not be able to correctly request // the required resources from the RM for the app master // Memory ask has to be a multiple of min and less than max. // Dump out information about cluster capability as seen by the resource manager int maxMem = appResponse.getMaximumResourceCapability().getMemory(); LOG.info("Max mem capabililty of resources in this cluster " + maxMem); // A resource ask cannot exceed the max. if (amMemory > maxMem) { LOG.info("AM memory specified above max threshold of cluster. Using max value." + ", specified=" + amMemory + ", max=" + maxMem); amMemory = maxMem; } // set the application name ApplicationSubmissionContext appContext = app.getApplicationSubmissionContext(); ApplicationId appId = appContext.getApplicationId(); appContext.setApplicationName(appName); // Set up the container launch context for the application master ContainerLaunchContext amContainer = Records.newRecord(ContainerLaunchContext.class); // set local resources for the application master // local files or archives as needed // In this scenario, the jar file for the application master is part of the local resources Map<String, LocalResource> localResources = new HashMap<String, LocalResource>(); LOG.info("Copy App Master jar from local filesystem and add to local environment"); // Copy the application master jar to the filesystem // Create a local resource to point to the destination jar path FileSystem fs = FileSystem.get(conf); Path src = new Path(appMasterJar); String pathSuffix = appName + "/" + appId.getId() + "/AppMaster.jar"; Path dst = new Path(fs.getHomeDirectory(), pathSuffix); fs.copyFromLocalFile(false, true, src, dst); FileStatus destStatus = fs.getFileStatus(dst); LocalResource amJarRsrc = Records.newRecord(LocalResource.class); // Set the type of resource - file or archive // archives are untarred at destination // we don't need the jar file to be untarred for now amJarRsrc.setType(LocalResourceType.FILE); // Set visibility of the resource // Setting to most private option amJarRsrc.setVisibility(LocalResourceVisibility.APPLICATION); // Set the resource to be copied over amJarRsrc.setResource(ConverterUtils.getYarnUrlFromPath(dst)); // Set timestamp and length of file so that the framework // can do basic sanity checks for the local resource // after it has been copied over to ensure it is the same // resource the client intended to use with the application amJarRsrc.setTimestamp(destStatus.getModificationTime()); amJarRsrc.setSize(destStatus.getLen()); localResources.put("AppMaster.jar", amJarRsrc); // Set the log4j properties if needed if (!log4jPropFile.isEmpty()) { Path log4jSrc = new Path(log4jPropFile); Path log4jDst = new Path(fs.getHomeDirectory(), "log4j.props"); fs.copyFromLocalFile(false, true, log4jSrc, log4jDst); FileStatus log4jFileStatus = fs.getFileStatus(log4jDst); LocalResource log4jRsrc = Records.newRecord(LocalResource.class); log4jRsrc.setType(LocalResourceType.FILE); log4jRsrc.setVisibility(LocalResourceVisibility.APPLICATION); log4jRsrc.setResource(ConverterUtils.getYarnUrlFromURI(log4jDst.toUri())); log4jRsrc.setTimestamp(log4jFileStatus.getModificationTime()); log4jRsrc.setSize(log4jFileStatus.getLen()); localResources.put("log4j.properties", log4jRsrc); } // The shell script has to be made available on the final container(s) // where it will be executed. // To do this, we need to first copy into the filesystem that is visible // to the yarn framework. // We do not need to set this as a local resource for the application // master as the application master does not need it. String hdfsShellScriptLocation = ""; long hdfsShellScriptLen = 0; long hdfsShellScriptTimestamp = 0; if (!shellScriptPath.isEmpty()) { Path shellSrc = new Path(shellScriptPath); String shellPathSuffix = appName + "/" + appId.getId() + "/ExecShellScript.sh"; Path shellDst = new Path(fs.getHomeDirectory(), shellPathSuffix); fs.copyFromLocalFile(false, true, shellSrc, shellDst); hdfsShellScriptLocation = shellDst.toUri().toString(); FileStatus shellFileStatus = fs.getFileStatus(shellDst); hdfsShellScriptLen = shellFileStatus.getLen(); hdfsShellScriptTimestamp = shellFileStatus.getModificationTime(); } // Set local resource info into app master container launch context amContainer.setLocalResources(localResources); // Set the necessary security tokens as needed //amContainer.setContainerTokens(containerToken); // Set the env variables to be setup in the env where the application master will be run LOG.info("Set the environment for the application master"); Map<String, String> env = new HashMap<String, String>(); // put location of shell script into env // using the env info, the application master will create the correct local resource for the // eventual containers that will be launched to execute the shell scripts env.put(DSConstants.DISTRIBUTEDSHELLSCRIPTLOCATION, hdfsShellScriptLocation); env.put(DSConstants.DISTRIBUTEDSHELLSCRIPTTIMESTAMP, Long.toString(hdfsShellScriptTimestamp)); env.put(DSConstants.DISTRIBUTEDSHELLSCRIPTLEN, Long.toString(hdfsShellScriptLen)); // Add AppMaster.jar location to classpath // At some point we should not be required to add // the hadoop specific classpaths to the env. // It should be provided out of the box. // For now setting all required classpaths including // the classpath to "." for the application jar StringBuilder classPathEnv = new StringBuilder(Environment.CLASSPATH.$()).append(File.pathSeparatorChar) .append("./*"); for (String c : conf.getStrings(YarnConfiguration.YARN_APPLICATION_CLASSPATH, YarnConfiguration.DEFAULT_YARN_APPLICATION_CLASSPATH)) { classPathEnv.append(File.pathSeparatorChar); classPathEnv.append(c.trim()); } classPathEnv.append(File.pathSeparatorChar).append("./log4j.properties"); // add the runtime classpath needed for tests to work if (conf.getBoolean(YarnConfiguration.IS_MINI_YARN_CLUSTER, false)) { classPathEnv.append(':'); classPathEnv.append(System.getProperty("java.class.path")); } env.put("CLASSPATH", classPathEnv.toString()); amContainer.setEnvironment(env); // Set the necessary command to execute the application master Vector<CharSequence> vargs = new Vector<CharSequence>(30); // Set java executable command LOG.info("Setting up app master command"); vargs.add(Environment.JAVA_HOME.$() + "/bin/java"); // Set Xmx based on am memory size vargs.add("-Xmx" + amMemory + "m"); // Set class name vargs.add(appMasterMainClass); // Set params for Application Master vargs.add("--container_memory " + String.valueOf(containerMemory)); vargs.add("--num_containers " + String.valueOf(numContainers)); vargs.add("--priority " + String.valueOf(shellCmdPriority)); if (!shellCommand.isEmpty()) { vargs.add("--shell_command " + shellCommand + ""); } if (!shellArgs.isEmpty()) { vargs.add("--shell_args " + shellArgs + ""); } for (Map.Entry<String, String> entry : shellEnv.entrySet()) { vargs.add("--shell_env " + entry.getKey() + "=" + entry.getValue()); } if (debugFlag) { vargs.add("--debug"); } vargs.add("1>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/AppMaster.stdout"); vargs.add("2>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/AppMaster.stderr"); // Get final commmand StringBuilder command = new StringBuilder(); for (CharSequence str : vargs) { command.append(str).append(" "); } LOG.info("Completed setting up app master command " + command.toString()); List<String> commands = new ArrayList<String>(); commands.add(command.toString()); amContainer.setCommands(commands); // Set up resource type requirements // For now, only memory is supported so we set memory requirements Resource capability = Records.newRecord(Resource.class); capability.setMemory(amMemory); appContext.setResource(capability); // Service data is a binary blob that can be passed to the application // Not needed in this scenario // amContainer.setServiceData(serviceData); // Setup security tokens if (UserGroupInformation.isSecurityEnabled()) { Credentials credentials = new Credentials(); String tokenRenewer = conf.get(YarnConfiguration.RM_PRINCIPAL); if (tokenRenewer == null || tokenRenewer.length() == 0) { throw new IOException("Can't get Master Kerberos principal for the RM to use as renewer"); } // For now, only getting tokens for the default file-system. final Token<?> tokens[] = fs.addDelegationTokens(tokenRenewer, credentials); if (tokens != null) { for (Token<?> token : tokens) { LOG.info("Got dt for " + fs.getUri() + "; " + token); } } DataOutputBuffer dob = new DataOutputBuffer(); credentials.writeTokenStorageToStream(dob); ByteBuffer fsTokens = ByteBuffer.wrap(dob.getData(), 0, dob.getLength()); amContainer.setTokens(fsTokens); } appContext.setAMContainerSpec(amContainer); // Set the priority for the application master Priority pri = Records.newRecord(Priority.class); // TODO - what is the range for priority? how to decide? pri.setPriority(amPriority); appContext.setPriority(pri); // Set the queue to which this application is to be submitted in the RM appContext.setQueue(amQueue); // Submit the application to the applications manager // SubmitApplicationResponse submitResp = applicationsManager.submitApplication(appRequest); // Ignore the response as either a valid response object is returned on success // or an exception thrown to denote some form of a failure LOG.info("Submitting application to ASM"); yarnClient.submitApplication(appContext); // TODO // Try submitting the same request again // app submission failure? // Monitor the application return monitorApplication(appId); }
From source file:com.streamsets.pipeline.spark.SparkStreamingBinding.java
License:Apache License
@Override public void init() throws Exception { for (Object key : properties.keySet()) { logMessage("Property => " + key + " => " + properties.getProperty(key.toString()), isRunningInMesos); }/* www. j a va2s. c om*/ final SparkConf conf = new SparkConf().setAppName("StreamSets Data Collector - Streaming Mode"); conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer"); final String topic = getProperty(TOPIC); final long duration; String durationAsString = getProperty(MAX_WAIT_TIME); try { duration = Long.parseLong(durationAsString); } catch (NumberFormatException ex) { String msg = "Invalid " + MAX_WAIT_TIME + " '" + durationAsString + "' : " + ex; throw new IllegalArgumentException(msg, ex); } Configuration hadoopConf = new SparkHadoopUtil().newConfiguration(conf); if (isRunningInMesos) { hadoopConf = getHadoopConf(hadoopConf); } else { hadoopConf = new Configuration(); } URI hdfsURI = FileSystem.getDefaultUri(hadoopConf); logMessage("Default FS URI: " + hdfsURI, isRunningInMesos); FileSystem hdfs = (new Path(hdfsURI)).getFileSystem(hadoopConf); Path sdcCheckpointPath = new Path(hdfs.getHomeDirectory(), ".streamsets-spark-streaming/" + getProperty("sdc.id") + "/" + encode(topic)); // encode as remote pipeline name might have colon within it String pipelineName = encode(getProperty("cluster.pipeline.name")); final Path checkPointPath = new Path(sdcCheckpointPath, pipelineName); hdfs.mkdirs(checkPointPath); if (!hdfs.isDirectory(checkPointPath)) { throw new IllegalStateException("Could not create checkpoint path: " + sdcCheckpointPath); } if (isRunningInMesos) { String scheme = hdfsURI.getScheme(); if (scheme.equals("hdfs")) { File mesosBootstrapFile = BootstrapCluster.getMesosBootstrapFile(); Path mesosBootstrapPath = new Path(checkPointPath, mesosBootstrapFile.getName()); // in case of hdfs, copy the jar file from local path to hdfs hdfs.copyFromLocalFile(false, true, new Path(mesosBootstrapFile.toURI()), mesosBootstrapPath); conf.setJars(new String[] { mesosBootstrapPath.toString() }); } else if (scheme.equals("s3") || scheme.equals("s3n") || scheme.equals("s3a")) { // we cant upload the jar to s3 as executors wont understand s3 scheme without the aws jar. // So have the jar available on http conf.setJars(new String[] { getProperty("mesos.jar.url") }); } else { throw new IllegalStateException("Unsupported scheme: " + scheme); } } JavaStreamingContextFactory javaStreamingContextFactory = new JavaStreamingContextFactoryImpl(conf, duration, checkPointPath.toString(), getProperty(METADATA_BROKER_LIST), topic, properties.getProperty(AUTO_OFFSET_RESET, "").trim(), isRunningInMesos); ssc = JavaStreamingContext.getOrCreate(checkPointPath.toString(), hadoopConf, javaStreamingContextFactory, true); // mesos tries to stop the context internally, so don't do it here - deadlock bug in spark if (!isRunningInMesos) { final Thread shutdownHookThread = new Thread("Spark.shutdownHook") { @Override public void run() { LOG.debug("Gracefully stopping Spark Streaming Application"); ssc.stop(true, true); LOG.info("Application stopped"); } }; Runtime.getRuntime().addShutdownHook(shutdownHookThread); } logMessage("Making calls through spark context ", isRunningInMesos); ssc.start(); }