List of usage examples for org.apache.hadoop.fs.FileSystem#getUri
public abstract URI getUri();
From source file:com.cloudera.livy.test.jobs.SQLGetTweets.java
License:Apache License
@Override public List<String> call(JobContext jc) throws Exception { InputStream source = getClass().getResourceAsStream("/testweet.json"); // Save the resource as a file in HDFS (or the local tmp dir when using a local filesystem). URI input;//from w w w . j a v a 2s .c o m File local = File.createTempFile("tweets", ".json", jc.getLocalTmpDir()); Files.copy(source, local.toPath(), StandardCopyOption.REPLACE_EXISTING); FileSystem fs = FileSystem.get(jc.sc().sc().hadoopConfiguration()); if ("file".equals(fs.getUri().getScheme())) { input = local.toURI(); } else { String uuid = UUID.randomUUID().toString(); Path target = new Path("/tmp/" + uuid + "-tweets.json"); fs.copyFromLocalFile(new Path(local.toURI()), target); input = target.toUri(); } SQLContext sqlctx = useHiveContext ? jc.hivectx() : jc.sqlctx(); DataFrame df = sqlctx.jsonFile(input.toString()); df.registerTempTable("tweets"); DataFrame topTweets = sqlctx.sql("SELECT text, retweetCount FROM tweets ORDER BY retweetCount LIMIT 10"); List<String> tweetList = new ArrayList<>(); for (Row r : topTweets.collect()) { tweetList.add(r.toString()); } return tweetList; }
From source file:com.cloudera.llama.server.TestMiniLlama.java
License:Apache License
private void testMiniLlama(Configuration conf, boolean writeHdfsConf) throws Exception { File confFile = null;/*from w w w . ja v a 2s . co m*/ MiniLlama server = new MiniLlama(conf); final NotificationEndPoint callbackServer = new NotificationEndPoint(); try { callbackServer.setConf(createCallbackConfiguration()); callbackServer.start(); Assert.assertNotNull(server.getConf().get(LlamaAM.CORE_QUEUES_KEY)); if (writeHdfsConf) { File confDir = new File("target", UUID.randomUUID().toString()); confDir.mkdirs(); confFile = new File(confDir, "minidfs-site.xml").getAbsoluteFile(); server.setWriteHadoopConfig(confFile.getAbsolutePath()); } server.start(); if (writeHdfsConf) { Assert.assertTrue(confFile.exists()); } Assert.assertNotSame(0, server.getAddressPort()); TTransport transport = new TSocket(server.getAddressHost(), server.getAddressPort()); transport.open(); TProtocol protocol = new TBinaryProtocol(transport); LlamaAMService.Client client = new LlamaAMService.Client(protocol); TLlamaAMRegisterRequest trReq = new TLlamaAMRegisterRequest(); trReq.setVersion(TLlamaServiceVersion.V1); trReq.setClient_id(TypeUtils.toTUniqueId(UUID.randomUUID())); TNetworkAddress tAddress = new TNetworkAddress(); tAddress.setHostname(callbackServer.getAddressHost()); tAddress.setPort(callbackServer.getAddressPort()); trReq.setNotification_callback_service(tAddress); //register TLlamaAMRegisterResponse trRes = client.Register(trReq); Assert.assertEquals(TStatusCode.OK, trRes.getStatus().getStatus_code()); //getNodes TLlamaAMGetNodesRequest tgnReq = new TLlamaAMGetNodesRequest(); tgnReq.setVersion(TLlamaServiceVersion.V1); tgnReq.setAm_handle(trRes.getAm_handle()); TLlamaAMGetNodesResponse tgnRes = client.GetNodes(tgnReq); Assert.assertEquals(TStatusCode.OK, tgnRes.getStatus().getStatus_code()); Assert.assertEquals(new HashSet<String>(server.getDataNodes()), new HashSet<String>(tgnRes.getNodes())); reserveExpandRelease(trRes, server, client, callbackServer, 1, 74); 
reserveExpandRelease(trRes, server, client, callbackServer, 1, 0); reserveExpandRelease(trRes, server, client, callbackServer, 2, 74); reserveExpandRelease(trRes, server, client, callbackServer, 1, 0); //test MiniHDFS FileSystem fs = FileSystem.get(server.getConf()); Assert.assertTrue(fs.getUri().getScheme().equals("hdfs")); fs.listStatus(new Path("/")); OutputStream os = fs.create(new Path("/test.txt")); os.write(0); os.close(); //unregister TLlamaAMUnregisterRequest turReq = new TLlamaAMUnregisterRequest(); turReq.setVersion(TLlamaServiceVersion.V1); turReq.setAm_handle(trRes.getAm_handle()); TLlamaAMUnregisterResponse turRes = client.Unregister(turReq); Assert.assertEquals(TStatusCode.OK, turRes.getStatus().getStatus_code()); } finally { server.stop(); callbackServer.stop(); } }
From source file:com.datasalt.pangool.solr.TupleSolrOutputFormat.java
License:Apache License
private void setupSolrHomeCache(File solrHome, Configuration conf) throws IOException { if (solrHome == null || !(solrHome.exists() && solrHome.isDirectory())) { throw new IOException("Invalid solr.home: " + solrHome); }/*from ww w .j a v a 2 s . com*/ localSolrHome = solrHome.getAbsolutePath(); File tmpZip = File.createTempFile("solr", "zip"); createZip(solrHome, tmpZip); // Make a reasonably unique name for the zip file in the distributed cache // to avoid collisions if multiple jobs are running. String hdfsZipName = UUID.randomUUID().toString() + '.' + ZIP_FILE_BASE_NAME; zipName = hdfsZipName; Path zipPath = new Path("/tmp", zipName); FileSystem fs = FileSystem.get(conf); fs.copyFromLocalFile(new Path(tmpZip.toString()), zipPath); final URI baseZipUrl = fs.getUri().resolve(zipPath.toString() + '#' + zipName); DistributedCache.addCacheArchive(baseZipUrl, conf); LOG.info("Set Solr cache: " + Arrays.asList(DistributedCache.getCacheArchives(conf))); }
From source file:com.datasalt.utils.mapred.joiner.MultiJoiner.java
License:Apache License
private void addChanneledInputInner(Integer channel, Path location, Class<? extends Object> channelClass, Class<? extends InputFormat> inputFormat, Class<? extends MultiJoinChanneledMapper> mapper) throws IOException { FileSystem fS = location.getFileSystem(getJob().getConfiguration()); if (!location.toString().startsWith("/")) { // relative path location = new Path(fS.getWorkingDirectory(), location); } else {/*from www . j a v a 2 s. c o m*/ // absolute path location = new Path(fS.getUri() + location.toString()); } addInOrder(channel + "", MultiJoinChanneledMapper.MULTIJOINER_CHANNELED_CHANNELS, getJob().getConfiguration()); addInOrder(location.toString(), MultiJoinChanneledMapper.MULTIJOINER_CHANNELED_FILES, getJob().getConfiguration()); System.out.println("Adding file " + location + " with mapper " + mapper.getName()); MultipleInputs.addInputPath(getJob(), location, inputFormat, mapper); }
From source file:com.datatorrent.stram.client.StramClientUtils.java
License:Apache License
/**
 * Resolves the DFS root directory from the {@code DT_DFS_ROOT_DIR} configuration entry.
 *
 * <ul>
 *   <li>Blank or missing value: {@code <home directory>/datatorrent}.</li>
 *   <li>Value containing {@code DT_DFS_USER_NAME}: the placeholder is replaced with the
 *       login user's short name and the expanded value is written back into {@code conf}.</li>
 *   <li>Value that parses as an absolute URI: returned as-is.</li>
 *   <li>Otherwise (including when the login user lookup or URI parsing fails, which is
 *       only logged): the value is combined with the scheme and authority of {@code fs}.</li>
 * </ul>
 *
 * @param fs   filesystem supplying the home directory and the default scheme/authority
 * @param conf configuration to read the root dir from (may be updated, see above)
 * @return the resolved root directory path
 */
public static Path getDTDFSRootDir(FileSystem fs, Configuration conf) {
    String rootDir = conf.get(DT_DFS_ROOT_DIR);
    if (StringUtils.isBlank(rootDir)) {
        return new Path(fs.getHomeDirectory(), "datatorrent");
    }

    try {
        if (rootDir.contains(DT_DFS_USER_NAME)) {
            String shortUserName = UserGroupInformation.getLoginUser().getShortUserName();
            rootDir = rootDir.replace(DT_DFS_USER_NAME, shortUserName);
            conf.set(DT_DFS_ROOT_DIR, rootDir);
        }
        URI candidate = new URI(rootDir);
        if (candidate.isAbsolute()) {
            return new Path(candidate);
        }
    } catch (IOException ex) {
        LOG.warn("Error getting user login name {}", rootDir, ex);
    } catch (URISyntaxException ex) {
        LOG.warn("{} is not a valid URI. Using the default filesystem to construct the path", rootDir, ex);
    }

    // Not an absolute URI: anchor the value on the supplied filesystem.
    URI defaultFsUri = fs.getUri();
    return new Path(defaultFsUri.getScheme(), defaultFsUri.getAuthority(), rootDir);
}
From source file:com.datatorrent.stram.StramClient.java
License:Apache License
/** * Launch application for the dag represented by this client. * * @throws YarnException//from www. ja va2 s .c o m * @throws IOException */ public void startApplication() throws YarnException, IOException { Class<?>[] defaultClasses; if (applicationType.equals(YARN_APPLICATION_TYPE)) { //TODO restrict the security check to only check if security is enabled for webservices. if (UserGroupInformation.isSecurityEnabled()) { defaultClasses = DATATORRENT_SECURITY_CLASSES; } else { defaultClasses = DATATORRENT_CLASSES; } } else { throw new IllegalStateException(applicationType + " is not a valid application type."); } LinkedHashSet<String> localJarFiles = findJars(dag, defaultClasses); if (resources != null) { localJarFiles.addAll(resources); } YarnClusterMetrics clusterMetrics = yarnClient.getYarnClusterMetrics(); LOG.info("Got Cluster metric info from ASM" + ", numNodeManagers=" + clusterMetrics.getNumNodeManagers()); //GetClusterNodesRequest clusterNodesReq = Records.newRecord(GetClusterNodesRequest.class); //GetClusterNodesResponse clusterNodesResp = rmClient.clientRM.getClusterNodes(clusterNodesReq); //LOG.info("Got Cluster node info from ASM"); //for (NodeReport node : clusterNodesResp.getNodeReports()) { // LOG.info("Got node report from ASM for" // + ", nodeId=" + node.getNodeId() // + ", nodeAddress" + node.getHttpAddress() // + ", nodeRackName" + node.getRackName() // + ", nodeNumContainers" + node.getNumContainers() // + ", nodeHealthStatus" + node.getHealthReport()); //} List<QueueUserACLInfo> listAclInfo = yarnClient.getQueueAclsInfo(); for (QueueUserACLInfo aclInfo : listAclInfo) { for (QueueACL userAcl : aclInfo.getUserAcls()) { LOG.info("User ACL Info for Queue" + ", queueName=" + aclInfo.getQueueName() + ", userAcl=" + userAcl.name()); } } // Get a new application id YarnClientApplication newApp = yarnClient.createApplication(); appId = newApp.getNewApplicationResponse().getApplicationId(); // Dump out information about cluster capability as seen by 
the resource manager int maxMem = newApp.getNewApplicationResponse().getMaximumResourceCapability().getMemory(); LOG.info("Max mem capabililty of resources in this cluster " + maxMem); int amMemory = dag.getMasterMemoryMB(); if (amMemory > maxMem) { LOG.info("AM memory specified above max threshold of cluster. Using max value." + ", specified=" + amMemory + ", max=" + maxMem); amMemory = maxMem; } if (dag.getAttributes().get(LogicalPlan.APPLICATION_ID) == null) { dag.setAttribute(LogicalPlan.APPLICATION_ID, appId.toString()); } // Create launch context for app master LOG.info("Setting up application submission context for ASM"); ApplicationSubmissionContext appContext = Records.newRecord(ApplicationSubmissionContext.class); // set the application id appContext.setApplicationId(appId); // set the application name appContext.setApplicationName(dag.getValue(LogicalPlan.APPLICATION_NAME)); appContext.setApplicationType(this.applicationType); if (YARN_APPLICATION_TYPE.equals(this.applicationType)) { //appContext.setMaxAppAttempts(1); // no retries until Stram is HA } // Set up the container launch context for the application master ContainerLaunchContext amContainer = Records.newRecord(ContainerLaunchContext.class); // Setup security tokens // If security is enabled get ResourceManager and NameNode delegation tokens. // Set these tokens on the container so that they are sent as part of application submission. // This also sets them up for renewal by ResourceManager. The NameNode delegation rmToken // is also used by ResourceManager to fetch the jars from HDFS and set them up for the // application master launch. 
if (UserGroupInformation.isSecurityEnabled()) { Credentials credentials = new Credentials(); String tokenRenewer = conf.get(YarnConfiguration.RM_PRINCIPAL); if (tokenRenewer == null || tokenRenewer.length() == 0) { throw new IOException("Can't get Master Kerberos principal for the RM to use as renewer"); } // For now, only getting tokens for the default file-system. FileSystem fs = StramClientUtils.newFileSystemInstance(conf); try { final Token<?> tokens[] = fs.addDelegationTokens(tokenRenewer, credentials); if (tokens != null) { for (Token<?> token : tokens) { LOG.info("Got dt for " + fs.getUri() + "; " + token); } } } finally { fs.close(); } addRMDelegationToken(tokenRenewer, credentials); DataOutputBuffer dob = new DataOutputBuffer(); credentials.writeTokenStorageToStream(dob); ByteBuffer fsTokens = ByteBuffer.wrap(dob.getData(), 0, dob.getLength()); amContainer.setTokens(fsTokens); } // set local resources for the application master // local files or archives as needed // In this scenario, the jar file for the application master is part of the local resources Map<String, LocalResource> localResources = new HashMap<String, LocalResource>(); // copy required jar files to dfs, to be localized for containers FileSystem fs = StramClientUtils.newFileSystemInstance(conf); try { Path appsBasePath = new Path(StramClientUtils.getDTDFSRootDir(fs, conf), StramClientUtils.SUBDIR_APPS); Path appPath = new Path(appsBasePath, appId.toString()); String libJarsCsv = copyFromLocal(fs, appPath, localJarFiles.toArray(new String[] {})); LOG.info("libjars: {}", libJarsCsv); dag.getAttributes().put(LogicalPlan.LIBRARY_JARS, libJarsCsv); LaunchContainerRunnable.addFilesToLocalResources(LocalResourceType.FILE, libJarsCsv, localResources, fs); if (archives != null) { String[] localFiles = archives.split(","); String archivesCsv = copyFromLocal(fs, appPath, localFiles); LOG.info("archives: {}", archivesCsv); dag.getAttributes().put(LogicalPlan.ARCHIVES, archivesCsv); 
LaunchContainerRunnable.addFilesToLocalResources(LocalResourceType.ARCHIVE, archivesCsv, localResources, fs); } if (files != null) { String[] localFiles = files.split(","); String filesCsv = copyFromLocal(fs, appPath, localFiles); LOG.info("files: {}", filesCsv); dag.getAttributes().put(LogicalPlan.FILES, filesCsv); LaunchContainerRunnable.addFilesToLocalResources(LocalResourceType.FILE, filesCsv, localResources, fs); } dag.getAttributes().put(LogicalPlan.APPLICATION_PATH, appPath.toString()); if (dag.getAttributes() .get(OperatorContext.STORAGE_AGENT) == null) { /* which would be the most likely case */ Path checkpointPath = new Path(appPath, LogicalPlan.SUBDIR_CHECKPOINTS); // use conf client side to pickup any proxy settings from dt-site.xml dag.setAttribute(OperatorContext.STORAGE_AGENT, new FSStorageAgent(checkpointPath.toString(), conf)); } if (dag.getAttributes().get(LogicalPlan.CONTAINER_OPTS_CONFIGURATOR) == null) { dag.setAttribute(LogicalPlan.CONTAINER_OPTS_CONFIGURATOR, new BasicContainerOptConfigurator()); } // Set the log4j properties if needed if (!log4jPropFile.isEmpty()) { Path log4jSrc = new Path(log4jPropFile); Path log4jDst = new Path(appPath, "log4j.props"); fs.copyFromLocalFile(false, true, log4jSrc, log4jDst); FileStatus log4jFileStatus = fs.getFileStatus(log4jDst); LocalResource log4jRsrc = Records.newRecord(LocalResource.class); log4jRsrc.setType(LocalResourceType.FILE); log4jRsrc.setVisibility(LocalResourceVisibility.APPLICATION); log4jRsrc.setResource(ConverterUtils.getYarnUrlFromURI(log4jDst.toUri())); log4jRsrc.setTimestamp(log4jFileStatus.getModificationTime()); log4jRsrc.setSize(log4jFileStatus.getLen()); localResources.put("log4j.properties", log4jRsrc); } if (originalAppId != null) { Path origAppPath = new Path(appsBasePath, this.originalAppId); LOG.info("Restart from {}", origAppPath); copyInitialState(origAppPath); } // push logical plan to DFS location Path cfgDst = new Path(appPath, LogicalPlan.SER_FILE_NAME); FSDataOutputStream 
outStream = fs.create(cfgDst, true); LogicalPlan.write(this.dag, outStream); outStream.close(); Path launchConfigDst = new Path(appPath, LogicalPlan.LAUNCH_CONFIG_FILE_NAME); outStream = fs.create(launchConfigDst, true); conf.writeXml(outStream); outStream.close(); FileStatus topologyFileStatus = fs.getFileStatus(cfgDst); LocalResource topologyRsrc = Records.newRecord(LocalResource.class); topologyRsrc.setType(LocalResourceType.FILE); topologyRsrc.setVisibility(LocalResourceVisibility.APPLICATION); topologyRsrc.setResource(ConverterUtils.getYarnUrlFromURI(cfgDst.toUri())); topologyRsrc.setTimestamp(topologyFileStatus.getModificationTime()); topologyRsrc.setSize(topologyFileStatus.getLen()); localResources.put(LogicalPlan.SER_FILE_NAME, topologyRsrc); // Set local resource info into app master container launch context amContainer.setLocalResources(localResources); // Set the necessary security tokens as needed //amContainer.setContainerTokens(containerToken); // Set the env variables to be setup in the env where the application master will be run LOG.info("Set the environment for the application master"); Map<String, String> env = new HashMap<String, String>(); // Add application jar(s) location to classpath // At some point we should not be required to add // the hadoop specific classpaths to the env. // It should be provided out of the box. // For now setting all required classpaths including // the classpath to "." for the application jar(s) // including ${CLASSPATH} will duplicate the class path in app master, removing it for now //StringBuilder classPathEnv = new StringBuilder("${CLASSPATH}:./*"); StringBuilder classPathEnv = new StringBuilder("./*"); String classpath = conf.get(YarnConfiguration.YARN_APPLICATION_CLASSPATH); for (String c : StringUtils.isBlank(classpath) ? 
YarnConfiguration.DEFAULT_YARN_APPLICATION_CLASSPATH : classpath.split(",")) { if (c.equals("$HADOOP_CLIENT_CONF_DIR")) { // SPOI-2501 continue; } classPathEnv.append(':'); classPathEnv.append(c.trim()); } env.put("CLASSPATH", classPathEnv.toString()); // propagate to replace node managers user name (effective in non-secure mode) env.put("HADOOP_USER_NAME", UserGroupInformation.getLoginUser().getUserName()); amContainer.setEnvironment(env); // Set the necessary command to execute the application master ArrayList<CharSequence> vargs = new ArrayList<CharSequence>(30); // Set java executable command LOG.info("Setting up app master command"); vargs.add(javaCmd); if (dag.isDebug()) { vargs.add("-agentlib:jdwp=transport=dt_socket,server=y,suspend=n"); } // Set Xmx based on am memory size // default heap size 75% of total memory if (dag.getMasterJVMOptions() != null) { vargs.add(dag.getMasterJVMOptions()); } vargs.add("-Xmx" + (amMemory * 3 / 4) + "m"); vargs.add("-XX:+HeapDumpOnOutOfMemoryError"); vargs.add("-XX:HeapDumpPath=/tmp/dt-heap-" + appId.getId() + ".bin"); vargs.add("-Dhadoop.root.logger=" + (dag.isDebug() ? 
"DEBUG" : "INFO") + ",RFA"); vargs.add("-Dhadoop.log.dir=" + ApplicationConstants.LOG_DIR_EXPANSION_VAR); vargs.add(String.format("-D%s=%s", StreamingContainer.PROP_APP_PATH, dag.assertAppPath())); if (dag.isDebug()) { vargs.add("-Dlog4j.debug=true"); } String loggersLevel = conf.get(DTLoggerFactory.DT_LOGGERS_LEVEL); if (loggersLevel != null) { vargs.add(String.format("-D%s=%s", DTLoggerFactory.DT_LOGGERS_LEVEL, loggersLevel)); } vargs.add(StreamingAppMaster.class.getName()); vargs.add("1>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/AppMaster.stdout"); vargs.add("2>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/AppMaster.stderr"); // Get final command StringBuilder command = new StringBuilder(9 * vargs.size()); for (CharSequence str : vargs) { command.append(str).append(" "); } LOG.info("Completed setting up app master command " + command.toString()); List<String> commands = new ArrayList<String>(); commands.add(command.toString()); amContainer.setCommands(commands); // Set up resource type requirements // For now, only memory is supported so we set memory requirements Resource capability = Records.newRecord(Resource.class); capability.setMemory(amMemory); appContext.setResource(capability); // Service data is a binary blob that can be passed to the application // Not needed in this scenario // amContainer.setServiceData(serviceData); appContext.setAMContainerSpec(amContainer); // Set the priority for the application master Priority pri = Records.newRecord(Priority.class); pri.setPriority(amPriority); appContext.setPriority(pri); // Set the queue to which this application is to be submitted in the RM appContext.setQueue(queueName); // Submit the application to the applications manager // SubmitApplicationResponse submitResp = rmClient.submitApplication(appRequest); // Ignore the response as either a valid response object is returned on success // or an exception thrown to denote some form of a failure String specStr = Objects.toStringHelper("Submitting 
application: ") .add("name", appContext.getApplicationName()).add("queue", appContext.getQueue()) .add("user", UserGroupInformation.getLoginUser()).add("resource", appContext.getResource()) .toString(); LOG.info(specStr); if (dag.isDebug()) { //LOG.info("Full submission context: " + appContext); } yarnClient.submitApplication(appContext); } finally { fs.close(); } }
From source file:com.epam.hadoop.nv.yarn.Client.java
License:Apache License
/** * Main run function for the client/*from www.j a v a 2 s.c o m*/ * * @return true if application completed successfully * @throws IOException * @throws YarnException */ public boolean run() throws IOException, YarnException { LOG.info("Running Client"); yarnClient.start(); YarnClusterMetrics clusterMetrics = yarnClient.getYarnClusterMetrics(); LOG.info("Got Cluster metric info from ASM" + ", numNodeManagers=" + clusterMetrics.getNumNodeManagers()); List<NodeReport> clusterNodeReports = yarnClient.getNodeReports(NodeState.RUNNING); LOG.info("Got Cluster node info from ASM"); for (NodeReport node : clusterNodeReports) { LOG.info("Got node report from ASM for" + ", nodeId=" + node.getNodeId() + ", nodeAddress" + node.getHttpAddress() + ", nodeRackName" + node.getRackName() + ", nodeNumContainers" + node.getNumContainers()); } QueueInfo queueInfo = yarnClient.getQueueInfo(this.amQueue); LOG.info("Queue info" + ", queueName=" + queueInfo.getQueueName() + ", queueCurrentCapacity=" + queueInfo.getCurrentCapacity() + ", queueMaxCapacity=" + queueInfo.getMaximumCapacity() + ", queueApplicationCount=" + queueInfo.getApplications().size() + ", queueChildQueueCount=" + queueInfo.getChildQueues().size()); List<QueueUserACLInfo> listAclInfo = yarnClient.getQueueAclsInfo(); for (QueueUserACLInfo aclInfo : listAclInfo) { for (QueueACL userAcl : aclInfo.getUserAcls()) { LOG.info("User ACL Info for Queue" + ", queueName=" + aclInfo.getQueueName() + ", userAcl=" + userAcl.name()); } } // Get a new application id YarnClientApplication app = yarnClient.createApplication(); GetNewApplicationResponse appResponse = app.getNewApplicationResponse(); // TODO get min/max resource capabilities from RM and change memory ask // if needed // If we do not have min/max, we may not be able to correctly request // the required resources from the RM for the app master // Memory ask has to be a multiple of min and less than max. 
// Dump out information about cluster capability as seen by the resource // manager int maxMem = appResponse.getMaximumResourceCapability().getMemory(); LOG.info("Max mem capabililty of resources in this cluster " + maxMem); // A resource ask cannot exceed the max. if (amMemory > maxMem) { LOG.info("AM memory specified above max threshold of cluster. Using max value." + ", specified=" + amMemory + ", max=" + maxMem); amMemory = maxMem; } int maxVCores = appResponse.getMaximumResourceCapability().getVirtualCores(); LOG.info("Max virtual cores capabililty of resources in this cluster " + maxVCores); if (amVCores > maxVCores) { LOG.info("AM virtual cores specified above max threshold of cluster. " + "Using max value." + ", specified=" + amVCores + ", max=" + maxVCores); amVCores = maxVCores; } // set the application name ApplicationSubmissionContext appContext = app.getApplicationSubmissionContext(); ApplicationId appId = appContext.getApplicationId(); appContext.setKeepContainersAcrossApplicationAttempts(keepContainers); appContext.setApplicationName(appName); // set local resources for the application master // local files or archives as needed // In this scenario, the jar file for the application master is part of // the local resources Map<String, LocalResource> localResources = new HashMap<String, LocalResource>(); LOG.info("Copy App Master jar from local filesystem and add to local environment"); // Copy the application master jar to the filesystem // Create a local resource to point to the destination jar path FileSystem fs = FileSystem.get(conf); addToLocalResources(fs, appMasterJar, appMasterJarPath, appId.toString(), localResources, null); // Set the log4j properties if needed if (!log4jPropFile.isEmpty()) { addToLocalResources(fs, log4jPropFile, log4jPath, appId.toString(), localResources, null); } // The shell script has to be made available on the final container(s) // where it will be executed. 
// To do this, we need to first copy into the filesystem that is visible // to the yarn framework. // We do not need to set this as a local resource for the application // master as the application master does not need it. String hdfsShellScriptLocation = ""; long hdfsShellScriptLen = 0; long hdfsShellScriptTimestamp = 0; if (!shellScriptPath.isEmpty()) { Path shellSrc = new Path(shellScriptPath); String shellPathSuffix = appName + "/" + appId.toString() + "/" + SCRIPT_PATH; Path shellDst = new Path(fs.getHomeDirectory(), shellPathSuffix); fs.copyFromLocalFile(false, true, shellSrc, shellDst); hdfsShellScriptLocation = shellDst.toUri().toString(); FileStatus shellFileStatus = fs.getFileStatus(shellDst); hdfsShellScriptLen = shellFileStatus.getLen(); hdfsShellScriptTimestamp = shellFileStatus.getModificationTime(); } if (!shellCommand.isEmpty()) { addToLocalResources(fs, null, shellCommandPath, appId.toString(), localResources, shellCommand); } if (shellArgs.length > 0) { addToLocalResources(fs, null, shellArgsPath, appId.toString(), localResources, StringUtils.join(shellArgs, " ")); } // Set the necessary security tokens as needed // amContainer.setContainerTokens(containerToken); // Set the env variables to be setup in the env where the application // master will be run LOG.info("Set the environment for the application master"); Map<String, String> env = new HashMap<String, String>(); // put location of shell script into env // using the env info, the application master will create the correct // local resource for the // eventual containers that will be launched to execute the shell // scripts env.put(DSConstants.DISTRIBUTEDSHELLSCRIPTLOCATION, hdfsShellScriptLocation); env.put(DSConstants.DISTRIBUTEDSHELLSCRIPTTIMESTAMP, Long.toString(hdfsShellScriptTimestamp)); env.put(DSConstants.DISTRIBUTEDSHELLSCRIPTLEN, Long.toString(hdfsShellScriptLen)); // Add AppMaster.jar location to classpath // At some point we should not be required to add // the hadoop specific 
classpaths to the env. // It should be provided out of the box. // For now setting all required classpaths including // the classpath to "." for the application jar StringBuilder classPathEnv = new StringBuilder(Environment.CLASSPATH.$$()) .append(ApplicationConstants.CLASS_PATH_SEPARATOR).append("./*"); for (String c : conf.getStrings(YarnConfiguration.YARN_APPLICATION_CLASSPATH, YarnConfiguration.DEFAULT_YARN_CROSS_PLATFORM_APPLICATION_CLASSPATH)) { classPathEnv.append(ApplicationConstants.CLASS_PATH_SEPARATOR); classPathEnv.append(c.trim()); } classPathEnv.append(ApplicationConstants.CLASS_PATH_SEPARATOR).append("./log4j.properties"); // add the runtime classpath needed for tests to work if (conf.getBoolean(YarnConfiguration.IS_MINI_YARN_CLUSTER, false)) { classPathEnv.append(':'); classPathEnv.append(System.getProperty("java.class.path")); } env.put("CLASSPATH", classPathEnv.toString()); // Set the necessary command to execute the application master Vector<CharSequence> vargs = new Vector<CharSequence>(30); // Set java executable command LOG.info("Setting up app master command"); vargs.add(Environment.JAVA_HOME.$$() + "/bin/java"); // Set Xmx based on am memory size vargs.add("-Xmx" + amMemory + "m"); // Set class name vargs.add(appMasterMainClass); // Set params for Application Master vargs.add("--container_memory " + String.valueOf(containerMemory)); vargs.add("--container_vcores " + String.valueOf(containerVirtualCores)); vargs.add("--num_containers " + String.valueOf(numContainers)); vargs.add("--priority " + String.valueOf(shellCmdPriority)); for (Map.Entry<String, String> entry : shellEnv.entrySet()) { vargs.add("--shell_env " + entry.getKey() + "=" + entry.getValue()); } if (debugFlag) { vargs.add("--debug"); } vargs.add("1>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/AppMaster.stdout"); vargs.add("2>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/AppMaster.stderr"); // Get final commmand StringBuilder command = new StringBuilder(); for 
(CharSequence str : vargs) { command.append(str).append(" "); } LOG.info("Completed setting up app master command " + command.toString()); List<String> commands = new ArrayList<String>(); commands.add(command.toString()); // Set up the container launch context for the application master ContainerLaunchContext amContainer = ContainerLaunchContext.newInstance(localResources, env, commands, null, null, null); // Set up resource type requirements // For now, both memory and vcores are supported, so we set memory and // vcores requirements Resource capability = Resource.newInstance(amMemory, amVCores); appContext.setResource(capability); // Service data is a binary blob that can be passed to the application // Not needed in this scenario // amContainer.setServiceData(serviceData); // Setup security tokens if (UserGroupInformation.isSecurityEnabled()) { // Note: Credentials class is marked as LimitedPrivate for HDFS and // MapReduce Credentials credentials = new Credentials(); String tokenRenewer = conf.get(YarnConfiguration.RM_PRINCIPAL); if (tokenRenewer == null || tokenRenewer.length() == 0) { throw new IOException("Can't get Master Kerberos principal for the RM to use as renewer"); } // For now, only getting tokens for the default file-system. final Token<?> tokens[] = fs.addDelegationTokens(tokenRenewer, credentials); if (tokens != null) { for (Token<?> token : tokens) { LOG.info("Got dt for " + fs.getUri() + "; " + token); } } DataOutputBuffer dob = new DataOutputBuffer(); credentials.writeTokenStorageToStream(dob); ByteBuffer fsTokens = ByteBuffer.wrap(dob.getData(), 0, dob.getLength()); amContainer.setTokens(fsTokens); } appContext.setAMContainerSpec(amContainer); // Set the priority for the application master // TODO - what is the range for priority? how to decide? 
Priority pri = Priority.newInstance(amPriority); appContext.setPriority(pri); // Set the queue to which this application is to be submitted in the RM appContext.setQueue(amQueue); // Submit the application to the applications manager // SubmitApplicationResponse submitResp = // applicationsManager.submitApplication(appRequest); // Ignore the response as either a valid response object is returned on // success // or an exception thrown to denote some form of a failure LOG.info("Submitting application to ASM"); yarnClient.submitApplication(appContext); // TODO // Try submitting the same request again // app submission failure? // Monitor the application return monitorApplication(appId); }
From source file:com.facebook.hiveio.output.HiveApiOutputCommitter.java
License:Apache License
/** * Table has no partitions, just copy data * * @param conf Configuration/*w w w .j a v a2 s . c om*/ * @param outputInfo OutputInfo * @throws IOException I/O errors */ private void noPartitionsCopyData(Configuration conf, OutputInfo outputInfo) throws IOException { Preconditions.checkArgument(!outputInfo.hasPartitionInfo()); Path tablePath = new Path(outputInfo.getTableRoot()); Path writePath = new Path(outputInfo.getPartitionPath()); FileSystem tableFs = tablePath.getFileSystem(conf); FileSystem writePathFs = writePath.getFileSystem(conf); if (!tableFs.getUri().equals(writePathFs.getUri())) { LOG.error("Table's root path fs {} is not on same as its partition path fs {}", tableFs.getUri(), writePathFs.getUri()); throw new IllegalStateException("Table's root path fs " + tableFs.getUri() + " is not on same as its partition path fs " + writePathFs.getUri()); } LOG.info("No partitions, copying data from {} to {}", writePath, tablePath); FileSystems.move(tableFs, writePath, writePath, tablePath); tableFs.delete(writePath, true); }
From source file:com.flyhz.avengers.framework.AvengersClient.java
License:Apache License
/** * Main run function for the client/*from w w w . j a va 2 s . co m*/ * * @return true if application completed successfully * @throws IOException * @throws YarnException */ private boolean run(String appName, List<String> commands) throws IOException, YarnException { LOG.info("Running Client"); yarnClient.start(); YarnClusterMetrics clusterMetrics = yarnClient.getYarnClusterMetrics(); LOG.info("Got Cluster metric info from ASM" + ", numNodeManagers=" + clusterMetrics.getNumNodeManagers()); List<NodeReport> clusterNodeReports = yarnClient.getNodeReports(NodeState.RUNNING); LOG.info("Got Cluster node info from ASM"); for (NodeReport node : clusterNodeReports) { LOG.info("Got node report from ASM for" + ", nodeId=" + node.getNodeId() + ", nodeAddress" + node.getHttpAddress() + ", nodeRackName" + node.getRackName() + ", nodeNumContainers" + node.getNumContainers()); } QueueInfo queueInfo = yarnClient.getQueueInfo(this.amQueue); LOG.info("Queue info" + ", queueName=" + queueInfo.getQueueName() + ", queueCurrentCapacity=" + queueInfo.getCurrentCapacity() + ", queueMaxCapacity=" + queueInfo.getMaximumCapacity() + ", queueApplicationCount=" + queueInfo.getApplications().size() + ", queueChildQueueCount=" + queueInfo.getChildQueues().size()); List<QueueUserACLInfo> listAclInfo = yarnClient.getQueueAclsInfo(); for (QueueUserACLInfo aclInfo : listAclInfo) { for (QueueACL userAcl : aclInfo.getUserAcls()) { LOG.info("User ACL Info for Queue" + ", queueName=" + aclInfo.getQueueName() + ", userAcl=" + userAcl.name()); } } // Get a new application id YarnClientApplication app = yarnClient.createApplication(); GetNewApplicationResponse appResponse = app.getNewApplicationResponse(); // if needed // If we do not have min/max, we may not be able to correctly request // the required resources from the RM for the app master // Memory ask has to be a multiple of min and less than max. 
// Dump out information about cluster capability as seen by the resource // manager int maxMem = appResponse.getMaximumResourceCapability().getMemory(); LOG.info("Max mem capabililty of resources in this cluster " + maxMem); // A resource ask cannot exceed the max. if (amMemory > maxMem) { LOG.info("AM memory specified above max threshold of cluster. Using max value." + ", specified=" + amMemory + ", max=" + maxMem); amMemory = maxMem; } // set the application name ApplicationSubmissionContext appContext = app.getApplicationSubmissionContext(); ApplicationId appId = appContext.getApplicationId(); appContext.setApplicationName(appName); // Set up the container launch context for the application master ContainerLaunchContext amContainer = Records.newRecord(ContainerLaunchContext.class); // set local resources for the application master // local files or archives as needed // In this scenario, the jar file for the application master is part of // the local resources Map<String, LocalResource> localResources = new HashMap<String, LocalResource>(); FileSystem fs = DistributedFileSystem.get(conf); Path src = new Path(appJar); Path dst = new Path(fs.getHomeDirectory(), "avengers/" + batchId + "/avengers.jar"); if (copy) { LOG.info("copy local jar to hdfs"); fs.copyFromLocalFile(false, true, src, dst); copy = false; } this.hdfsPath = dst.toUri().toString(); LOG.info("hdfs hdfsPath = {}", dst); FileStatus destStatus = fs.getFileStatus(dst); LocalResource amJarRsrc = Records.newRecord(LocalResource.class); amJarRsrc.setType(LocalResourceType.FILE); amJarRsrc.setVisibility(LocalResourceVisibility.APPLICATION); LOG.info("YarnURLFromPath ->{}", ConverterUtils.getYarnUrlFromPath(dst)); amJarRsrc.setResource(ConverterUtils.getYarnUrlFromPath(dst)); amJarRsrc.setTimestamp(destStatus.getModificationTime()); amJarRsrc.setSize(destStatus.getLen()); localResources.put("avengers.jar", amJarRsrc); // Set the log4j properties if needed if (!log4jPropFile.isEmpty()) { Path log4jSrc = new 
Path(log4jPropFile); Path log4jDst = new Path(fs.getHomeDirectory(), "log4j.props"); fs.copyFromLocalFile(false, true, log4jSrc, log4jDst); FileStatus log4jFileStatus = fs.getFileStatus(log4jDst); LocalResource log4jRsrc = Records.newRecord(LocalResource.class); log4jRsrc.setType(LocalResourceType.FILE); log4jRsrc.setVisibility(LocalResourceVisibility.APPLICATION); log4jRsrc.setResource(ConverterUtils.getYarnUrlFromURI(log4jDst.toUri())); log4jRsrc.setTimestamp(log4jFileStatus.getModificationTime()); log4jRsrc.setSize(log4jFileStatus.getLen()); localResources.put("log4j.properties", log4jRsrc); } // The shell script has to be made available on the final container(s) // where it will be executed. // To do this, we need to first copy into the filesystem that is visible // to the yarn framework. // We do not need to set this as a local resource for the application // master as the application master does not need it. // Set local resource info into app master container launch context amContainer.setLocalResources(localResources); // Set the necessary security tokens as needed // amContainer.setContainerTokens(containerToken); // Set the env variables to be setup in the env where the application // master will be run LOG.info("Set the environment for the application master"); Map<String, String> env = new HashMap<String, String>(); StringBuilder classPathEnv = new StringBuilder(Environment.CLASSPATH.$()).append(File.pathSeparatorChar); for (String c : conf.getStrings(YarnConfiguration.YARN_APPLICATION_CLASSPATH, YarnConfiguration.DEFAULT_YARN_APPLICATION_CLASSPATH)) { classPathEnv.append(File.pathSeparatorChar); classPathEnv.append(c.trim()); } classPathEnv.append(File.pathSeparatorChar).append("./log4j.properties"); // add the runtime classpath needed for tests to work if (conf.getBoolean(YarnConfiguration.IS_MINI_YARN_CLUSTER, false)) { classPathEnv.append(':'); classPathEnv.append(System.getProperty("java.class.path")); } LOG.info("CLASSPATH -> " + classPathEnv); 
env.put("CLASSPATH", classPathEnv.toString()); amContainer.setEnvironment(env); for (String cmd : commands) { LOG.info("run command {},appId {}", cmd, appId.getId()); } amContainer.setCommands(commands); // Set up resource type requirements // For now, only memory is supported so we set memory requirements Resource capability = Records.newRecord(Resource.class); capability.setMemory(amMemory); appContext.setResource(capability); // Service data is a binary blob that can be passed to the application // Not needed in this scenario // amContainer.setServiceData(serviceData); // Setup security tokens if (UserGroupInformation.isSecurityEnabled()) { Credentials credentials = new Credentials(); String tokenRenewer = conf.get(YarnConfiguration.RM_PRINCIPAL); if (tokenRenewer == null || tokenRenewer.length() == 0) { throw new IOException("Can't get Master Kerberos principal for the RM to use as renewer"); } // For now, only getting tokens for the default file-system. final Token<?> tokens[] = fs.addDelegationTokens(tokenRenewer, credentials); if (tokens != null) { for (Token<?> token : tokens) { LOG.info("Got dt for " + fs.getUri() + "; " + token); } } DataOutputBuffer dob = new DataOutputBuffer(); credentials.writeTokenStorageToStream(dob); ByteBuffer fsTokens = ByteBuffer.wrap(dob.getData(), 0, dob.getLength()); amContainer.setTokens(fsTokens); } appContext.setAMContainerSpec(amContainer); // Set the priority for the application master Priority pri = Records.newRecord(Priority.class); pri.setPriority(amPriority); appContext.setPriority(pri); // Set the queue to which this application is to be submitted in the RM appContext.setQueue(amQueue); // Submit the application to the applications manager // SubmitApplicationResponse submitResp = // applicationsManager.submitApplication(appRequest); // Ignore the response as either a valid response object is returned on // success // or an exception thrown to denote some form of a failure LOG.info("Submitting application to ASM"); 
yarnClient.submitApplication(appContext); // Try submitting the same request again // app submission failure? // Monitor the application return monitorApplication(appId); }
From source file:com.gemstone.gemfire.cache.hdfs.internal.HDFSStoreImpl.java
License:Apache License
private FileSystem createFileSystem(Configuration hconf, String configFile, boolean forceNew) throws IOException { FileSystem filesystem = null; // load hdfs client config file if specified. The path is on local file // system/*from w w w .j ava2s . c o m*/ if (configFile != null) { if (logger.isDebugEnabled()) { logger.debug("{}Adding resource config file to hdfs configuration:" + configFile, logPrefix); } hconf.addResource(new Path(configFile)); if (!new File(configFile).exists()) { logger.warn(LocalizedMessage.create(LocalizedStrings.HOPLOG_HDFS_CLIENT_CONFIG_FILE_ABSENT, configFile)); } } // This setting disables shutdown hook for file system object. Shutdown // hook may cause FS object to close before the cache or store and // unpredictable behavior. This setting is provided for GFXD like server // use cases where FS close is managed by a server. This setting is not // supported by old versions of hadoop, HADOOP-4829 hconf.setBoolean("fs.automatic.close", false); // Hadoop has a configuration parameter io.serializations that is a list of serialization // classes which can be used for obtaining serializers and deserializers. This parameter // by default contains avro classes. When a sequence file is created, it calls // SerializationFactory.getSerializer(keyclass). This internally creates objects using // reflection of all the classes that were part of io.serializations. But since, there is // no avro class available it throws an exception. // Before creating a sequenceFile, override the io.serializations parameter and pass only the classes // that are important to us. hconf.setStrings("io.serializations", new String[] { "org.apache.hadoop.io.serializer.WritableSerialization" }); // create writer SchemaMetrics.configureGlobally(hconf); String nameNodeURL = null; if ((nameNodeURL = getNameNodeURL()) == null) { nameNodeURL = hconf.get("fs.default.name"); } URI namenodeURI = URI.create(nameNodeURL); //if (! 
GemFireCacheImpl.getExisting().isHadoopGfxdLonerMode()) { String authType = hconf.get("hadoop.security.authentication"); //The following code handles Gemfire XD with secure HDFS //A static set is used to cache all known secure HDFS NameNode urls. UserGroupInformation.setConfiguration(hconf); //Compare authentication method ignoring case to make GFXD future version complaint //At least version 2.0.2 starts complaining if the string "kerberos" is not in all small case. //However it seems current version of hadoop accept the authType in any case if (authType.equalsIgnoreCase("kerberos")) { String principal = hconf.get(HoplogConfig.KERBEROS_PRINCIPAL); String keyTab = hconf.get(HoplogConfig.KERBEROS_KEYTAB_FILE); if (!PERFORM_SECURE_HDFS_CHECK) { if (logger.isDebugEnabled()) logger.debug("{}Ignore secure hdfs check", logPrefix); } else { if (!secureNameNodes.contains(nameNodeURL)) { if (logger.isDebugEnabled()) logger.debug("{}Executing secure hdfs check", logPrefix); try { filesystem = FileSystem.newInstance(namenodeURI, hconf); //Make sure no IOExceptions are generated when accessing insecure HDFS. filesystem.listFiles(new Path("/"), false); throw new HDFSIOException( "Gemfire XD HDFS client and HDFS cluster security levels do not match. 
The configured HDFS Namenode is not secured."); } catch (IOException ex) { secureNameNodes.add(nameNodeURL); } finally { //Close filesystem to avoid resource leak if (filesystem != null) { closeFileSystemIgnoreError(filesystem); } } } } // check to ensure the namenode principal is defined String nameNodePrincipal = hconf.get("dfs.namenode.kerberos.principal"); if (nameNodePrincipal == null) { throw new IOException(LocalizedStrings.GF_KERBEROS_NAMENODE_PRINCIPAL_UNDEF.toLocalizedString()); } // ok, the user specified a gfxd principal so we will try to login if (principal != null) { //If NameNode principal is the same as Gemfire XD principal, there is a //potential security hole String regex = "[/@]"; if (nameNodePrincipal != null) { String HDFSUser = nameNodePrincipal.split(regex)[0]; String GFXDUser = principal.split(regex)[0]; if (HDFSUser.equals(GFXDUser)) { logger.warn( LocalizedMessage.create(LocalizedStrings.HDFS_USER_IS_SAME_AS_GF_USER, GFXDUser)); } } // a keytab must exist if the user specifies a principal if (keyTab == null) { throw new IOException(LocalizedStrings.GF_KERBEROS_KEYTAB_UNDEF.toLocalizedString()); } // the keytab must exist as well File f = new File(keyTab); if (!f.exists()) { throw new FileNotFoundException( LocalizedStrings.GF_KERBEROS_KEYTAB_FILE_ABSENT.toLocalizedString(f.getAbsolutePath())); } //Authenticate Gemfire XD principal to Kerberos KDC using Gemfire XD keytab file String principalWithValidHost = SecurityUtil.getServerPrincipal(principal, ""); UserGroupInformation.loginUserFromKeytab(principalWithValidHost, keyTab); } else { logger.warn(LocalizedMessage.create(LocalizedStrings.GF_KERBEROS_PRINCIPAL_UNDEF)); } } //} filesystem = getFileSystemFactory().create(namenodeURI, hconf, forceNew); if (logger.isDebugEnabled()) { logger.debug("{}Initialized FileSystem linked to " + filesystem.getUri() + " " + filesystem.hashCode(), logPrefix); } return filesystem; }