List of usage examples for org.apache.hadoop.fs.FileSystem.makeQualified
public Path makeQualified(Path path)
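Before the real-world examples below, here is a minimal, self-contained sketch of what makeQualified does: it resolves a path against the file system's scheme, authority, and working directory, returning a fully qualified Path. The fs.defaultFS value and the printed URI are illustrative placeholders, not taken from any of the source files below.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class MakeQualifiedSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // hypothetical cluster address, for illustration only
    conf.set("fs.defaultFS", "hdfs://namenode:8020");
    FileSystem fs = FileSystem.get(conf);

    // a scheme-less, relative path ...
    Path relative = new Path("data/input");
    // ... gains scheme, authority, and an absolute base, e.g.
    // hdfs://namenode:8020/user/<current-user>/data/input
    Path qualified = fs.makeQualified(relative);
    System.out.println(qualified);
  }
}

The examples below show the same call used to normalize working directories, job submission paths, output directories, and YARN local resource paths.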
From source file:org.apache.gobblin.data.management.copy.replication.ConfigBasedDataset.java
License:Apache License
@Override
public Collection<? extends CopyEntity> getCopyableFiles(FileSystem targetFs,
    CopyConfiguration copyConfiguration) throws IOException {
  boolean enforceFileSizeMatch = this.rc.getEnforceFileSizeMatchFromConfigStore().isPresent()
      ? this.rc.getEnforceFileSizeMatchFromConfigStore().get()
      : copyConfiguration.isEnforceFileLengthMatch();
  List<CopyEntity> copyableFiles = Lists.newArrayList();
  EndPoint copyFromRaw = copyRoute.getCopyFrom();
  EndPoint copyToRaw = copyRoute.getCopyTo();
  if (!(copyFromRaw instanceof HadoopFsEndPoint && copyToRaw instanceof HadoopFsEndPoint)) {
    log.warn("Currently only handle the Hadoop Fs EndPoint replication");
    return copyableFiles;
  }

  if (!this.srcDataFileVersionStrategy.isPresent() || !this.dstDataFileVersionStrategy.isPresent()) {
    log.warn("Version strategy doesn't exist, cannot handle copy");
    return copyableFiles;
  }
  if (!this.srcDataFileVersionStrategy.get().getClass().getName()
      .equals(this.dstDataFileVersionStrategy.get().getClass().getName())) {
    log.warn("Version strategy src: {} and dst: {} doesn't match, cannot handle copy.",
        this.srcDataFileVersionStrategy.get().getClass().getName(),
        this.dstDataFileVersionStrategy.get().getClass().getName());
    return copyableFiles;
  }

  // For {@link HadoopFsEndPoint}s, set pathFilter and applyFilterToDirectories
  HadoopFsEndPoint copyFrom = (HadoopFsEndPoint) copyFromRaw;
  HadoopFsEndPoint copyTo = (HadoopFsEndPoint) copyToRaw;
  copyFrom.setPathFilter(pathFilter);
  copyFrom.setApplyFilterToDirectories(applyFilterToDirectories);
  copyTo.setPathFilter(pathFilter);
  copyTo.setApplyFilterToDirectories(applyFilterToDirectories);

  if (this.watermarkEnabled) {
    if ((!copyFromRaw.getWatermark().isPresent() && copyToRaw.getWatermark().isPresent())
        || (copyFromRaw.getWatermark().isPresent() && copyToRaw.getWatermark().isPresent()
            && copyFromRaw.getWatermark().get().compareTo(copyToRaw.getWatermark().get()) <= 0)) {
      log.info(
          "No need to copy as destination watermark >= source watermark with source watermark {}, for dataset with metadata {}",
          copyFromRaw.getWatermark().isPresent() ? copyFromRaw.getWatermark().get().toJson() : "N/A",
          this.rc.getMetaData());
      return copyableFiles;
    }
  }

  Configuration conf = HadoopUtils.newConfiguration();
  FileSystem copyFromFs = FileSystem.get(copyFrom.getFsURI(), conf);
  FileSystem copyToFs = FileSystem.get(copyTo.getFsURI(), conf);

  Collection<FileStatus> allFilesInSource = copyFrom.getFiles();
  Collection<FileStatus> allFilesInTarget = copyTo.getFiles();

  Set<FileStatus> copyFromFileStatuses = Sets.newHashSet(allFilesInSource);
  Map<Path, FileStatus> copyToFileMap = Maps.newHashMap();
  for (FileStatus f : allFilesInTarget) {
    copyToFileMap.put(PathUtils.getPathWithoutSchemeAndAuthority(f.getPath()), f);
  }

  Collection<Path> deletedPaths = Lists.newArrayList();

  boolean watermarkMetadataCopied = false;

  boolean deleteTargetIfNotExistOnSource = rc.isDeleteTargetIfNotExistOnSource();

  for (FileStatus originFileStatus : copyFromFileStatuses) {
    Path relative = PathUtils.relativizePath(
        PathUtils.getPathWithoutSchemeAndAuthority(originFileStatus.getPath()),
        PathUtils.getPathWithoutSchemeAndAuthority(copyFrom.getDatasetPath()));
    // construct the new path in the target file system
    Path newPath = new Path(copyTo.getDatasetPath(), relative);

    if (relative.toString().equals(ReplicaHadoopFsEndPoint.WATERMARK_FILE)) {
      watermarkMetadataCopied = true;
    }

    boolean shouldCopy = true;
    if (copyToFileMap.containsKey(newPath)) {
      Comparable srcVer = this.srcDataFileVersionStrategy.get().getVersion(originFileStatus.getPath());
      Comparable dstVer = this.dstDataFileVersionStrategy.get()
          .getVersion(copyToFileMap.get(newPath).getPath());

      // destination has a higher version, skip the copy
      if (srcVer.compareTo(dstVer) <= 0) {
        if (!enforceFileSizeMatch || copyToFileMap.get(newPath).getLen() == originFileStatus.getLen()) {
          log.debug("Copy from src {} (v:{}) to dst {} (v:{}) can be skipped.",
              originFileStatus.getPath(), srcVer, copyToFileMap.get(newPath).getPath(), dstVer);
          shouldCopy = false;
        } else {
          log.debug(
              "Copy from src {} (v:{}) to dst {} (v:{}) can not be skipped due to unmatched file length.",
              originFileStatus.getPath(), srcVer, copyToFileMap.get(newPath).getPath(), dstVer);
        }
      } else {
        log.debug("Copy from src {} (v:{}) to dst {} (v:{}) is needed due to a higher version.",
            originFileStatus.getPath(), srcVer, copyToFileMap.get(newPath).getPath(), dstVer);
      }
    } else {
      log.debug("Copy from src {} to dst {} is needed because dst doesn't contain the file",
          originFileStatus.getPath(), copyToFileMap.get(newPath));
    }

    if (shouldCopy) {
      // need to remove those files in the target file system
      if (copyToFileMap.containsKey(newPath)) {
        deletedPaths.add(newPath);
      }

      CopyableFile copyableFile = CopyableFile
          .fromOriginAndDestination(copyFromFs, originFileStatus, copyToFs.makeQualified(newPath),
              copyConfiguration)
          .fileSet(PathUtils.getPathWithoutSchemeAndAuthority(copyTo.getDatasetPath()).toString())
          .dataFileVersionStrategy(
              this.versionStrategyFromCS.isPresent() ? this.versionStrategyFromCS.get() : null)
          .build();
      copyableFile.setFsDatasets(copyFromFs, copyToFs);
      copyableFiles.add(copyableFile);
    }

    // clean up already checked paths
    copyToFileMap.remove(newPath);
  }

  // delete the paths in the target directory that do NOT exist on the source
  if (deleteTargetIfNotExistOnSource) {
    deletedPaths.addAll(copyToFileMap.keySet());
  }

  // delete old files first
  if (!deletedPaths.isEmpty()) {
    DeleteFileCommitStep deleteCommitStep = DeleteFileCommitStep.fromPaths(copyToFs, deletedPaths, this.props);
    copyableFiles.add(new PrePublishStep(copyTo.getDatasetPath().toString(),
        Maps.<String, String>newHashMap(), deleteCommitStep, 0));
  }

  // generate the watermark file even if watermark checking is disabled,
  // so that it can become functional once desired
  if ((!watermarkMetadataCopied) && copyFrom.getWatermark().isPresent()) {
    copyableFiles.add(new PostPublishStep(copyTo.getDatasetPath().toString(),
        Maps.<String, String>newHashMap(),
        new WatermarkMetadataGenerationCommitStep(copyTo.getFsURI().toString(), copyTo.getDatasetPath(),
            copyFrom.getWatermark().get()),
        1));
  }

  return copyableFiles;
}
From source file:org.apache.hadoop.examples.BaileyBorweinPlouffe.java
License:Apache License
/** Run a map/reduce job to compute Pi. */
private static void compute(int startDigit, int nDigits, int nMaps, String workingDir,
    Configuration conf, PrintStream out) throws IOException {
  final String name = startDigit + "_" + nDigits;

  // set up the working directory
  out.println("Working Directory = " + workingDir);
  out.println();
  final FileSystem fs = FileSystem.get(conf);
  final Path dir = fs.makeQualified(new Path(workingDir));
  if (fs.exists(dir)) {
    throw new IOException("Working directory " + dir + " already exists. Please remove it first.");
  } else if (!fs.mkdirs(dir)) {
    throw new IOException("Cannot create working directory " + dir);
  }

  out.println("Start Digit = " + startDigit);
  out.println("Number of Digits = " + nDigits);
  out.println("Number of Maps = " + nMaps);

  // set up a job
  final Job job = createJob(name, conf);
  final Path hexfile = new Path(dir, "pi_" + name + ".hex");
  FileOutputFormat.setOutputPath(job, new Path(dir, "out"));

  // set up custom properties
  job.getConfiguration().set(WORKING_DIR_PROPERTY, dir.toString());
  job.getConfiguration().set(HEX_FILE_PROPERTY, hexfile.toString());

  job.getConfiguration().setInt(DIGIT_START_PROPERTY, startDigit);
  job.getConfiguration().setInt(DIGIT_SIZE_PROPERTY, nDigits);
  job.getConfiguration().setInt(DIGIT_PARTS_PROPERTY, nMaps);

  // start a map/reduce job
  out.println("\nStarting Job ...");
  final long startTime = Time.monotonicNow();
  try {
    if (!job.waitForCompletion(true)) {
      out.println("Job failed.");
      System.exit(1);
    }
  } catch (Exception e) {
    throw new RuntimeException(e);
  } finally {
    final double duration = (Time.monotonicNow() - startTime) / 1000.0;
    out.println("Duration is " + duration + " seconds.");
  }
  out.println("Output file: " + hexfile);
}
From source file:org.apache.hadoop.examples.pi.DistSum.java
License:Apache License
/** Start a job to compute sigma */
private void compute(final String name, Summation sigma) throws IOException {
  if (sigma.getValue() != null)
    throw new IOException("sigma.getValue() != null, sigma=" + sigma);

  // set up the remote directory
  final FileSystem fs = FileSystem.get(getConf());
  final Path dir = fs.makeQualified(new Path(parameters.remoteDir, name));
  if (!Util.createNonexistingDirectory(fs, dir))
    return;

  // set up a job
  final Job job = createJob(name, sigma);
  final Path outdir = new Path(dir, "out");
  FileOutputFormat.setOutputPath(job, outdir);

  // start a map/reduce job
  final String startmessage = "steps/parts = " + sigma.E.getSteps() + "/" + parameters.nParts
      + " = " + Util.long2string(sigma.E.getSteps() / parameters.nParts);
  Util.runJob(name, job, parameters.machine, startmessage, timer);
  final List<TaskResult> results = Util.readJobOutputs(fs, outdir);
  Util.writeResults(name, results, fs, parameters.remoteDir);
  fs.delete(dir, true);

  // combine results
  final List<TaskResult> combined = Util.combine(results);
  final PrintWriter out = Util.createWriter(parameters.localDir, name);
  try {
    for (TaskResult r : combined) {
      final String s = taskResult2string(name, r);
      out.println(s);
      out.flush();
      Util.out.println(s);
    }
  } finally {
    out.close();
  }

  if (combined.size() == 1) {
    final Summation s = combined.get(0).getElement();
    if (sigma.contains(s) && s.contains(sigma))
      sigma.setValue(s.getValue());
  }
}
From source file:org.apache.hadoop.examples.QuasiMonteCarlo.java
License:Apache License
/**
 * Run a map/reduce job for estimating Pi.
 *
 * @return the estimated value of Pi
 */
public static BigDecimal estimatePi(int numMaps, long numPoints, Path tmpDir, Configuration conf)
    throws IOException, ClassNotFoundException, InterruptedException {
  Job job = Job.getInstance(conf);
  // set up the job conf
  job.setJobName(QuasiMonteCarlo.class.getSimpleName());
  job.setJarByClass(QuasiMonteCarlo.class);

  job.setInputFormatClass(SequenceFileInputFormat.class);

  job.setOutputKeyClass(BooleanWritable.class);
  job.setOutputValueClass(LongWritable.class);
  job.setOutputFormatClass(SequenceFileOutputFormat.class);

  job.setMapperClass(QmcMapper.class);

  job.setReducerClass(QmcReducer.class);
  job.setNumReduceTasks(1);

  // turn off speculative execution, because DFS doesn't handle
  // multiple writers to the same file.
  job.setSpeculativeExecution(false);

  // set up the input/output directories
  final Path inDir = new Path(tmpDir, "in");
  final Path outDir = new Path(tmpDir, "out");
  FileInputFormat.setInputPaths(job, inDir);
  FileOutputFormat.setOutputPath(job, outDir);

  final FileSystem fs = FileSystem.get(conf);
  if (fs.exists(tmpDir)) {
    throw new IOException(
        "Tmp directory " + fs.makeQualified(tmpDir) + " already exists. Please remove it first.");
  }
  if (!fs.mkdirs(inDir)) {
    throw new IOException("Cannot create input directory " + inDir);
  }

  try {
    // generate an input file for each map task
    for (int i = 0; i < numMaps; ++i) {
      final Path file = new Path(inDir, "part" + i);
      final LongWritable offset = new LongWritable(i * numPoints);
      final LongWritable size = new LongWritable(numPoints);
      final SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, file,
          LongWritable.class, LongWritable.class, CompressionType.NONE);
      try {
        writer.append(offset, size);
      } finally {
        writer.close();
      }
      System.out.println("Wrote input for Map #" + i);
    }

    // start a map/reduce job
    System.out.println("Starting Job");
    final long startTime = Time.monotonicNow();
    job.waitForCompletion(true);
    if (!job.isSuccessful()) {
      System.out.println("Job " + job.getJobID() + " failed!");
      System.exit(1);
    }
    final double duration = (Time.monotonicNow() - startTime) / 1000.0;
    System.out.println("Job Finished in " + duration + " seconds");

    // read outputs
    Path inFile = new Path(outDir, "reduce-out");
    LongWritable numInside = new LongWritable();
    LongWritable numOutside = new LongWritable();
    SequenceFile.Reader reader = new SequenceFile.Reader(fs, inFile, conf);
    try {
      reader.next(numInside, numOutside);
    } finally {
      reader.close();
    }

    // compute the estimated value
    final BigDecimal numTotal = BigDecimal.valueOf(numMaps).multiply(BigDecimal.valueOf(numPoints));
    return BigDecimal.valueOf(4).setScale(20).multiply(BigDecimal.valueOf(numInside.get()))
        .divide(numTotal, RoundingMode.HALF_UP);
  } finally {
    fs.delete(tmpDir, true);
  }
}
From source file:org.apache.hama.bsp.BSPJobClient.java
License:Apache License
public RunningJob submitJobInternal(BSPJob pJob, BSPJobID jobId) throws IOException {
  BSPJob job = pJob;
  job.setJobID(jobId);

  int maxTasks;
  int configured = job.getConfiguration().getInt(Constants.MAX_TASKS_PER_JOB, job.getNumBspTask());

  ClusterStatus clusterStatus = getClusterStatus(true);
  // Re-adjust maxTasks based on the cluster status.
  if (clusterStatus != null) {
    maxTasks = clusterStatus.getMaxTasks() - clusterStatus.getTasks();

    if (configured > maxTasks) {
      LOG.warn("The configured number of tasks has exceeded the maximum allowed. Job will run with "
          + (maxTasks) + " tasks.");
      job.setNumBspTask(maxTasks);
    }
  } else {
    maxTasks = configured;
  }

  Path submitJobDir = new Path(getSystemDir(), "submit_" + Integer.toString(Math.abs(r.nextInt()), 36));
  Path submitSplitFile = new Path(submitJobDir, "job.split");
  Path submitJarFile = new Path(submitJobDir, "job.jar");
  Path submitJobFile = new Path(submitJobDir, "job.xml");
  LOG.debug("BSPJobClient.submitJobDir: " + submitJobDir);

  FileSystem fs = getFs();
  // Create a number of filenames in the BSPMaster's fs namespace
  fs.delete(submitJobDir, true);
  submitJobDir = fs.makeQualified(submitJobDir);
  submitJobDir = new Path(submitJobDir.toUri().getPath());
  FsPermission bspSysPerms = new FsPermission(JOB_DIR_PERMISSION);
  FileSystem.mkdirs(fs, submitJobDir, bspSysPerms);
  fs.mkdirs(submitJobDir);
  short replication = (short) job.getInt("bsp.submit.replication", 10);

  // only create the splits if we have an input
  if ((job.get(Constants.JOB_INPUT_DIR) != null) || (job.get("bsp.join.expr") != null)) {
    // Create the splits for the job
    LOG.debug("Creating splits at " + fs.makeQualified(submitSplitFile));

    InputSplit[] splits = job.getInputFormat().getSplits(job,
        (maxTasks > configured) ? configured : maxTasks);

    if (job.getConfiguration().getBoolean(Constants.ENABLE_RUNTIME_PARTITIONING, false)) {
      LOG.info("Run pre-partitioning job");
      job = partition(job, splits, maxTasks);
      maxTasks = job.getInt("hama.partition.count", maxTasks);
    }

    if (job.getBoolean("input.has.partitioned", false)) {
      splits = job.getInputFormat().getSplits(job, maxTasks);
    }

    if (maxTasks < splits.length) {
      throw new IOException(
          "Job failed! The number of splits has exceeded the number of max tasks. The number of splits: "
              + splits.length + ", The number of max tasks: " + maxTasks);
    }

    int numOfSplits = writeSplits(job, splits, submitSplitFile, maxTasks);

    if (numOfSplits > configured
        || !job.getConfiguration().getBoolean(Constants.FORCE_SET_BSP_TASKS, false)) {
      job.setNumBspTask(numOfSplits);
    }

    job.set("bsp.job.split.file", submitSplitFile.toString());
  }

  String originalJarPath = job.getJar();

  if (originalJarPath != null) {
    // copy the jar to the BSPMaster's fs;
    // use the jar name if the job is not named.
    if ("".equals(job.getJobName())) {
      job.setJobName(new Path(originalJarPath).getName());
    }
    job.setJar(submitJarFile.toString());
    fs.copyFromLocalFile(new Path(originalJarPath), submitJarFile);

    fs.setReplication(submitJarFile, replication);
    fs.setPermission(submitJarFile, new FsPermission(JOB_FILE_PERMISSION));
  } else {
    LOG.warn("No job jar file set. User classes may not be found. "
        + "See BSPJob#setJar(String) or check your jar file.");
  }

  // Set the user's name and working directory
  job.setUser(getUnixUserName());
  job.set("group.name", getUnixUserGroupName(job.getUser()));
  if (job.getWorkingDirectory() == null) {
    job.setWorkingDirectory(fs.getWorkingDirectory());
  }

  // Write the job file to the BSPMaster's fs
  FSDataOutputStream out = FileSystem.create(fs, submitJobFile, new FsPermission(JOB_FILE_PERMISSION));
  try {
    job.writeXml(out);
  } finally {
    out.close();
  }

  return launchJob(jobId, job, submitJobFile, fs);
}
From source file:org.apache.hama.bsp.FileOutputFormat.java
License:Apache License
@Override
public void checkOutputSpecs(FileSystem ignored, BSPJob job)
    throws FileAlreadyExistsException, InvalidJobConfException, IOException {
  // Ensure that the output directory is set and not already there
  Path outDir = getOutputPath(job);
  if (outDir == null && job.getNumBspTask() != 0) {
    throw new InvalidJobConfException("Output directory not set in JobConf.");
  }
  if (outDir != null) {
    FileSystem fs = outDir.getFileSystem(job.getConfiguration());
    // normalize the output directory
    outDir = fs.makeQualified(outDir);
    setOutputPath(job, outDir);
    // check its existence
    if (fs.exists(outDir)) {
      throw new FileAlreadyExistsException("Output directory " + outDir + " already exists");
    }
  }
}
From source file:org.apache.hama.bsp.YARNBSPJobClient.java
License:Apache License
@Override
protected RunningJob launchJob(BSPJobID jobId, BSPJob normalJob, Path submitJobFile, FileSystem pFs)
    throws IOException {
  YARNBSPJob job = (YARNBSPJob) normalJob;

  LOG.info("Submitting job...");
  if (getConf().get("bsp.child.mem.in.mb") == null) {
    LOG.warn("BSP Child memory has not been set, YARN will guess your needs or use default values.");
  }

  FileSystem fs = pFs;
  if (fs == null) {
    fs = FileSystem.get(getConf());
  }

  if (getConf().get("bsp.user.name") == null) {
    String s = getUnixUserName();
    getConf().set("bsp.user.name", s);
    LOG.debug("Retrieved username: " + s);
  }

  yarnClient.start();
  try {
    YarnClusterMetrics clusterMetrics = yarnClient.getYarnClusterMetrics();
    LOG.info("Got Cluster metric info from ASM" + ", numNodeManagers="
        + clusterMetrics.getNumNodeManagers());

    List<NodeReport> clusterNodeReports = yarnClient.getNodeReports(NodeState.RUNNING);
    LOG.info("Got Cluster node info from ASM");
    for (NodeReport node : clusterNodeReports) {
      LOG.info("Got node report from ASM for" + ", nodeId=" + node.getNodeId() + ", nodeAddress"
          + node.getHttpAddress() + ", nodeRackName" + node.getRackName() + ", nodeNumContainers"
          + node.getNumContainers());
    }

    QueueInfo queueInfo = yarnClient.getQueueInfo("default");
    LOG.info("Queue info" + ", queueName=" + queueInfo.getQueueName() + ", queueCurrentCapacity="
        + queueInfo.getCurrentCapacity() + ", queueMaxCapacity=" + queueInfo.getMaximumCapacity()
        + ", queueApplicationCount=" + queueInfo.getApplications().size()
        + ", queueChildQueueCount=" + queueInfo.getChildQueues().size());

    List<QueueUserACLInfo> listAclInfo = yarnClient.getQueueAclsInfo();
    for (QueueUserACLInfo aclInfo : listAclInfo) {
      for (QueueACL userAcl : aclInfo.getUserAcls()) {
        LOG.info("User ACL Info for Queue" + ", queueName=" + aclInfo.getQueueName() + ", userAcl="
            + userAcl.name());
      }
    }

    // Get a new application id
    YarnClientApplication app = yarnClient.createApplication();

    // Create a new ApplicationSubmissionContext
    //ApplicationSubmissionContext appContext = Records.newRecord(ApplicationSubmissionContext.class);
    ApplicationSubmissionContext appContext = app.getApplicationSubmissionContext();
    id = appContext.getApplicationId();

    // set the application name
    appContext.setApplicationName(job.getJobName());

    // Create a new container launch context for the AM's container
    ContainerLaunchContext amContainer = Records.newRecord(ContainerLaunchContext.class);

    // Define the local resources required
    Map<String, LocalResource> localResources = new HashMap<String, LocalResource>();
    // Let's assume the jar we need for our ApplicationMaster is available in
    // HDFS at a certain known path to us and we want to make it available to
    // the ApplicationMaster in the launched container
    if (job.getJar() == null) {
      throw new IllegalArgumentException("Jar must be set in order to run the application!");
    }

    Path jarPath = new Path(job.getJar());
    jarPath = fs.makeQualified(jarPath);
    getConf().set("bsp.jar", jarPath.makeQualified(fs.getUri(), jarPath).toString());

    FileStatus jarStatus = fs.getFileStatus(jarPath);
    LocalResource amJarRsrc = Records.newRecord(LocalResource.class);
    amJarRsrc.setType(LocalResourceType.FILE);
    amJarRsrc.setVisibility(LocalResourceVisibility.APPLICATION);
    amJarRsrc.setResource(ConverterUtils.getYarnUrlFromPath(jarPath));
    amJarRsrc.setTimestamp(jarStatus.getModificationTime());
    amJarRsrc.setSize(jarStatus.getLen());

    // this creates a symlink in the working directory
    localResources.put(YARNBSPConstants.APP_MASTER_JAR_PATH, amJarRsrc);

    // add Hama-related jar files to the local resources for the container
    List<File> hamaJars;
    if (System.getProperty("hama.home.dir") != null)
      hamaJars = localJarfromPath(System.getProperty("hama.home.dir"));
    else
      hamaJars = localJarfromPath(getConf().get("hama.home.dir"));
    String hamaPath = getSystemDir() + "/hama";
    for (File fileEntry : hamaJars) {
      addToLocalResources(fs, fileEntry.getCanonicalPath(), hamaPath, fileEntry.getName(),
          localResources);
    }

    // Set the local resources into the launch context
    amContainer.setLocalResources(localResources);

    // Set up the environment needed for the launch context
    Map<String, String> env = new HashMap<String, String>();
    // Assuming our classes or jars are available as local resources in the
    // working directory from which the command will be run, we need to append
    // "." to the path.
    // By default, all the hadoop-specific classpaths will already be available
    // in $CLASSPATH, so we should be careful not to overwrite it.
    StringBuilder classPathEnv = new StringBuilder(ApplicationConstants.Environment.CLASSPATH.$())
        .append(File.pathSeparatorChar).append("./*");
    for (String c : yarnConf.getStrings(YarnConfiguration.YARN_APPLICATION_CLASSPATH,
        YarnConfiguration.DEFAULT_YARN_APPLICATION_CLASSPATH)) {
      classPathEnv.append(File.pathSeparatorChar);
      classPathEnv.append(c.trim());
    }

    env.put(YARNBSPConstants.HAMA_YARN_LOCATION, jarPath.toUri().toString());
    env.put(YARNBSPConstants.HAMA_YARN_SIZE, Long.toString(jarStatus.getLen()));
    env.put(YARNBSPConstants.HAMA_YARN_TIMESTAMP, Long.toString(jarStatus.getModificationTime()));

    env.put(YARNBSPConstants.HAMA_LOCATION, hamaPath);
    env.put("CLASSPATH", classPathEnv.toString());
    amContainer.setEnvironment(env);

    // Set the necessary command to execute on the allocated container
    Vector<CharSequence> vargs = new Vector<CharSequence>(5);
    vargs.add("${JAVA_HOME}/bin/java");
    vargs.add("-cp " + classPathEnv + "");
    vargs.add(ApplicationMaster.class.getCanonicalName());
    vargs.add(submitJobFile.makeQualified(fs.getUri(), fs.getWorkingDirectory()).toString());
    vargs.add("1>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/hama-appmaster.stdout");
    vargs.add("2>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/hama-appmaster.stderr");

    // Get the final command
    StringBuilder command = new StringBuilder();
    for (CharSequence str : vargs) {
      command.append(str).append(" ");
    }

    List<String> commands = new ArrayList<String>();
    commands.add(command.toString());
    amContainer.setCommands(commands);

    LOG.debug("Start command: " + command);

    Resource capability = Records.newRecord(Resource.class);
    // we have at least 3 threads per BSP task, which consume 1 MB each, plus
    // a base usage of 100 MB
    capability.setMemory(3 * job.getNumBspTask() + getConf().getInt("hama.appmaster.memory.mb", 100));
    LOG.info("Set memory for the application master to " + capability.getMemory() + "mb!");

    // Set the container launch content into the ApplicationSubmissionContext
    appContext.setResource(capability);

    // Setup security tokens
    if (UserGroupInformation.isSecurityEnabled()) {
      // Note: the Credentials class is marked as LimitedPrivate for HDFS and MapReduce
      Credentials credentials = new Credentials();
      String tokenRenewer = yarnConf.get(YarnConfiguration.RM_PRINCIPAL);
      if (tokenRenewer == null || tokenRenewer.length() == 0) {
        throw new IOException("Can't get Master Kerberos principal for the RM to use as renewer");
      }

      // For now, only getting tokens for the default file-system.
      final Token<?> tokens[] = fs.addDelegationTokens(tokenRenewer, credentials);
      if (tokens != null) {
        for (Token<?> token : tokens) {
          LOG.info("Got dt for " + fs.getUri() + "; " + token);
        }
      }
      DataOutputBuffer dob = new DataOutputBuffer();
      credentials.writeTokenStorageToStream(dob);
      ByteBuffer fsTokens = ByteBuffer.wrap(dob.getData(), 0, dob.getLength());
      amContainer.setTokens(fsTokens);
    }

    appContext.setAMContainerSpec(amContainer);

    // Create the request to send to the ApplicationsManager
    ApplicationId appId = appContext.getApplicationId();
    yarnClient.submitApplication(appContext);

    return monitorApplication(appId) ? new NetworkedJob() : null;
  } catch (YarnException e) {
    e.printStackTrace();
    return null;
  }
}
From source file:org.apache.hama.bsp.YARNBSPJobClient.java
License:Apache License
private void addToLocalResources(FileSystem fs, String fileSrcPath, String fileDstPath,
    String fileName, Map<String, LocalResource> localResources) throws IOException {
  Path dstPath = new Path(fileDstPath, fileName);
  dstPath = fs.makeQualified(dstPath);
  fs.copyFromLocalFile(false, true, new Path(fileSrcPath), dstPath);
  FileStatus fileStatus = fs.getFileStatus(dstPath);
  LocalResource localRsrc = LocalResource.newInstance(
      ConverterUtils.getYarnUrlFromURI(dstPath.toUri()), LocalResourceType.FILE,
      LocalResourceVisibility.APPLICATION, fileStatus.getLen(), fileStatus.getModificationTime());
  localResources.put(fileName, localRsrc);
}
From source file:org.apache.hama.util.GenericOptionsParser.java
License:Apache License
/**
 * Modify the configuration according to user-specified generic options.
 *
 * @param conf Configuration to be modified
 * @param line User-specified generic options
 */
private void processGeneralOptions(HamaConfiguration conf, CommandLine line) throws IOException {
  if (line.hasOption("conf")) {
    String[] values = line.getOptionValues("conf");
    for (String value : values) {
      conf.addResource(new Path(value));
    }
  }
  if (line.hasOption("libjars")) {
    conf.set("tmpjars", validateFiles(line.getOptionValue("libjars"), conf));
    // setting libjars in the client classpath
    URL[] libjars = getLibJars(conf);
    if (libjars != null && libjars.length > 0) {
      conf.setClassLoader(new URLClassLoader(libjars, conf.getClassLoader()));
      Thread.currentThread().setContextClassLoader(
          new URLClassLoader(libjars, Thread.currentThread().getContextClassLoader()));
    }
  }
  if (line.hasOption("files")) {
    conf.set("tmpfiles", validateFiles(line.getOptionValue("files"), conf));
  }
  if (line.hasOption("archives")) {
    conf.set("tmparchives", validateFiles(line.getOptionValue("archives"), conf));
  }
  if (line.hasOption('D')) {
    String[] property = line.getOptionValues('D');
    for (String prop : property) {
      String[] keyval = prop.split("=", 2);
      if (keyval.length == 2) {
        conf.set(keyval[0], keyval[1]);
      }
    }
  }
  conf.setBoolean("hama.used.genericoptionsparser", true);

  // tokensFile
  if (line.hasOption("tokenCacheFile")) {
    String fileName = line.getOptionValue("tokenCacheFile");
    // check if the local file exists
    try {
      FileSystem localFs = FileSystem.getLocal(conf);
      Path p = new Path(fileName);
      if (!localFs.exists(p)) {
        throw new FileNotFoundException("File " + fileName + " does not exist.");
      }
      LOG.debug("setting conf tokensFile: " + fileName);
      conf.set("hama.job.credentials.json", localFs.makeQualified(p).toString());
    } catch (IOException e) {
      throw new RuntimeException(e);
    }
  }
}
From source file:org.apache.hcatalog.mapreduce.HCatBaseInputFormat.java
License:Apache License
private void setInputPath(JobConf jobConf, String location) throws IOException {
  // ideally we should just call FileInputFormat.setInputPaths() here - but
  // that won't work since FileInputFormat.setInputPaths() needs
  // a Job object instead of a JobContext which we are handed here

  int length = location.length();
  int curlyOpen = 0;
  int pathStart = 0;
  boolean globPattern = false;
  List<String> pathStrings = new ArrayList<String>();

  for (int i = 0; i < length; i++) {
    char ch = location.charAt(i);
    switch (ch) {
    case '{': {
      curlyOpen++;
      if (!globPattern) {
        globPattern = true;
      }
      break;
    }
    case '}': {
      curlyOpen--;
      if (curlyOpen == 0 && globPattern) {
        globPattern = false;
      }
      break;
    }
    case ',': {
      if (!globPattern) {
        pathStrings.add(location.substring(pathStart, i));
        pathStart = i + 1;
      }
      break;
    }
    }
  }
  pathStrings.add(location.substring(pathStart, length));

  Path[] paths = StringUtils.stringToPath(pathStrings.toArray(new String[0]));

  String separator = "";
  StringBuilder str = new StringBuilder();
  for (Path path : paths) {
    FileSystem fs = path.getFileSystem(jobConf);
    final String qualifiedPath = fs.makeQualified(path).toString();
    str.append(separator).append(StringUtils.escapeString(qualifiedPath));
    separator = StringUtils.COMMA_STR;
  }

  jobConf.set("mapred.input.dir", str.toString());
}