List of usage examples for org.apache.hadoop.mapreduce.security TokenCache obtainTokensForNamenodes
public static void obtainTokensForNamenodes(Credentials credentials, Path[] ps, Configuration conf) throws IOException
From source file:com.inmobi.conduit.distcp.tools.mapred.CopyOutputFormat.java
License:Apache License
@Override public void checkOutputSpecs(JobContext context) throws IOException { Configuration conf = HadoopCompat.getConfiguration(context); if (getCommitDirectory(conf) == null) { throw new IllegalStateException("Commit directory not configured"); }//from w w w.j a va 2 s . c o m Path workingPath = getWorkingDirectory(conf); if (workingPath == null) { throw new IllegalStateException("Working directory not configured"); } // get delegation token for outDir's file system TokenCache.obtainTokensForNamenodes(HadoopCompat.getCredentials(context), new Path[] { workingPath }, conf); }
From source file:com.inmobi.conduit.distcp.tools.SimpleCopyListing.java
License:Apache License
@Override protected void validatePaths(DistCpOptions options) throws IOException, InvalidInputException { if (options.isSkipPathValidation()) { LOG.debug("Skipping Path Validation in disctp"); return;/*from w w w. j ava 2 s.c o m*/ } Path targetPath = options.getTargetPath(); FileSystem targetFS = targetPath.getFileSystem(getConf()); boolean targetIsFile = targetFS.isFile(targetPath); //If target is a file, then source has to be single file if (targetIsFile) { if (options.getSourcePaths().size() > 1) { throw new InvalidInputException("Multiple source being copied to a file: " + targetPath); } Path srcPath = options.getSourcePaths().get(0); FileSystem sourceFS = srcPath.getFileSystem(getConf()); if (!sourceFS.isFile(srcPath)) { throw new InvalidInputException( "Cannot copy " + srcPath + ", which is not a file to " + targetPath); } } for (Path path : options.getSourcePaths()) { FileSystem fs = path.getFileSystem(getConf()); if (!fs.exists(path)) { throw new InvalidInputException(path + " doesn't exist"); } } /* This is requires to allow map tasks to access each of the source clusters. This would retrieve the delegation token for each unique file system and add them to job's private credential store */ Credentials credentials = getCredentials(); if (credentials != null) { Path[] inputPaths = options.getSourcePaths().toArray(new Path[1]); TokenCache.obtainTokensForNamenodes(credentials, inputPaths, getConf()); } }
From source file:com.kadwa.hadoop.DistExec.java
License:Open Source License
/** * Sanity check for srcPath// ww w.j a v a2 s . com */ private static void checkSrcPath(JobConf jobConf, List<Path> srcPaths) throws IOException { List<IOException> rslt = new ArrayList<IOException>(); Path[] ps = new Path[srcPaths.size()]; ps = srcPaths.toArray(ps); TokenCache.obtainTokensForNamenodes(jobConf.getCredentials(), ps, jobConf); for (Path p : srcPaths) { FileSystem fs = p.getFileSystem(jobConf); if (!fs.exists(p)) { rslt.add(new IOException("Input source " + p + " does not exist.")); } } if (!rslt.isEmpty()) { throw new InvalidInputException(rslt); } }
From source file:com.kadwa.hadoop.DistExec.java
License:Open Source License
/** * Initialize ExecFilesMapper specific job-configuration. * * @param conf : The dfs/mapred configuration. * @param jobConf : The handle to the jobConf object to be initialized. * @param args Arguments/* w w w . ja v a 2 s. co m*/ * @return true if it is necessary to launch a job. */ private static boolean setup(Configuration conf, JobConf jobConf, final Arguments args) throws IOException { jobConf.set(DST_DIR_LABEL, args.dst.toUri().toString()); jobConf.set(EXEC_CMD_LABEL, args.execCmd); //set boolean values jobConf.setBoolean(Options.REDIRECT_ERROR_TO_OUT.propertyname, args.flags.contains(Options.REDIRECT_ERROR_TO_OUT)); final String randomId = getRandomId(); JobClient jClient = new JobClient(jobConf); Path stagingArea; try { stagingArea = JobSubmissionFiles.getStagingDir(jClient, conf); } catch (InterruptedException e) { throw new IOException(e); } Path jobDirectory = new Path(stagingArea + NAME + "_" + randomId); FsPermission mapredSysPerms = new FsPermission(JobSubmissionFiles.JOB_DIR_PERMISSION); FileSystem.mkdirs(FileSystem.get(jobDirectory.toUri(), conf), jobDirectory, mapredSysPerms); jobConf.set(JOB_DIR_LABEL, jobDirectory.toString()); FileSystem dstfs = args.dst.getFileSystem(conf); // get tokens for all the required FileSystems.. TokenCache.obtainTokensForNamenodes(jobConf.getCredentials(), new Path[] { args.dst }, conf); boolean dstExists = dstfs.exists(args.dst); boolean dstIsDir = false; if (dstExists) { dstIsDir = dstfs.getFileStatus(args.dst).isDir(); } // default logPath Path logPath = args.log; if (logPath == null) { String filename = "_" + NAME + "_logs_" + randomId; if (!dstExists || !dstIsDir) { Path parent = args.dst.getParent(); if (!dstfs.exists(parent)) { dstfs.mkdirs(parent); } logPath = new Path(parent, filename); } else { logPath = new Path(args.dst, filename); } } FileOutputFormat.setOutputPath(jobConf, logPath); // create src list, dst list FileSystem jobfs = jobDirectory.getFileSystem(jobConf); Path srcfilelist = new Path(jobDirectory, "_" + NAME + "_src_files"); jobConf.set(SRC_LIST_LABEL, srcfilelist.toString()); SequenceFile.Writer src_writer = SequenceFile.createWriter(jobfs, jobConf, srcfilelist, LongWritable.class, FilePair.class, SequenceFile.CompressionType.NONE); Path dstfilelist = new Path(jobDirectory, "_" + NAME + "_dst_files"); SequenceFile.Writer dst_writer = SequenceFile.createWriter(jobfs, jobConf, dstfilelist, Text.class, Text.class, SequenceFile.CompressionType.NONE); Path dstdirlist = new Path(jobDirectory, "_" + NAME + "_dst_dirs"); jobConf.set(DST_DIR_LIST_LABEL, dstdirlist.toString()); SequenceFile.Writer dir_writer = SequenceFile.createWriter(jobfs, jobConf, dstdirlist, Text.class, FilePair.class, SequenceFile.CompressionType.NONE); // handle the case where the destination directory doesn't exist // and we've only a single src directory. final boolean special = (args.srcs.size() == 1 && !dstExists); int srcCount = 0, cnsyncf = 0, dirsyn = 0; long fileCount = 0L, byteCount = 0L, cbsyncs = 0L; try { for (Iterator<Path> srcItr = args.srcs.iterator(); srcItr.hasNext();) { final Path src = srcItr.next(); FileSystem srcfs = src.getFileSystem(conf); FileStatus srcfilestat = srcfs.getFileStatus(src); Path root = special && srcfilestat.isDir() ? src : src.getParent(); if (srcfilestat.isDir()) { ++srcCount; } Stack<FileStatus> pathstack = new Stack<FileStatus>(); for (pathstack.push(srcfilestat); !pathstack.empty();) { FileStatus cur = pathstack.pop(); FileStatus[] children = srcfs.listStatus(cur.getPath()); for (int i = 0; i < children.length; i++) { boolean skipfile = false; final FileStatus child = children[i]; final String dst = makeRelative(root, child.getPath()); ++srcCount; if (child.isDir()) { pathstack.push(child); } else { if (!skipfile) { ++fileCount; byteCount += child.getLen(); if (LOG.isTraceEnabled()) { LOG.trace("adding file " + child.getPath()); } ++cnsyncf; cbsyncs += child.getLen(); if (cnsyncf > SYNC_FILE_MAX || cbsyncs > BYTES_PER_MAP) { src_writer.sync(); dst_writer.sync(); cnsyncf = 0; cbsyncs = 0L; } } } if (!skipfile) { src_writer.append(new LongWritable(child.isDir() ? 0 : child.getLen()), new FilePair(child, dst)); } dst_writer.append(new Text(dst), new Text(child.getPath().toString())); } if (cur.isDir()) { String dst = makeRelative(root, cur.getPath()); dir_writer.append(new Text(dst), new FilePair(cur, dst)); if (++dirsyn > SYNC_FILE_MAX) { dirsyn = 0; dir_writer.sync(); } } } } } finally { checkAndClose(src_writer); checkAndClose(dst_writer); checkAndClose(dir_writer); } FileStatus dststatus = null; try { dststatus = dstfs.getFileStatus(args.dst); } catch (FileNotFoundException fnfe) { LOG.info(args.dst + " does not exist."); } // create dest path dir if copying > 1 file if (dststatus == null) { if (srcCount > 1 && !dstfs.mkdirs(args.dst)) { throw new IOException("Failed to create" + args.dst); } } final Path sorted = new Path(jobDirectory, "_" + NAME + "_sorted"); checkDuplication(jobfs, dstfilelist, sorted, conf); Path tmpDir = new Path( (dstExists && !dstIsDir) || (!dstExists && srcCount == 1) ? args.dst.getParent() : args.dst, "_" + NAME + "_tmp_" + randomId); jobConf.set(TMP_DIR_LABEL, tmpDir.toUri().toString()); LOG.info("sourcePathsCount=" + srcCount); LOG.info("filesToExecCount=" + fileCount); LOG.info("bytesToExecCount=" + StringUtils.humanReadableInt(byteCount)); jobConf.setInt(SRC_COUNT_LABEL, srcCount); jobConf.setLong(TOTAL_SIZE_LABEL, byteCount); setMapCount(fileCount, jobConf); return fileCount > 0; }
From source file:com.marklogic.contentpump.FileAndDirectoryInputFormat.java
License:Apache License
protected List<FileStatus> listStatus(JobContext job) throws IOException { Path[] dirs = getInputPaths(job); if (dirs.length == 0) { throw new IOException("No input paths specified in job"); }/*from w w w .ja v a 2s .co m*/ // get tokens for all the required FileSystems.. TokenCache.obtainTokensForNamenodes(job.getCredentials(), dirs, job.getConfiguration()); // Whether we need to recursive look into the directory structure boolean recursive = getInputDirRecursive(job); // creates a MultiPathFilter with the hiddenFileFilter and the // user provided one (if any). List<PathFilter> filters = new ArrayList<PathFilter>(); filters.add(hiddenFileFilter); PathFilter jobFilter = getInputPathFilter(job); if (jobFilter != null) { filters.add(jobFilter); } PathFilter inputFilter = new MultiPathFilter(filters); List<FileStatus> result = simpleListStatus(job, dirs, inputFilter, recursive); LOG.info("Total input paths to process : " + result.size()); return result; }
From source file:com.ning.metrics.serialization.hadoop.SmileInputFormat.java
License:Apache License
/** * List input directories./* w w w . ja v a 2 s .c om*/ * * @param job the job to list input paths for * @return array of FileStatus objects * @throws IOException if zero items. */ protected List<FileStatus> listStatus(JobContext job) throws IOException { List<FileStatus> result = new ArrayList<FileStatus>(); Path[] dirs = getInputPaths(job); if (dirs.length == 0) { throw new IOException("No input paths specified in job"); } // Get tokens for all the required FileSystems.. TokenCache.obtainTokensForNamenodes(job.getCredentials(), dirs, job.getConfiguration()); List<IOException> errors = new ArrayList<IOException>(); for (Path p : dirs) { FileSystem fs = p.getFileSystem(job.getConfiguration()); final SmilePathFilter filter = new SmilePathFilter(); FileStatus[] matches = fs.globStatus(p, filter); if (matches == null) { errors.add(new IOException("Input path does not exist: " + p)); } else if (matches.length == 0) { errors.add(new IOException("Input Pattern " + p + " matches 0 files")); } else { for (FileStatus globStat : matches) { if (globStat.isDir()) { Collections.addAll(result, fs.listStatus(globStat.getPath(), filter)); } else { result.add(globStat); } } } } if (!errors.isEmpty()) { throw new InvalidInputException(errors); } return result; }
From source file:com.sourcecode.FileInputFormat.java
License:Apache License
/** List input directories. * Subclasses may override to, e.g., select only files matching a regular * expression. /*from w w w . j a v a 2 s . c o m*/ * * @param job the job to list input paths for * @return array of FileStatus objects * @throws IOException if zero items. */ protected List<FileStatus> listStatus(JobContext job) throws IOException { Path[] dirs = getInputPaths(job); if (dirs.length == 0) { throw new IOException("No input paths specified in job"); } // get tokens for all the required FileSystems.. TokenCache.obtainTokensForNamenodes(job.getCredentials(), dirs, job.getConfiguration()); // Whether we need to recursive look into the directory structure boolean recursive = getInputDirRecursive(job); // creates a MultiPathFilter with the hiddenFileFilter and the // user provided one (if any). List<PathFilter> filters = new ArrayList<PathFilter>(); filters.add(hiddenFileFilter); PathFilter jobFilter = getInputPathFilter(job); if (jobFilter != null) { filters.add(jobFilter); } PathFilter inputFilter = new MultiPathFilter(filters); List<FileStatus> result = null; int numThreads = job.getConfiguration().getInt(LIST_STATUS_NUM_THREADS, DEFAULT_LIST_STATUS_NUM_THREADS); Stopwatch sw = new Stopwatch().start(); if (numThreads == 1) { result = singleThreadedListStatus(job, dirs, inputFilter, recursive); } else { Iterable<FileStatus> locatedFiles = null; try { LocatedFileStatusFetcher locatedFileStatusFetcher = new LocatedFileStatusFetcher( job.getConfiguration(), dirs, recursive, inputFilter, true); locatedFiles = locatedFileStatusFetcher.getFileStatuses(); } catch (InterruptedException e) { throw new IOException("Interrupted while getting file statuses"); } result = Lists.newArrayList(locatedFiles); } sw.stop(); if (LOG.isDebugEnabled()) { LOG.debug("Time taken to get FileStatuses: " + sw.elapsedMillis()); } LOG.info("Total input paths to process : " + result.size()); return result; }
From source file:com.vertica.hadoop.FixedSplitFileInputFormat.java
License:Apache License
/** List input directories. * Subclasses may override to, e.g., select only files matching a regular * expression. /*from w w w .j a va2s. co m*/ * * @param job the job to list input paths for * @return array of FileStatus objects * @throws IOException if zero items. */ protected FileStatus[] listStatus(JobConf job) throws IOException { Path[] dirs = getInputPaths(job); if (dirs.length == 0) { throw new IOException("No input paths specified in job"); } // get tokens for all the required FileSystems.. TokenCache.obtainTokensForNamenodes(job.getCredentials(), dirs, job); // Whether we need to recursive look into the directory structure boolean recursive = job.getBoolean("mapred.input.dir.recursive", false); List<FileStatus> result = new ArrayList<FileStatus>(); List<IOException> errors = new ArrayList<IOException>(); // creates a MultiPathFilter with the hiddenFileFilter and the // user provided one (if any). List<PathFilter> filters = new ArrayList<PathFilter>(); filters.add(hiddenFileFilter); PathFilter jobFilter = getInputPathFilter(job); if (jobFilter != null) { filters.add(jobFilter); } PathFilter inputFilter = new MultiPathFilter(filters); for (Path p : dirs) { FileSystem fs = p.getFileSystem(job); FileStatus[] matches = fs.globStatus(p, inputFilter); if (matches == null) { errors.add(new IOException("Input path does not exist: " + p)); } else if (matches.length == 0) { errors.add(new IOException("Input Pattern " + p + " matches 0 files")); } else { for (FileStatus globStat : matches) { if (globStat.isDirectory()) { for (FileStatus stat : fs.listStatus(globStat.getPath(), inputFilter)) { if (recursive && stat.isDirectory()) { addInputPathRecursively(result, fs, stat.getPath(), inputFilter); } else { result.add(stat); } } } else { result.add(globStat); } } } } if (!errors.isEmpty()) { throw new InvalidInputException(errors); } LOG.info("Total input paths to process : " + result.size()); return result.toArray(new FileStatus[result.size()]); }
From source file:com.yahoo.storm.yarn.StormOnYarn.java
License:Open Source License
private void launchApp(String appName, String queue, int amMB, String storm_zip_location) throws Exception { LOG.debug("StormOnYarn:launchApp() ..."); YarnClientApplication client_app = _yarn.createApplication(); GetNewApplicationResponse app = client_app.getNewApplicationResponse(); _appId = app.getApplicationId();/*from w ww. java 2 s . c om*/ LOG.debug("_appId:" + _appId); if (amMB > app.getMaximumResourceCapability().getMemory()) { //TODO need some sanity checks amMB = app.getMaximumResourceCapability().getMemory(); } ApplicationSubmissionContext appContext = Records.newRecord(ApplicationSubmissionContext.class); appContext.setApplicationId(app.getApplicationId()); appContext.setApplicationName(appName); appContext.setQueue(queue); // Set up the container launch context for the application master ContainerLaunchContext amContainer = Records.newRecord(ContainerLaunchContext.class); Map<String, LocalResource> localResources = new HashMap<String, LocalResource>(); // set local resources for the application master // local files or archives as needed // In this scenario, the jar file for the application master is part of the // local resources LOG.info("Copy App Master jar from local filesystem and add to local environment"); // Copy the application master jar to the filesystem // Create a local resource to point to the destination jar path String appMasterJar = findContainingJar(MasterServer.class); FileSystem fs = FileSystem.get(_hadoopConf); Path src = new Path(appMasterJar); String appHome = Util.getApplicationHomeForId(_appId.toString()); Path dst = new Path(fs.getHomeDirectory(), appHome + Path.SEPARATOR + "AppMaster.jar"); fs.copyFromLocalFile(false, true, src, dst); localResources.put("AppMaster.jar", Util.newYarnAppResource(fs, dst)); String stormVersion = Util.getStormVersion(); Path zip; if (storm_zip_location != null) { zip = new Path(storm_zip_location); } else { zip = new Path("/lib/storm/" + stormVersion + "/storm.zip"); } _stormConf.put("storm.zip.path", zip.makeQualified(fs).toUri().getPath()); LocalResourceVisibility visibility = LocalResourceVisibility.PUBLIC; _stormConf.put("storm.zip.visibility", "PUBLIC"); if (!Util.isPublic(fs, zip)) { visibility = LocalResourceVisibility.APPLICATION; _stormConf.put("storm.zip.visibility", "APPLICATION"); } localResources.put("storm", Util.newYarnAppResource(fs, zip, LocalResourceType.ARCHIVE, visibility)); Path confDst = Util.createConfigurationFileInFs(fs, appHome, _stormConf, _hadoopConf); // establish a symbolic link to conf directory localResources.put("conf", Util.newYarnAppResource(fs, confDst)); // Setup security tokens Path[] paths = new Path[3]; paths[0] = dst; paths[1] = zip; paths[2] = confDst; Credentials credentials = new Credentials(); TokenCache.obtainTokensForNamenodes(credentials, paths, _hadoopConf); DataOutputBuffer dob = new DataOutputBuffer(); credentials.writeTokenStorageToStream(dob); ByteBuffer securityTokens = ByteBuffer.wrap(dob.getData(), 0, dob.getLength()); //security tokens for HDFS distributed cache amContainer.setTokens(securityTokens); // Set local resource info into app master container launch context amContainer.setLocalResources(localResources); // Set the env variables to be setup in the env where the application master // will be run LOG.info("Set the environment for the application master"); Map<String, String> env = new HashMap<String, String>(); // add the runtime classpath needed for tests to work Apps.addToEnvironment(env, Environment.CLASSPATH.name(), "./conf"); Apps.addToEnvironment(env, Environment.CLASSPATH.name(), "./AppMaster.jar"); //Make sure that AppMaster has access to all YARN JARs List<String> yarn_classpath_cmd = java.util.Arrays.asList("yarn", "classpath"); ProcessBuilder pb = new ProcessBuilder(yarn_classpath_cmd).redirectError(Redirect.INHERIT); LOG.info("YARN CLASSPATH COMMAND = [" + yarn_classpath_cmd + "]"); pb.environment().putAll(System.getenv()); Process proc = pb.start(); BufferedReader reader = new BufferedReader(new InputStreamReader(proc.getInputStream(), "UTF-8")); String line = ""; String yarn_class_path = (String) _stormConf.get("storm.yarn.yarn_classpath"); if (yarn_class_path == null) { StringBuilder yarn_class_path_builder = new StringBuilder(); while ((line = reader.readLine()) != null) { yarn_class_path_builder.append(line); } yarn_class_path = yarn_class_path_builder.toString(); } LOG.info("YARN CLASSPATH = [" + yarn_class_path + "]"); proc.waitFor(); reader.close(); Apps.addToEnvironment(env, Environment.CLASSPATH.name(), yarn_class_path); String stormHomeInZip = Util.getStormHomeInZip(fs, zip, stormVersion); Apps.addToEnvironment(env, Environment.CLASSPATH.name(), "./storm/" + stormHomeInZip + "/*"); Apps.addToEnvironment(env, Environment.CLASSPATH.name(), "./storm/" + stormHomeInZip + "/lib/*"); String java_home = (String) _stormConf.get("storm.yarn.java_home"); if (java_home == null) java_home = System.getenv("JAVA_HOME"); if (java_home != null && !java_home.isEmpty()) env.put("JAVA_HOME", java_home); LOG.info("Using JAVA_HOME = [" + env.get("JAVA_HOME") + "]"); env.put("appJar", appMasterJar); env.put("appName", appName); env.put("appId", new Integer(_appId.getId()).toString()); env.put("STORM_LOG_DIR", ApplicationConstants.LOG_DIR_EXPANSION_VAR); amContainer.setEnvironment(env); // Set the necessary command to execute the application master Vector<String> vargs = new Vector<String>(); if (java_home != null && !java_home.isEmpty()) vargs.add(env.get("JAVA_HOME") + "/bin/java"); else vargs.add("java"); vargs.add("-Dstorm.home=./storm/" + stormHomeInZip + "/"); vargs.add("-Dlogfile.name=" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/master.log"); //vargs.add("-verbose:class"); vargs.add("com.yahoo.storm.yarn.MasterServer"); vargs.add("1>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/stderr"); vargs.add("2>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/stdout"); // Set java executable command LOG.info("Setting up app master command:" + vargs); amContainer.setCommands(vargs); // Set up resource type requirements // For now, only memory is supported so we set memory requirements Resource capability = Records.newRecord(Resource.class); capability.setMemory(amMB); appContext.setResource(capability); appContext.setAMContainerSpec(amContainer); _yarn.submitApplication(appContext); }
From source file:eu.stratosphere.yarn.Utils.java
License:Apache License
public static void setTokensFor(ContainerLaunchContext amContainer, Path[] paths, Configuration conf) throws IOException { Credentials credentials = new Credentials(); // for HDFS//from w w w .j a v a 2 s.c om TokenCache.obtainTokensForNamenodes(credentials, paths, conf); // for user UserGroupInformation currUsr = UserGroupInformation.getCurrentUser(); Collection<Token<? extends TokenIdentifier>> usrTok = currUsr.getTokens(); for (Token<? extends TokenIdentifier> token : usrTok) { final Text id = new Text(token.getIdentifier()); LOG.info("Adding user token " + id + " with " + token); credentials.addToken(id, token); } DataOutputBuffer dob = new DataOutputBuffer(); credentials.writeTokenStorageToStream(dob); LOG.debug("Wrote tokens. Credentials buffer length: " + dob.getLength()); ByteBuffer securityTokens = ByteBuffer.wrap(dob.getData(), 0, dob.getLength()); amContainer.setTokens(securityTokens); }