List of usage examples for org.apache.hadoop.mapreduce.security.TokenCache#obtainTokensForNamenodes
public static void obtainTokensForNamenodes(Credentials credentials, Path[] ps, Configuration conf) throws IOException
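Before the project-specific examples below, here is a minimal sketch of the common calling pattern, assuming a plain MapReduce job submission; the job name, NameNode addresses, and paths are illustrative placeholders, not values taken from any of the examples that follow. The idea is always the same: collect every HDFS path the job will read or write, then let TokenCache fetch a delegation token for each distinct NameNode into the job's Credentials before the job is submitted.

    import java.io.IOException;

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.mapreduce.Job;
    import org.apache.hadoop.mapreduce.security.TokenCache;

    public class TokenCacheUsageSketch {
        public static void main(String[] args) throws IOException {
            Configuration conf = new Configuration();
            Job job = Job.getInstance(conf, "token-cache-sketch"); // hypothetical job name

            // Hypothetical paths on two different clusters; one delegation token is
            // obtained per distinct NameNode and stored in the job's Credentials.
            Path[] paths = new Path[] {
                    new Path("hdfs://namenode-a:8020/data/input"),
                    new Path("hdfs://namenode-b:8020/data/output") };

            TokenCache.obtainTokensForNamenodes(job.getCredentials(), paths, job.getConfiguration());
        }
    }

On a cluster without Kerberos security the call is effectively a no-op, which is why output formats such as the ones below can invoke it unconditionally from checkOutputSpecs(); on a secured cluster it must run as the submitting (or proxied) user, as the Azkaban example demonstrates with doAs().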
From source file:azkaban.security.HadoopSecurityManager_H_2_0.java
License:Apache License
@Override
public synchronized void prefetchToken(final File tokenFile, final Props props, final Logger logger)
        throws HadoopSecurityManagerException {

    final String userToProxy = props.getString(USER_TO_PROXY);
    logger.info("Getting hadoop tokens based on props for " + userToProxy);

    final Credentials cred = new Credentials();

    if (props.getBoolean(OBTAIN_HCAT_TOKEN, false)) {
        try {
            // first we fetch and save the default hcat token.
            logger.info("Pre-fetching default Hive MetaStore token from hive");
            HiveConf hiveConf = new HiveConf();
            Token<DelegationTokenIdentifier> hcatToken = fetchHcatToken(userToProxy, hiveConf, null, logger);
            cred.addToken(hcatToken.getService(), hcatToken);

            // check and see if user specified the extra hcat locations we need to
            // look at and fetch token.
            final List<String> extraHcatLocations = props.getStringList(EXTRA_HCAT_LOCATION);
            if (Collections.EMPTY_LIST != extraHcatLocations) {
                logger.info("Need to pre-fetch extra metaStore tokens from hive.");

                // start to process the user inputs.
                for (String thriftUrl : extraHcatLocations) {
                    logger.info("Pre-fetching metaStore token from : " + thriftUrl);

                    hiveConf = new HiveConf();
                    hiveConf.set(HiveConf.ConfVars.METASTOREURIS.varname, thriftUrl);
                    hcatToken = fetchHcatToken(userToProxy, hiveConf, thriftUrl, logger);
                    cred.addToken(hcatToken.getService(), hcatToken);
                }
            }
        } catch (Throwable t) {
            String message = "Failed to get hive metastore token." + t.getMessage() + t.getCause();
            logger.error(message, t);
            throw new HadoopSecurityManagerException(message);
        }
    }

    if (props.getBoolean(OBTAIN_JOBHISTORYSERVER_TOKEN, false)) {
        YarnRPC rpc = YarnRPC.create(conf);
        final String serviceAddr = conf.get(JHAdminConfig.MR_HISTORY_ADDRESS);

        logger.debug("Connecting to HistoryServer at: " + serviceAddr);
        HSClientProtocol hsProxy = (HSClientProtocol) rpc.getProxy(HSClientProtocol.class,
                NetUtils.createSocketAddr(serviceAddr), conf);
        logger.info("Pre-fetching JH token from job history server");

        Token<?> jhsdt = null;
        try {
            jhsdt = getDelegationTokenFromHS(hsProxy);
        } catch (Exception e) {
            logger.error("Failed to fetch JH token", e);
            throw new HadoopSecurityManagerException("Failed to fetch JH token for " + userToProxy);
        }

        if (jhsdt == null) {
            logger.error("getDelegationTokenFromHS() returned null");
            throw new HadoopSecurityManagerException("Unable to fetch JH token for " + userToProxy);
        }

        logger.info("Created JH token: " + jhsdt.toString());
        logger.info("Token kind: " + jhsdt.getKind());
        logger.info("Token id: " + jhsdt.getIdentifier());
        logger.info("Token service: " + jhsdt.getService());

        cred.addToken(jhsdt.getService(), jhsdt);
    }

    try {
        getProxiedUser(userToProxy).doAs(new PrivilegedExceptionAction<Void>() {
            @Override
            public Void run() throws Exception {
                getToken(userToProxy);
                return null;
            }

            private void getToken(String userToProxy)
                    throws InterruptedException, IOException, HadoopSecurityManagerException {
                logger.info("Here is the props for " + OBTAIN_NAMENODE_TOKEN + ": "
                        + props.getBoolean(OBTAIN_NAMENODE_TOKEN));
                if (props.getBoolean(OBTAIN_NAMENODE_TOKEN, false)) {
                    FileSystem fs = FileSystem.get(conf);
                    // check if we get the correct FS, and most importantly, the conf
                    logger.info("Getting DFS token from " + fs.getUri());
                    Token<?> fsToken = fs
                            .getDelegationToken(getMRTokenRenewerInternal(new JobConf()).toString());
                    if (fsToken == null) {
                        logger.error("Failed to fetch DFS token for ");
                        throw new HadoopSecurityManagerException(
                                "Failed to fetch DFS token for " + userToProxy);
                    }
                    logger.info("Created DFS token: " + fsToken.toString());
                    logger.info("Token kind: " + fsToken.getKind());
                    logger.info("Token id: " + fsToken.getIdentifier());
                    logger.info("Token service: " + fsToken.getService());

                    cred.addToken(fsToken.getService(), fsToken);

                    // getting additional name nodes tokens
                    String otherNamenodes = props.get(OTHER_NAMENODES_TO_GET_TOKEN);
                    if ((otherNamenodes != null) && (otherNamenodes.length() > 0)) {
                        logger.info(OTHER_NAMENODES_TO_GET_TOKEN + ": '" + otherNamenodes + "'");
                        String[] nameNodeArr = otherNamenodes.split(",");
                        Path[] ps = new Path[nameNodeArr.length];
                        for (int i = 0; i < ps.length; i++) {
                            ps[i] = new Path(nameNodeArr[i].trim());
                        }
                        TokenCache.obtainTokensForNamenodes(cred, ps, conf);
                        logger.info("Successfully fetched tokens for: " + otherNamenodes);
                    } else {
                        logger.info(OTHER_NAMENODES_TO_GET_TOKEN + " was not configured");
                    }
                }

                if (props.getBoolean(OBTAIN_JOBTRACKER_TOKEN, false)) {
                    JobConf jobConf = new JobConf();
                    JobClient jobClient = new JobClient(jobConf);
                    logger.info("Pre-fetching JT token from JobTracker");

                    Token<DelegationTokenIdentifier> mrdt = jobClient
                            .getDelegationToken(getMRTokenRenewerInternal(jobConf));
                    if (mrdt == null) {
                        logger.error("Failed to fetch JT token");
                        throw new HadoopSecurityManagerException("Failed to fetch JT token for " + userToProxy);
                    }
                    logger.info("Created JT token: " + mrdt.toString());
                    logger.info("Token kind: " + mrdt.getKind());
                    logger.info("Token id: " + mrdt.getIdentifier());
                    logger.info("Token service: " + mrdt.getService());

                    cred.addToken(mrdt.getService(), mrdt);
                }
            }
        });

        FileOutputStream fos = null;
        DataOutputStream dos = null;
        try {
            fos = new FileOutputStream(tokenFile);
            dos = new DataOutputStream(fos);
            cred.writeTokenStorageToStream(dos);
        } finally {
            if (dos != null) {
                try {
                    dos.close();
                } catch (Throwable t) {
                    // best effort
                    logger.error("encountered exception while closing DataOutputStream of the tokenFile", t);
                }
            }
            if (fos != null) {
                fos.close();
            }
        }

        // stash them to cancel after use.
        logger.info("Tokens loaded in " + tokenFile.getAbsolutePath());
    } catch (Exception e) {
        throw new HadoopSecurityManagerException(
                "Failed to get hadoop tokens! " + e.getMessage() + e.getCause(), e);
    } catch (Throwable t) {
        throw new HadoopSecurityManagerException(
                "Failed to get hadoop tokens! " + t.getMessage() + t.getCause(), t);
    }
}
From source file:cascading.flow.tez.planner.Hadoop2TezFlowStepJob.java
License:Open Source License
private Path prepareEnsureStagingDir(TezConfiguration workingConf) throws IOException {
    String stepStagingPath = createStepStagingPath();

    workingConf.set(TezConfiguration.TEZ_AM_STAGING_DIR, stepStagingPath);

    Path stagingDir = new Path(stepStagingPath);
    FileSystem fileSystem = FileSystem.get(workingConf);

    stagingDir = fileSystem.makeQualified(stagingDir);

    TokenCache.obtainTokensForNamenodes(new Credentials(), new Path[] { stagingDir }, workingConf);

    TezClientUtils.ensureStagingDirExists(workingConf, stagingDir);

    if (fileSystem.getScheme().startsWith("file:/"))
        new File(stagingDir.toUri()).mkdirs();

    return stagingDir;
}
From source file:co.cask.cdap.internal.app.runtime.batch.dataset.partitioned.DynamicPartitioningOutputFormat.java
License:Apache License
@Override
public void checkOutputSpecs(JobContext job) throws IOException {
    // Ensure that the output directory is set and not already there
    Path outDir = getOutputPath(job);
    if (outDir == null) {
        throw new InvalidJobConfException("Output directory not set.");
    }

    // get delegation token for outDir's file system
    TokenCache.obtainTokensForNamenodes(job.getCredentials(), new Path[] { outDir }, job.getConfiguration());

    // we permit multiple jobs writing to the same output directory. We handle this by each one writing to
    // distinct paths within that directory. See createJobSpecificPath method and usages of it.

    // additionally check that output dataset and dynamic partitioner class name has been set in conf
    if (job.getConfiguration().get(Constants.Dataset.Partitioned.HCONF_ATTR_OUTPUT_DATASET) == null) {
        throw new InvalidJobConfException("The job configuration does not contain required property: "
                + Constants.Dataset.Partitioned.HCONF_ATTR_OUTPUT_DATASET);
    }

    Class<? extends DynamicPartitioner> partitionerClass = job.getConfiguration().getClass(
            PartitionedFileSetArguments.DYNAMIC_PARTITIONER_CLASS_NAME, null, DynamicPartitioner.class);
    if (partitionerClass == null) {
        throw new InvalidJobConfException("The job configuration does not contain required property: "
                + PartitionedFileSetArguments.DYNAMIC_PARTITIONER_CLASS_NAME);
    }

    Class<? extends FileOutputFormat> delegateOutputFormatClass = job.getConfiguration().getClass(
            Constants.Dataset.Partitioned.HCONF_ATTR_OUTPUT_FORMAT_CLASS_NAME, null, FileOutputFormat.class);
    if (delegateOutputFormatClass == null) {
        throw new InvalidJobConfException("The job configuration does not contain required property: "
                + Constants.Dataset.Partitioned.HCONF_ATTR_OUTPUT_FORMAT_CLASS_NAME);
    }
}
From source file:com.asakusafw.runtime.stage.output.TemporaryOutputFormat.java
License:Apache License
@Override
public void checkOutputSpecs(JobContext context) throws IOException, InterruptedException {
    if (context == null) {
        throw new IllegalArgumentException("context must not be null"); //$NON-NLS-1$
    }
    Path path = getOutputPath(context);
    if (TemporaryOutputFormat.getOutputPath(context) == null) {
        throw new IOException("Temporary output path is not set");
    }
    TokenCache.obtainTokensForNamenodes(context.getCredentials(), new Path[] { path },
            context.getConfiguration());
    if (path.getFileSystem(context.getConfiguration()).exists(path)) {
        throw new IOException(MessageFormat.format("Output directory {0} already exists", path));
    }
}
From source file:com.bianfeng.bfas.hive.io.RealtimeInputFormat2.java
License:Apache License
/**
 * List input directories.
 * Subclasses may override to, e.g., select only files matching a regular
 * expression.
 *
 * @param job the job to list input paths for
 * @return array of FileStatus objects
 * @throws IOException if zero items.
 */
protected FileStatus[] listStatus(JobConf job) throws IOException {
    Path[] dirs = getInputPaths(job);
    if (dirs.length == 0) {
        throw new IOException("No input paths specified in job");
    }

    // get tokens for all the required FileSystems..
    TokenCache.obtainTokensForNamenodes(job.getCredentials(), dirs, job);

    List<FileStatus> result = new ArrayList<FileStatus>();
    List<IOException> errors = new ArrayList<IOException>();

    // creates a MultiPathFilter with the hiddenFileFilter and the
    // user provided one (if any).
    List<PathFilter> filters = new ArrayList<PathFilter>();
    filters.add(hiddenFileFilter);
    PathFilter jobFilter = getInputPathFilter(job);
    if (jobFilter != null) {
        filters.add(jobFilter);
    }
    PathFilter inputFilter = new MultiPathFilter(filters);

    for (Path p : dirs) {
        FileSystem fs = p.getFileSystem(job);
        FileStatus[] matches = fs.globStatus(p, inputFilter);
        if (matches == null) {
            errors.add(new IOException("Input path does not exist: " + p));
        } else if (matches.length == 0) {
            errors.add(new IOException("Input Pattern " + p + " matches 0 files"));
        } else {
            for (FileStatus globStat : matches) {
                if (globStat.isDir()) {
                    for (FileStatus stat : fs.listStatus(globStat.getPath(), inputFilter)) {
                        result.add(stat);
                    }
                } else {
                    result.add(globStat);
                }
            }
        }
    }

    if (!errors.isEmpty()) {
        throw new InvalidInputException(errors);
    }
    LOG.info("Total input paths to process : " + result.size());
    return result.toArray(new FileStatus[result.size()]);
}
From source file:com.bonc.mr_roamRecognition_hjpt.comm.NewFileOutputFormat.java
License:Apache License
public void checkOutputSpecs(JobContext job) throws FileAlreadyExistsException, IOException {
    // Ensure that the output directory is set and not already there
    Path outDir = getOutputPath(job);
    if (outDir == null) {
        throw new InvalidJobConfException("Output directory not set.");
    }

    // get delegation token for outDir's file system
    TokenCache.obtainTokensForNamenodes(job.getCredentials(), new Path[] { outDir }, job.getConfiguration());

    // if (outDir.getFileSystem(job.getConfiguration()).exists(outDir)) {
    //     throw new FileAlreadyExistsException("Output directory " + outDir + " already exists");
    // }
}
From source file:com.cloudera.recordservice.examples.terasort.TeraOutputFormat.java
License:Apache License
@Override
public void checkOutputSpecs(JobContext job) throws InvalidJobConfException, IOException {
    // Ensure that the output directory is set
    Path outDir = getOutputPath(job);
    if (outDir == null) {
        throw new InvalidJobConfException("Output directory not set in JobConf.");
    }

    // get delegation token for outDir's file system
    TokenCache.obtainTokensForNamenodes(job.getCredentials(), new Path[] { outDir }, job.getConfiguration());
}
From source file:com.david.mos.out.FileOutputFormat.java
License:Apache License
public void checkOutputSpecs(JobContext job) throws FileAlreadyExistsException, IOException {
    // Ensure that the output directory is set and not already there
    Path outDir = getOutputPath(job);
    if (outDir == null) {
        throw new InvalidJobConfException("Output directory not set.");
    }

    // get delegation token for outDir's file system
    TokenCache.obtainTokensForNamenodes(job.getCredentials(), new Path[] { outDir }, job.getConfiguration());

    if (outDir.getFileSystem(job.getConfiguration()).exists(outDir)) {
        throw new FileAlreadyExistsException("Output directory " + outDir + " already exists");
    }
}
From source file:com.dinglicom.clouder.mapreduce.input.FileInputFormat.java
License:Apache License
/**
 * List input directories.
 * Subclasses may override to, e.g., select only files matching a regular
 * expression.
 *
 * @param job the job to list input paths for
 * @return array of FileStatus objects
 * @throws IOException if zero items.
 */
protected List<FileStatus> listStatus(JobContext job) throws IOException {
    List<FileStatus> result = new ArrayList<FileStatus>();
    Path[] dirs = getInputPaths(job);
    if (dirs.length == 0) {
        throw new IOException("No input paths specified in job");
    }

    // get tokens for all the required FileSystems..
    TokenCache.obtainTokensForNamenodes(job.getCredentials(), dirs, job.getConfiguration());

    List<IOException> errors = new ArrayList<IOException>();

    // creates a MultiPathFilter with the hiddenFileFilter and the
    // user provided one (if any).
    List<PathFilter> filters = new ArrayList<PathFilter>();
    filters.add(hiddenFileFilter);
    PathFilter jobFilter = getInputPathFilter(job);
    if (jobFilter != null) {
        filters.add(jobFilter);
    }
    PathFilter inputFilter = new MultiPathFilter(filters);

    for (int i = 0; i < dirs.length; ++i) {
        Path p = dirs[i];
        FileSystem fs = p.getFileSystem(job.getConfiguration());
        FileStatus[] matches = fs.globStatus(p, inputFilter);
        if (matches == null) {
            errors.add(new IOException("Input path does not exist: " + p));
        } else if (matches.length == 0) {
            errors.add(new IOException("Input Pattern " + p + " matches 0 files"));
        } else {
            for (FileStatus globStat : matches) {
                if (globStat.isDirectory()) {
                    for (FileStatus stat : fs.listStatus(globStat.getPath(), inputFilter)) {
                        result.add(stat);
                    }
                } else {
                    result.add(globStat);
                }
            }
        }
    }

    if (!errors.isEmpty()) {
        throw new InvalidInputException(errors);
    }
    LOG.info("Total input paths to process : " + result.size());
    return result;
}
From source file:com.ikanow.aleph2.analytics.hadoop.assets.UpdatedFileInputFormat.java
License:Apache License
/**
 * List input directories.
 * Subclasses may override to, e.g., select only files matching a regular
 * expression.
 *
 * @param job the job to list input paths for
 * @return array of FileStatus objects
 * @throws IOException if zero items.
 */
protected List<FileStatus> listStatus(JobContext job) throws IOException {
    Path[] dirs = getInputPaths(job);
    if (dirs.length == 0) {
        throw new IOException("No input paths specified in job");
    }

    // get tokens for all the required FileSystems..
    TokenCache.obtainTokensForNamenodes(job.getCredentials(), dirs, job.getConfiguration());

    // Whether we need to recursive look into the directory structure
    boolean recursive = getInputDirRecursive(job);

    // creates a MultiPathFilter with the hiddenFileFilter and the
    // user provided one (if any).
    List<PathFilter> filters = new ArrayList<PathFilter>();
    filters.add(hiddenFileFilter);
    PathFilter jobFilter = getInputPathFilter(job);
    if (jobFilter != null) {
        filters.add(jobFilter);
    }
    PathFilter inputFilter = new MultiPathFilter(filters);

    List<FileStatus> result = null;

    int numThreads = job.getConfiguration().getInt(LIST_STATUS_NUM_THREADS, DEFAULT_LIST_STATUS_NUM_THREADS);
    Stopwatch sw = Stopwatch.createStarted();
    if (numThreads == 1) {
        result = singleThreadedListStatus(job, dirs, inputFilter, recursive);
    } else {
        Iterable<FileStatus> locatedFiles = null;
        try {
            LocatedFileStatusFetcher locatedFileStatusFetcher = new LocatedFileStatusFetcher(
                    job.getConfiguration(), dirs, recursive, inputFilter, true);
            locatedFiles = locatedFileStatusFetcher.getFileStatuses();
        } catch (InterruptedException e) {
            throw new IOException("Interrupted while getting file statuses");
        }
        result = Lists.newArrayList(locatedFiles);
    }
    sw.stop();
    if (LOG.isDebugEnabled()) {
        LOG.debug("Time taken to get FileStatuses: " + sw.elapsed(TimeUnit.MILLISECONDS));
    }
    LOG.info("Total input paths to process : " + result.size());
    return result;
}