List of usage examples for org.apache.hadoop.mapreduce.JobContext#getCredentials(). The method returns the job's Credentials object, which holds the delegation tokens and secret keys the job uses to authenticate against secure services such as HDFS.
public Credentials getCredentials();
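Most of the examples below follow one of two patterns: passing the job's Credentials to TokenCache so namenode delegation tokens are obtained up front, or copying tokens between contexts with addAll(). As a minimal sketch of the first pattern (the class and method names here are illustrative only, not part of any project listed below):

import java.io.IOException;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.security.TokenCache;

public final class CredentialsExample {

    /**
     * Fetch namenode delegation tokens for the given paths and store them in the
     * job's Credentials, so tasks can read those paths on a secure cluster.
     */
    public static void addTokensForPaths(JobContext job, Path... paths) throws IOException {
        // getCredentials() exposes the job-wide store of tokens and secret keys;
        // TokenCache adds one delegation token per distinct namenode to it.
        TokenCache.obtainTokensForNamenodes(job.getCredentials(), paths, job.getConfiguration());
    }
}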
From source file:ml.shifu.shifu.guagua.ShifuInputFormat.java
License:Apache License
@SuppressWarnings("deprecation") protected List<FileStatus> listCrossValidationStatus(JobContext job) throws IOException { List<FileStatus> result = new ArrayList<FileStatus>(); Path[] dirs = getInputPaths(job); if (dirs.length == 0) { throw new IOException("No input paths specified in job"); }/*from ww w. j a va 2 s .c om*/ // get tokens for all the required FileSystems.. TokenCache.obtainTokensForNamenodes(job.getCredentials(), dirs, job.getConfiguration()); // Whether we need to recursive look into the directory structure boolean recursive = job.getConfiguration().getBoolean("mapreduce.input.fileinputformat.input.dir.recursive", false); List<IOException> errors = new ArrayList<IOException>(); // creates a MultiPathFilter with the hiddenFileFilter and the // user provided one (if any). List<PathFilter> filters = new ArrayList<PathFilter>(); filters.add(hiddenFileFilter); PathFilter jobFilter = getInputPathFilter(job); if (jobFilter != null) { filters.add(jobFilter); } PathFilter inputFilter = new MultiPathFilter(filters); for (int i = 0; i < dirs.length; ++i) { Path p = dirs[i]; FileSystem fs = p.getFileSystem(job.getConfiguration()); FileStatus[] matches = fs.globStatus(p, inputFilter); if (matches == null) { errors.add(new IOException("Input path does not exist: " + p)); } else if (matches.length == 0) { errors.add(new IOException("Input Pattern " + p + " matches 0 files")); } else { for (FileStatus globStat : matches) { if (globStat.isDir()) { FileStatus[] fss = fs.listStatus(globStat.getPath()); for (FileStatus fileStatus : fss) { if (inputFilter.accept(fileStatus.getPath())) { if (recursive && fileStatus.isDir()) { addInputPathRecursive(result, fs, fileStatus.getPath(), inputFilter); } else { result.add(fileStatus); } } } } else { result.add(globStat); } } } } if (!errors.isEmpty()) { throw new InvalidInputException(errors); } LOG.info("Total validation paths to process : " + result.size()); return result; }
From source file:org.apache.accumulo.core.client.mapreduce.lib.impl.ConfiguratorBase.java
License:Apache License
/**
 * Unwraps the provided {@link AuthenticationToken} if it is an instance of {@link DelegationTokenStub},
 * reconstituting it from the provided {@link JobConf}.
 *
 * @param job
 *          The job
 * @param token
 *          The authentication token
 */
public static AuthenticationToken unwrapAuthenticationToken(JobContext job, AuthenticationToken token) {
    requireNonNull(job);
    requireNonNull(token);
    if (token instanceof DelegationTokenStub) {
        DelegationTokenStub delTokenStub = (DelegationTokenStub) token;
        Token<? extends TokenIdentifier> hadoopToken = job.getCredentials()
                .getToken(new Text(delTokenStub.getServiceName()));
        AuthenticationTokenIdentifier identifier = new AuthenticationTokenIdentifier();
        try {
            identifier.readFields(new DataInputStream(new ByteArrayInputStream(hadoopToken.getIdentifier())));
            return new DelegationTokenImpl(hadoopToken.getPassword(), identifier);
        } catch (IOException e) {
            throw new RuntimeException("Could not construct DelegationToken from JobConf Credentials", e);
        }
    }
    return token;
}
From source file:org.apache.accumulo.core.clientImpl.mapreduce.lib.ConfiguratorBase.java
License:Apache License
/**
 * Unwraps the provided {@link AuthenticationToken} if it is an instance of DelegationTokenStub,
 * reconstituting it from the provided {@link JobConf}.
 *
 * @param job
 *          The job
 * @param token
 *          The authentication token
 */
public static AuthenticationToken unwrapAuthenticationToken(JobContext job, AuthenticationToken token) {
    requireNonNull(job);
    requireNonNull(token);
    if (token instanceof org.apache.accumulo.core.clientImpl.mapreduce.DelegationTokenStub) {
        org.apache.accumulo.core.clientImpl.mapreduce.DelegationTokenStub delTokenStub =
                (org.apache.accumulo.core.clientImpl.mapreduce.DelegationTokenStub) token;
        Token<? extends TokenIdentifier> hadoopToken = job.getCredentials()
                .getToken(new Text(delTokenStub.getServiceName()));
        AuthenticationTokenIdentifier identifier = new AuthenticationTokenIdentifier();
        try {
            identifier.readFields(new DataInputStream(new ByteArrayInputStream(hadoopToken.getIdentifier())));
            return new DelegationTokenImpl(hadoopToken.getPassword(), identifier);
        } catch (IOException e) {
            throw new RuntimeException("Could not construct DelegationToken from JobConf Credentials", e);
        }
    }
    return token;
}
From source file:org.apache.carbondata.hadoop.api.CarbonInputFormat.java
License:Apache License
/**
 * get data blocks of given segment
 */
protected List<CarbonInputSplit> getDataBlocksOfSegment(JobContext job, CarbonTable carbonTable,
        Expression expression, BitSet matchedPartitions, List<Segment> segmentIds,
        PartitionInfo partitionInfo, List<Integer> oldPartitionIdList) throws IOException {
    QueryStatisticsRecorder recorder = CarbonTimeStatisticsFactory.createDriverRecorder();
    QueryStatistic statistic = new QueryStatistic();

    // get tokens for all the required FileSystem for table path
    TokenCache.obtainTokensForNamenodes(job.getCredentials(),
            new Path[] { new Path(carbonTable.getTablePath()) }, job.getConfiguration());
    List<ExtendedBlocklet> prunedBlocklets = getPrunedBlocklets(job, carbonTable, expression, segmentIds);

    List<CarbonInputSplit> resultFilteredBlocks = new ArrayList<>();
    int partitionIndex = 0;
    List<Integer> partitionIdList = new ArrayList<>();
    if (partitionInfo != null && partitionInfo.getPartitionType() != PartitionType.NATIVE_HIVE) {
        partitionIdList = partitionInfo.getPartitionIds();
    }
    for (ExtendedBlocklet blocklet : prunedBlocklets) {
        long partitionId = CarbonTablePath.DataFileUtil
                .getTaskIdFromTaskNo(CarbonTablePath.DataFileUtil.getTaskNo(blocklet.getPath()));

        // oldPartitionIdList is only used in the alter table partition command because it changes
        // the partition info first and then reads data.
        // Other normal queries should use the newest partitionIdList.
        if (partitionInfo != null && partitionInfo.getPartitionType() != PartitionType.NATIVE_HIVE) {
            if (oldPartitionIdList != null) {
                partitionIndex = oldPartitionIdList.indexOf((int) partitionId);
            } else {
                partitionIndex = partitionIdList.indexOf((int) partitionId);
            }
        }
        if (partitionIndex != -1) {
            // matchedPartitions will be null in two cases:
            // 1. the table is not a partition table
            // 2. the table is a partition table and all partitions are matched by the query
            // For a partition table, the task id of the carbondata file name is the partition id;
            // if this partition is not required, it is skipped here.
            if (matchedPartitions == null || matchedPartitions.get(partitionIndex)) {
                CarbonInputSplit inputSplit = convertToCarbonInputSplit(blocklet);
                if (inputSplit != null) {
                    resultFilteredBlocks.add(inputSplit);
                }
            }
        }
    }
    statistic.addStatistics(QueryStatisticsConstants.LOAD_BLOCKS_DRIVER, System.currentTimeMillis());
    recorder.recordStatisticsForDriver(statistic, job.getConfiguration().get("query.id"));
    return resultFilteredBlocks;
}
From source file:org.apache.carbondata.hadoop.CarbonInputFormat.java
License:Apache License
private void getFileStatusOfSegments(JobContext job, String[] segmentsToConsider, List<FileStatus> result)
        throws IOException {
    String[] partitionsToConsider = getValidPartitions(job);
    if (partitionsToConsider.length == 0) {
        throw new IOException("No partitions/data found");
    }

    PathFilter inputFilter = getDataFileFilter(job);
    CarbonTablePath tablePath = getTablePath(job.getConfiguration());

    // get tokens for all the required FileSystem for table path
    TokenCache.obtainTokensForNamenodes(job.getCredentials(), new Path[] { tablePath },
            job.getConfiguration());

    // get all data files of valid partitions and segments
    for (int i = 0; i < partitionsToConsider.length; ++i) {
        String partition = partitionsToConsider[i];

        for (int j = 0; j < segmentsToConsider.length; ++j) {
            String segmentId = segmentsToConsider[j];
            Path segmentPath = new Path(tablePath.getCarbonDataDirectoryPath(partition, segmentId));
            FileSystem fs = segmentPath.getFileSystem(job.getConfiguration());

            RemoteIterator<LocatedFileStatus> iter = fs.listLocatedStatus(segmentPath);
            while (iter.hasNext()) {
                LocatedFileStatus stat = iter.next();
                if (inputFilter.accept(stat.getPath())) {
                    if (stat.isDirectory()) {
                        addInputPathRecursively(result, fs, stat.getPath(), inputFilter);
                    } else {
                        result.add(stat);
                    }
                }
            }
        }
    }
}
From source file:org.apache.flink.api.java.hadoop.mapreduce.HadoopInputFormatBase.java
License:Apache License
@Override
public HadoopInputSplit[] createInputSplits(int minNumSplits) throws IOException {
    configuration.setInt("mapreduce.input.fileinputformat.split.minsize", minNumSplits);

    JobContext jobContext;
    try {
        jobContext = HadoopUtils.instantiateJobContext(configuration, new JobID());
    } catch (Exception e) {
        throw new RuntimeException(e);
    }

    jobContext.getCredentials().addAll(this.credentials);
    Credentials currentUserCreds = getCredentialsFromUGI(UserGroupInformation.getCurrentUser());
    if (currentUserCreds != null) {
        jobContext.getCredentials().addAll(currentUserCreds);
    }

    List<org.apache.hadoop.mapreduce.InputSplit> splits;
    try {
        splits = this.mapreduceInputFormat.getSplits(jobContext);
    } catch (InterruptedException e) {
        throw new IOException("Could not get Splits.", e);
    }
    HadoopInputSplit[] hadoopInputSplits = new HadoopInputSplit[splits.size()];

    for (int i = 0; i < hadoopInputSplits.length; i++) {
        hadoopInputSplits[i] = new HadoopInputSplit(i, splits.get(i), jobContext);
    }
    return hadoopInputSplits;
}
From source file:org.apache.flink.api.java.hadoop.mapreduce.HadoopOutputFormatBase.java
License:Apache License
@Override
public void finalizeGlobal(int parallelism) throws IOException {

    JobContext jobContext;
    TaskAttemptContext taskContext;
    try {
        TaskAttemptID taskAttemptID = TaskAttemptID.forName("attempt__0000_r_"
                + String.format("%" + (6 - Integer.toString(1).length()) + "s", " ").replace(" ", "0")
                + Integer.toString(1) + "_0");

        jobContext = HadoopUtils.instantiateJobContext(this.configuration, new JobID());
        taskContext = HadoopUtils.instantiateTaskAttemptContext(this.configuration, taskAttemptID);
        this.outputCommitter = this.mapreduceOutputFormat.getOutputCommitter(taskContext);
    } catch (Exception e) {
        throw new RuntimeException(e);
    }

    jobContext.getCredentials().addAll(this.credentials);
    Credentials currentUserCreds = getCredentialsFromUGI(UserGroupInformation.getCurrentUser());
    if (currentUserCreds != null) {
        jobContext.getCredentials().addAll(currentUserCreds);
    }

    // finalize HDFS output format
    if (this.outputCommitter != null) {
        this.outputCommitter.commitJob(jobContext);
    }
}
From source file:org.apache.hadoop.examples.terasort.TeraOutputFormat.java
License:Apache License
@Override
public void checkOutputSpecs(JobContext job) throws InvalidJobConfException, IOException {
    // Ensure that the output directory is set
    Path outDir = getOutputPath(job);
    if (outDir == null) {
        throw new InvalidJobConfException("Output directory not set in JobConf.");
    }

    final Configuration jobConf = job.getConfiguration();

    // get delegation token for outDir's file system
    TokenCache.obtainTokensForNamenodes(job.getCredentials(), new Path[] { outDir }, jobConf);

    final FileSystem fs = outDir.getFileSystem(jobConf);

    try {
        // existing output dir is considered empty iff its only content is the
        // partition file.
        final FileStatus[] outDirKids = fs.listStatus(outDir);
        boolean empty = false;
        if (outDirKids != null && outDirKids.length == 1) {
            final FileStatus st = outDirKids[0];
            final String fname = st.getPath().getName();
            empty = !st.isDirectory() && TeraInputFormat.PARTITION_FILENAME.equals(fname);
        }
        if (TeraSort.getUseSimplePartitioner(job) || !empty) {
            throw new FileAlreadyExistsException("Output directory " + outDir + " already exists");
        }
    } catch (FileNotFoundException ignored) {
    }
}
From source file:org.apache.hcatalog.hbase.ImportSequenceFile.java
License:Apache License
/**
 * Method to run the Importer MapReduce Job. Normally will be called by another MR job
 * during OutputCommitter.commitJob().
 * @param parentContext JobContext of the parent job
 * @param tableName name of table to bulk load data into
 * @param InputDir path of SequenceFile formatted data to read
 * @param scratchDir temporary path for the Importer MR job to build the HFiles which will be imported
 * @return
 */
static boolean runJob(JobContext parentContext, String tableName, Path InputDir, Path scratchDir) {
    Configuration parentConf = parentContext.getConfiguration();
    Configuration conf = new Configuration();
    for (Map.Entry<String, String> el : parentConf) {
        if (el.getKey().startsWith("hbase."))
            conf.set(el.getKey(), el.getValue());
        if (el.getKey().startsWith("mapred.cache.archives"))
            conf.set(el.getKey(), el.getValue());
    }

    // Inherit jar dependencies added to distributed cache loaded by parent job
    conf.set("mapred.job.classpath.archives", parentConf.get("mapred.job.classpath.archives", ""));
    conf.set("mapreduce.job.cache.archives.visibilities",
            parentConf.get("mapreduce.job.cache.archives.visibilities", ""));

    // Temporary fix until hbase security is ready
    // We need the written HFile to be world readable so
    // hbase regionserver user has the privileges to perform a hdfs move
    if (parentConf.getBoolean("hadoop.security.authorization", false)) {
        FsPermission.setUMask(conf, FsPermission.valueOf("----------"));
    }

    conf.set(HBaseConstants.PROPERTY_OUTPUT_TABLE_NAME_KEY, tableName);
    conf.setBoolean(JobContext.JOB_CANCEL_DELEGATION_TOKEN, false);

    boolean localMode = "local".equals(conf.get("mapred.job.tracker"));

    boolean success = false;
    try {
        FileSystem fs = FileSystem.get(parentConf);
        Path workDir = new Path(new Job(parentConf).getWorkingDirectory(), IMPORTER_WORK_DIR);
        if (!fs.mkdirs(workDir))
            throw new IOException("Importer work directory already exists: " + workDir);
        Job job = createSubmittableJob(conf, tableName, InputDir, scratchDir, localMode);
        job.setWorkingDirectory(workDir);
        job.getCredentials().addAll(parentContext.getCredentials());
        success = job.waitForCompletion(true);
        fs.delete(workDir, true);
        // We only cleanup on success because failure might've been caused by existence of target directory
        if (localMode && success) {
            new ImporterOutputFormat().getOutputCommitter(
                    org.apache.hadoop.mapred.HCatMapRedUtil.createTaskAttemptContext(conf,
                            new TaskAttemptID()))
                    .commitJob(job);
        }
    } catch (InterruptedException e) {
        LOG.error("ImportSequenceFile Failed", e);
    } catch (ClassNotFoundException e) {
        LOG.error("ImportSequenceFile Failed", e);
    } catch (IOException e) {
        LOG.error("ImportSequenceFile Failed", e);
    }
    return success;
}
From source file:org.apache.hcatalog.mapreduce.MultiOutputFormat.java
License:Apache License
@Override
public void checkOutputSpecs(JobContext context) throws IOException, InterruptedException {
    for (String alias : getOutputFormatAliases(context)) {
        LOGGER.debug("Calling checkOutputSpecs for alias: " + alias);
        JobContext aliasContext = getJobContext(alias, context);
        OutputFormat<?, ?> outputFormat = getOutputFormatInstance(aliasContext);
        outputFormat.checkOutputSpecs(aliasContext);
        // Copy credentials and any new config added back to JobContext
        context.getCredentials().addAll(aliasContext.getCredentials());
        setAliasConf(alias, context, aliasContext);
    }
}