List of usage examples for org.apache.hadoop.mapreduce.security.TokenCache.obtainTokensForNamenodes
public static void obtainTokensForNamenodes(Credentials credentials, Path[] ps, Configuration conf) throws IOException
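Before the examples, here is a minimal sketch of the typical call pattern: a job driver collects the HDFS paths the job will touch and asks TokenCache to fetch delegation tokens for each distinct NameNode into the job's Credentials before submission. On an insecure cluster the call is effectively a no-op. The class name and paths below are illustrative placeholders, not taken from any of the sources that follow.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.security.TokenCache;

public class TokenCacheSketch {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "token-cache-sketch");
        // Hypothetical input/output locations; in a secure (Kerberos) cluster,
        // delegation tokens must be obtained for every NameNode these paths live on.
        Path[] paths = new Path[] { new Path("hdfs://nn1:8020/user/demo/input"),
                new Path("hdfs://nn2:8020/user/demo/output") };
        // Fetches one delegation token per distinct file system and stores it
        // in the job's Credentials, keyed by the token service address.
        TokenCache.obtainTokensForNamenodes(job.getCredentials(), paths, job.getConfiguration());
    }
}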
From source file:gobblin.hadoop.token.TokenUtils.java
License:Open Source License
private static void getOtherNamenodesToken(List<String> otherNamenodes, Configuration conf, Credentials cred)
        throws IOException {
    LOG.info(OTHER_NAMENODES + ": " + otherNamenodes);
    Path[] ps = new Path[otherNamenodes.size()];
    for (int i = 0; i < ps.length; i++) {
        ps[i] = new Path(otherNamenodes.get(i).trim());
    }
    TokenCache.obtainTokensForNamenodes(cred, ps, conf);
    LOG.info("Successfully fetched tokens for: " + otherNamenodes);
}
From source file:info.halo9pan.word2vec.hadoop.mr.SortOutputFormat.java
License:Apache License
@Override
public void checkOutputSpecs(JobContext job) throws InvalidJobConfException, IOException {
    // Ensure that the output directory is set
    Path outDir = getOutputPath(job);
    if (outDir == null) {
        throw new InvalidJobConfException("Output directory not set in JobConf.");
    }
    final Configuration jobConf = job.getConfiguration();
    // get delegation token for outDir's file system
    TokenCache.obtainTokensForNamenodes(job.getCredentials(), new Path[] { outDir }, jobConf);
    final FileSystem fs = outDir.getFileSystem(jobConf);
    if (fs.exists(outDir)) {
        // existing output dir is considered empty iff its only content is the
        // partition file.
        final FileStatus[] outDirKids = fs.listStatus(outDir);
        boolean empty = false;
        if (outDirKids != null && outDirKids.length == 1) {
            final FileStatus st = outDirKids[0];
            final String fname = st.getPath().getName();
            empty = !st.isDirectory() && SortInputFormat.PARTITION_FILENAME.equals(fname);
        }
        if (WordSort.getUseSimplePartitioner(job) || !empty) {
            throw new FileAlreadyExistsException("Output directory " + outDir + " already exists");
        }
    }
}
From source file:info.halo9pan.word2vec.hadoop.terasort.TeraOutputFormat.java
License:Apache License
@Override
public void checkOutputSpecs(JobContext job) throws InvalidJobConfException, IOException {
    // Ensure that the output directory is set
    Path outDir = getOutputPath(job);
    if (outDir == null) {
        throw new InvalidJobConfException("Output directory not set in JobConf.");
    }
    final Configuration jobConf = job.getConfiguration();
    // get delegation token for outDir's file system
    TokenCache.obtainTokensForNamenodes(job.getCredentials(), new Path[] { outDir }, jobConf);
    final FileSystem fs = outDir.getFileSystem(jobConf);
    if (fs.exists(outDir)) {
        // existing output dir is considered empty iff its only content is the
        // partition file.
        final FileStatus[] outDirKids = fs.listStatus(outDir);
        boolean empty = false;
        if (outDirKids != null && outDirKids.length == 1) {
            final FileStatus st = outDirKids[0];
            final String fname = st.getPath().getName();
            empty = !st.isDirectory() && TeraInputFormat.PARTITION_FILENAME.equals(fname);
        }
        if (TeraSort.getUseSimplePartitioner(job) || !empty) {
            throw new FileAlreadyExistsException("Output directory " + outDir + " already exists");
        }
    }
}
From source file:io.amient.kafka.hadoop.io.MultiOutputFormat.java
License:Apache License
public void checkOutputSpecs(JobContext job) throws IOException {
    // Ensure that the output directory is set and not already there
    Path outDir = getOutputPath(job);
    if (outDir == null) {
        throw new InvalidJobConfException("Output directory not set.");
    }
    // get delegation token for outDir's file system
    TokenCache.obtainTokensForNamenodes(job.getCredentials(), new Path[] { outDir }, job.getConfiguration());
}
From source file:kogiri.mapreduce.common.kmermatch.KmerMatchInputFormat.java
License:Open Source License
@Override
protected List<FileStatus> listStatus(JobContext job) throws IOException {
    List<FileStatus> result = new ArrayList<FileStatus>();
    Path[] dirs = getInputPaths(job);
    if (dirs.length == 0) {
        throw new IOException("No input paths specified in job");
    }
    // get tokens for all the required FileSystems..
    TokenCache.obtainTokensForNamenodes(job.getCredentials(), dirs, job.getConfiguration());
    // creates a MultiPathFilter with the hiddenFileFilter and the
    // user provided one (if any).
    List<PathFilter> filters = new ArrayList<PathFilter>();
    PathFilter jobFilter = getInputPathFilter(job);
    if (jobFilter != null) {
        filters.add(jobFilter);
    }
    filters.add(new KmerIndexIndexPathFilter());
    PathFilter inputFilter = new MultiPathFilter(filters);
    for (int i = 0; i < dirs.length; ++i) {
        Path p = dirs[i];
        if (inputFilter.accept(p)) {
            FileSystem fs = p.getFileSystem(job.getConfiguration());
            FileStatus status = fs.getFileStatus(p);
            result.add(status);
        }
    }
    LOG.info("Total input paths to process : " + result.size());
    return result;
}
From source file:kogiri.mapreduce.preprocess.common.kmerindex.KmerIndexInputFormat.java
License:Open Source License
@Override
protected List<FileStatus> listStatus(JobContext job) throws IOException {
    List<FileStatus> result = new ArrayList<FileStatus>();
    Path[] dirs = getInputPaths(job);
    if (dirs.length == 0) {
        throw new IOException("No input paths specified in job");
    }
    // get tokens for all the required FileSystems..
    TokenCache.obtainTokensForNamenodes(job.getCredentials(), dirs, job.getConfiguration());
    // creates a MultiPathFilter with the hiddenFileFilter and the
    // user provided one (if any).
    List<PathFilter> filters = new ArrayList<PathFilter>();
    PathFilter jobFilter = getInputPathFilter(job);
    if (jobFilter != null) {
        filters.add(jobFilter);
    }
    filters.add(new KmerIndexPartPathFilter());
    PathFilter inputFilter = new MultiPathFilter(filters);
    for (int i = 0; i < dirs.length; ++i) {
        Path p = dirs[i];
        if (inputFilter.accept(p)) {
            FileSystem fs = p.getFileSystem(job.getConfiguration());
            FileStatus status = fs.getFileStatus(p);
            result.add(status);
        }
    }
    LOG.info("Total input paths to process : " + result.size());
    return result;
}
From source file:ml.shifu.shifu.guagua.ShifuInputFormat.java
License:Apache License
@SuppressWarnings("deprecation")
protected List<FileStatus> listCrossValidationStatus(JobContext job) throws IOException {
    List<FileStatus> result = new ArrayList<FileStatus>();
    Path[] dirs = getInputPaths(job);
    if (dirs.length == 0) {
        throw new IOException("No input paths specified in job");
    }
    // get tokens for all the required FileSystems..
    TokenCache.obtainTokensForNamenodes(job.getCredentials(), dirs, job.getConfiguration());
    // Whether we need to recursive look into the directory structure
    boolean recursive = job.getConfiguration()
            .getBoolean("mapreduce.input.fileinputformat.input.dir.recursive", false);
    List<IOException> errors = new ArrayList<IOException>();
    // creates a MultiPathFilter with the hiddenFileFilter and the
    // user provided one (if any).
    List<PathFilter> filters = new ArrayList<PathFilter>();
    filters.add(hiddenFileFilter);
    PathFilter jobFilter = getInputPathFilter(job);
    if (jobFilter != null) {
        filters.add(jobFilter);
    }
    PathFilter inputFilter = new MultiPathFilter(filters);
    for (int i = 0; i < dirs.length; ++i) {
        Path p = dirs[i];
        FileSystem fs = p.getFileSystem(job.getConfiguration());
        FileStatus[] matches = fs.globStatus(p, inputFilter);
        if (matches == null) {
            errors.add(new IOException("Input path does not exist: " + p));
        } else if (matches.length == 0) {
            errors.add(new IOException("Input Pattern " + p + " matches 0 files"));
        } else {
            for (FileStatus globStat : matches) {
                if (globStat.isDir()) {
                    FileStatus[] fss = fs.listStatus(globStat.getPath());
                    for (FileStatus fileStatus : fss) {
                        if (inputFilter.accept(fileStatus.getPath())) {
                            if (recursive && fileStatus.isDir()) {
                                addInputPathRecursive(result, fs, fileStatus.getPath(), inputFilter);
                            } else {
                                result.add(fileStatus);
                            }
                        }
                    }
                } else {
                    result.add(globStat);
                }
            }
        }
    }
    if (!errors.isEmpty()) {
        throw new InvalidInputException(errors);
    }
    LOG.info("Total validation paths to process : " + result.size());
    return result;
}
From source file:org.apache.carbondata.hadoop.api.CarbonInputFormat.java
License:Apache License
/**
 * get data blocks of given segment
 */
protected List<CarbonInputSplit> getDataBlocksOfSegment(JobContext job, CarbonTable carbonTable,
        Expression expression, BitSet matchedPartitions, List<Segment> segmentIds,
        PartitionInfo partitionInfo, List<Integer> oldPartitionIdList) throws IOException {
    QueryStatisticsRecorder recorder = CarbonTimeStatisticsFactory.createDriverRecorder();
    QueryStatistic statistic = new QueryStatistic();
    // get tokens for all the required FileSystem for table path
    TokenCache.obtainTokensForNamenodes(job.getCredentials(),
            new Path[] { new Path(carbonTable.getTablePath()) }, job.getConfiguration());
    List<ExtendedBlocklet> prunedBlocklets = getPrunedBlocklets(job, carbonTable, expression, segmentIds);
    List<CarbonInputSplit> resultFilteredBlocks = new ArrayList<>();
    int partitionIndex = 0;
    List<Integer> partitionIdList = new ArrayList<>();
    if (partitionInfo != null && partitionInfo.getPartitionType() != PartitionType.NATIVE_HIVE) {
        partitionIdList = partitionInfo.getPartitionIds();
    }
    for (ExtendedBlocklet blocklet : prunedBlocklets) {
        long partitionId = CarbonTablePath.DataFileUtil
                .getTaskIdFromTaskNo(CarbonTablePath.DataFileUtil.getTaskNo(blocklet.getPath()));
        // oldPartitionIdList is only used in the alter table partition command, because it
        // changes the partition info first and then reads data.
        // Other normal queries should use the newest partitionIdList.
        if (partitionInfo != null && partitionInfo.getPartitionType() != PartitionType.NATIVE_HIVE) {
            if (oldPartitionIdList != null) {
                partitionIndex = oldPartitionIdList.indexOf((int) partitionId);
            } else {
                partitionIndex = partitionIdList.indexOf((int) partitionId);
            }
        }
        if (partitionIndex != -1) {
            // matchedPartitions will be null in two cases:
            // 1. the table is not a partition table
            // 2. the table is a partition table, and all partitions are matched by the query
            // For a partition table, the task id of the carbondata file name is the partition id;
            // if this partition is not required, skip it here.
            if (matchedPartitions == null || matchedPartitions.get(partitionIndex)) {
                CarbonInputSplit inputSplit = convertToCarbonInputSplit(blocklet);
                if (inputSplit != null) {
                    resultFilteredBlocks.add(inputSplit);
                }
            }
        }
    }
    statistic.addStatistics(QueryStatisticsConstants.LOAD_BLOCKS_DRIVER, System.currentTimeMillis());
    recorder.recordStatisticsForDriver(statistic, job.getConfiguration().get("query.id"));
    return resultFilteredBlocks;
}
From source file:org.apache.carbondata.hadoop.CarbonInputFormat.java
License:Apache License
private void getFileStatusOfSegments(JobContext job, String[] segmentsToConsider, List<FileStatus> result)
        throws IOException {
    String[] partitionsToConsider = getValidPartitions(job);
    if (partitionsToConsider.length == 0) {
        throw new IOException("No partitions/data found");
    }
    PathFilter inputFilter = getDataFileFilter(job);
    CarbonTablePath tablePath = getTablePath(job.getConfiguration());
    // get tokens for all the required FileSystem for table path
    TokenCache.obtainTokensForNamenodes(job.getCredentials(), new Path[] { tablePath },
            job.getConfiguration());
    // get all data files of valid partitions and segments
    for (int i = 0; i < partitionsToConsider.length; ++i) {
        String partition = partitionsToConsider[i];
        for (int j = 0; j < segmentsToConsider.length; ++j) {
            String segmentId = segmentsToConsider[j];
            Path segmentPath = new Path(tablePath.getCarbonDataDirectoryPath(partition, segmentId));
            FileSystem fs = segmentPath.getFileSystem(job.getConfiguration());
            RemoteIterator<LocatedFileStatus> iter = fs.listLocatedStatus(segmentPath);
            while (iter.hasNext()) {
                LocatedFileStatus stat = iter.next();
                if (inputFilter.accept(stat.getPath())) {
                    if (stat.isDirectory()) {
                        addInputPathRecursively(result, fs, stat.getPath(), inputFilter);
                    } else {
                        result.add(stat);
                    }
                }
            }
        }
    }
}
From source file:org.apache.flink.tez.client.TezExecutor.java
License:Apache License
private static void addLocalResource(TezConfiguration tezConf, Path jarPath, DAG dag) {
    try {
        org.apache.hadoop.fs.FileSystem fs = org.apache.hadoop.fs.FileSystem.get(tezConf);
        LOG.info("Jar path received is " + jarPath.toString());
        String jarFile = jarPath.getName();
        Path remoteJarPath = null;
        /*
        if (tezConf.get(TezConfiguration.TEZ_AM_STAGING_DIR) == null) {
            LOG.info("Tez staging directory is null, setting it.");
            Path stagingDir = new Path(fs.getWorkingDirectory(), UUID.randomUUID().toString());
            LOG.info("Setting Tez staging directory to " + stagingDir.toString());
            tezConf.set(TezConfiguration.TEZ_AM_STAGING_DIR, stagingDir.toString());
            LOG.info("Set Tez staging directory to " + stagingDir.toString());
        }
        Path stagingDir = new Path(tezConf.get(TezConfiguration.TEZ_AM_STAGING_DIR));
        LOG.info("Ensuring that Tez staging directory exists");
        TezClientUtils.ensureStagingDirExists(tezConf, stagingDir);
        LOG.info("Tez staging directory exists and is " + stagingDir.toString());
        */
        Path stagingDir = TezCommonUtils.getTezBaseStagingPath(tezConf);
        LOG.info("Tez staging path is " + stagingDir);
        TezClientUtils.ensureStagingDirExists(tezConf, stagingDir);
        LOG.info("Tez staging dir exists");
        remoteJarPath = fs.makeQualified(new Path(stagingDir, jarFile));
        LOG.info("Copying " + jarPath.toString() + " to " + remoteJarPath.toString());
        fs.copyFromLocalFile(jarPath, remoteJarPath);
        FileStatus remoteJarStatus = fs.getFileStatus(remoteJarPath);
        Credentials credentials = new Credentials();
        TokenCache.obtainTokensForNamenodes(credentials, new Path[] { remoteJarPath }, tezConf);
        Map<String, LocalResource> localResources = new TreeMap<String, LocalResource>();
        LocalResource jobJar = LocalResource.newInstance(ConverterUtils.getYarnUrlFromPath(remoteJarPath),
                LocalResourceType.FILE, LocalResourceVisibility.APPLICATION, remoteJarStatus.getLen(),
                remoteJarStatus.getModificationTime());
        localResources.put(jarFile, jobJar);
        dag.addTaskLocalFiles(localResources);
        LOG.info("Added job jar as local resource.");
    } catch (Exception e) {
        System.out.println(e.getMessage());
        e.printStackTrace();
        System.exit(-1);
    }
}