List of usage examples for org.apache.hadoop.mapreduce.JobSubmissionFiles.getJobSplitMetaFile
public static Path getJobSplitMetaFile(Path jobSubmissionDir)
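Before the harvested examples, a minimal sketch of the call itself (the staging directory below is a hypothetical placeholder, not taken from any of the sources):

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.JobSubmissionFiles;

public class SplitMetaFileDemo {
    public static void main(String[] args) {
        // Hypothetical staging directory for a submitted job.
        Path jobSubmitDir = new Path("/tmp/staging/job_0001");
        // Resolve the meta-info file that accompanies the serialized splits;
        // by Hadoop convention this is <jobSubmitDir>/job.splitmetainfo.
        Path metaFile = JobSubmissionFiles.getJobSplitMetaFile(jobSubmitDir);
        System.out.println(metaFile);
    }
}

Every example below pairs this call with JobSubmissionFiles.getJobSplitFile, since the meta file indexes the split data written to job.split.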
From source file: ml.shifu.guagua.yarn.GuaguaSplitWriter.java
License: Apache License

public static <T extends InputSplit> void createSplitFiles(Path jobSubmitDir, Configuration conf, FileSystem fs,
        T[] splits) throws IOException, InterruptedException {
    // Serialize the new-api splits into job.split under the submission dir.
    FSDataOutputStream out = createFile(fs, JobSubmissionFiles.getJobSplitFile(jobSubmitDir), conf);
    SplitMetaInfo[] info = writeNewSplits(conf, splits, out);
    out.close();
    // Record the per-split meta info in the companion job.splitmetainfo file.
    writeJobSplitMetaInfo(fs, JobSubmissionFiles.getJobSplitMetaFile(jobSubmitDir),
            new FsPermission(JobSubmissionFiles.JOB_FILE_PERMISSION), splitVersion, info);
}
From source file: ml.shifu.guagua.yarn.GuaguaSplitWriter.java
License: Apache License

public static void createSplitFiles(Path jobSubmitDir, Configuration conf, FileSystem fs,
        org.apache.hadoop.mapred.InputSplit[] splits) throws IOException {
    // Old-api (mapred) variant: serialize the splits into job.split...
    FSDataOutputStream out = createFile(fs, JobSubmissionFiles.getJobSplitFile(jobSubmitDir), conf);
    SplitMetaInfo[] info = writeOldSplits(splits, out, conf);
    out.close();
    // ...then record their meta info in job.splitmetainfo.
    writeJobSplitMetaInfo(fs, JobSubmissionFiles.getJobSplitMetaFile(jobSubmitDir),
            new FsPermission(JobSubmissionFiles.JOB_FILE_PERMISSION), splitVersion, info);
}
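Both Guagua overloads above follow the same write pattern: the serialized splits go to the path from getJobSplitFile, and the per-split meta records (which tell the scheduler each split's size, locations, and offset into the split file) go to the path from getJobSplitMetaFile. For the read side, a hedged sketch using Hadoop's SplitMetaInfoReader; the job ID and staging directory are hypothetical placeholders:

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.JobID;
import org.apache.hadoop.mapreduce.split.JobSplit;
import org.apache.hadoop.mapreduce.split.SplitMetaInfoReader;

public class ReadSplitMetaDemo {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        // Hypothetical staging directory and job ID.
        Path jobSubmitDir = new Path("/tmp/staging/job_0001");
        FileSystem fs = jobSubmitDir.getFileSystem(conf);
        JobID jobId = JobID.forName("job_1400000000000_0001");
        // Each entry carries a split's locations and its offset into job.split.
        JobSplit.TaskSplitMetaInfo[] metas =
                SplitMetaInfoReader.readSplitMetaInfo(jobId, fs, conf, jobSubmitDir);
        System.out.println("splits: " + metas.length);
    }
}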
From source file: org.apache.tez.mapreduce.hadoop.MRInputHelpers.java
License: Apache License

/**
 * Generate new-api mapreduce InputFormat splits.
 *
 * @param jobContext JobContext required by the InputFormat
 * @param inputSplitDir Directory in which to generate the split information
 *
 * @return InputSplitInfo containing the split files' information and the
 *         location hints for each generated split, used to determine the
 *         parallelism of the map stage
 *
 * @throws IOException
 * @throws InterruptedException
 * @throws ClassNotFoundException
 */
private static InputSplitInfoDisk writeNewSplits(JobContext jobContext, Path inputSplitDir)
        throws IOException, InterruptedException, ClassNotFoundException {
    org.apache.hadoop.mapreduce.InputSplit[] splits = generateNewSplits(jobContext, false, 0);
    Configuration conf = jobContext.getConfiguration();

    // Persist the splits and their meta info under inputSplitDir.
    JobSplitWriter.createSplitFiles(inputSplitDir, conf, inputSplitDir.getFileSystem(conf), splits);

    // Build one location hint per split from its preferred hosts.
    List<TaskLocationHint> locationHints = new ArrayList<TaskLocationHint>(splits.length);
    for (int i = 0; i < splits.length; ++i) {
        locationHints.add(TaskLocationHint
                .createTaskLocationHint(new HashSet<String>(Arrays.asList(splits[i].getLocations())), null));
    }

    return new InputSplitInfoDisk(JobSubmissionFiles.getJobSplitFile(inputSplitDir),
            JobSubmissionFiles.getJobSplitMetaFile(inputSplitDir), splits.length, locationHints,
            jobContext.getCredentials());
}
From source file: org.apache.tez.mapreduce.hadoop.MRInputHelpers.java
License: Apache License

/**
 * Generate old-api mapred InputFormat splits.
 *
 * @param jobConf JobConf required by the InputFormat class
 * @param inputSplitDir Directory in which to generate the split information
 *
 * @return InputSplitInfo containing the split files' information and the
 *         number of splits generated, used to determine the parallelism of
 *         the map stage
 *
 * @throws IOException
 */
private static InputSplitInfoDisk writeOldSplits(JobConf jobConf, Path inputSplitDir) throws IOException {
    org.apache.hadoop.mapred.InputSplit[] splits = generateOldSplits(jobConf, false, 0);

    // Persist the splits and their meta info under inputSplitDir.
    JobSplitWriter.createSplitFiles(inputSplitDir, jobConf, inputSplitDir.getFileSystem(jobConf), splits);

    // Build one location hint per split from its preferred hosts.
    List<TaskLocationHint> locationHints = new ArrayList<TaskLocationHint>(splits.length);
    for (int i = 0; i < splits.length; ++i) {
        locationHints.add(TaskLocationHint
                .createTaskLocationHint(new HashSet<String>(Arrays.asList(splits[i].getLocations())), null));
    }

    return new InputSplitInfoDisk(JobSubmissionFiles.getJobSplitFile(inputSplitDir),
            JobSubmissionFiles.getJobSplitMetaFile(inputSplitDir), splits.length, locationHints,
            jobConf.getCredentials());
}
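In both Tez helpers the returned InputSplitInfoDisk simply records the two conventional paths from JobSubmissionFiles.getJobSplitFile and getJobSplitMetaFile, together with the split count, location hints, and credentials, so downstream consumers can locate the split files without any extra bookkeeping.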