List of usage examples for org.apache.hadoop.mapreduce.split JobSplitWriter createSplitFiles
public static void createSplitFiles(Path jobSubmitDir, Configuration conf, FileSystem fs, org.apache.hadoop.mapred.InputSplit[] splits) throws IOException
From source file:ml.shifu.guagua.yarn.GuaguaYarnClient.java
License:Apache License
@SuppressWarnings("unchecked") private <T extends InputSplit> List<InputSplit> writeNewSplits(Path jobSubmitDir) throws IOException, InterruptedException { List<InputSplit> splits = createNewSplits(); T[] array = (T[]) splits.toArray(new InputSplit[splits.size()]); // sort the splits into order based on size, so that the biggest // go first//from w w w. j a va 2 s . co m Arrays.sort(array, new SplitComparator()); JobSplitWriter.createSplitFiles(jobSubmitDir, getConf(), jobSubmitDir.getFileSystem(getConf()), array); return splits; }
From source file:org.apache.tez.mapreduce.hadoop.MRInputHelpers.java
License:Apache License
/** * Generate new-api mapreduce InputFormat splits * @param jobContext JobContext required by InputFormat * @param inputSplitDir Directory in which to generate splits information * * @return InputSplitInfo containing the split files' information and the * location hints for each split generated to be used to determining parallelism of * the map stage./* ww w .ja v a 2 s . c o m*/ * * @throws IOException * @throws InterruptedException * @throws ClassNotFoundException */ private static InputSplitInfoDisk writeNewSplits(JobContext jobContext, Path inputSplitDir) throws IOException, InterruptedException, ClassNotFoundException { org.apache.hadoop.mapreduce.InputSplit[] splits = generateNewSplits(jobContext, false, 0); Configuration conf = jobContext.getConfiguration(); JobSplitWriter.createSplitFiles(inputSplitDir, conf, inputSplitDir.getFileSystem(conf), splits); List<TaskLocationHint> locationHints = new ArrayList<TaskLocationHint>(splits.length); for (int i = 0; i < splits.length; ++i) { locationHints.add(TaskLocationHint .createTaskLocationHint(new HashSet<String>(Arrays.asList(splits[i].getLocations())), null)); } return new InputSplitInfoDisk(JobSubmissionFiles.getJobSplitFile(inputSplitDir), JobSubmissionFiles.getJobSplitMetaFile(inputSplitDir), splits.length, locationHints, jobContext.getCredentials()); }
From source file:org.apache.tez.mapreduce.hadoop.MRInputHelpers.java
License:Apache License
/** * Generate old-api mapred InputFormat splits * @param jobConf JobConf required by InputFormat class * @param inputSplitDir Directory in which to generate splits information * * @return InputSplitInfo containing the split files' information and the * number of splits generated to be used to determining parallelism of * the map stage./*from w w w .j a v a 2 s . c o m*/ * * @throws IOException */ private static InputSplitInfoDisk writeOldSplits(JobConf jobConf, Path inputSplitDir) throws IOException { org.apache.hadoop.mapred.InputSplit[] splits = generateOldSplits(jobConf, false, 0); JobSplitWriter.createSplitFiles(inputSplitDir, jobConf, inputSplitDir.getFileSystem(jobConf), splits); List<TaskLocationHint> locationHints = new ArrayList<TaskLocationHint>(splits.length); for (int i = 0; i < splits.length; ++i) { locationHints.add(TaskLocationHint .createTaskLocationHint(new HashSet<String>(Arrays.asList(splits[i].getLocations())), null)); } return new InputSplitInfoDisk(JobSubmissionFiles.getJobSplitFile(inputSplitDir), JobSubmissionFiles.getJobSplitMetaFile(inputSplitDir), splits.length, locationHints, jobConf.getCredentials()); }