Example usage for org.apache.hadoop.mapreduce.split JobSplitWriter createSplitFiles

List of usage examples for org.apache.hadoop.mapreduce.split JobSplitWriter createSplitFiles

Introduction

On this page you can find an example usage of org.apache.hadoop.mapreduce.split JobSplitWriter createSplitFiles.

Prototype

public static void createSplitFiles(Path jobSubmitDir, Configuration conf, FileSystem fs,
            org.apache.hadoop.mapred.InputSplit[] splits) throws IOException 

Source Link

Usage

From source file:ml.shifu.guagua.yarn.GuaguaYarnClient.java

License: Apache License

/**
 * Creates new-api (mapreduce) input splits and writes the corresponding
 * split files into {@code jobSubmitDir}.
 *
 * @param jobSubmitDir directory under which the split files are written
 * @return the generated splits, in their original (unsorted) order
 * @throws IOException if split generation or file writing fails
 * @throws InterruptedException if split generation is interrupted
 */
@SuppressWarnings("unchecked")
private <T extends InputSplit> List<InputSplit> writeNewSplits(Path jobSubmitDir)
        throws IOException, InterruptedException {
    List<InputSplit> newSplits = createNewSplits();
    T[] splitArray = (T[]) newSplits.toArray(new InputSplit[newSplits.size()]);

    // Order by descending size so the largest splits are scheduled first.
    Arrays.sort(splitArray, new SplitComparator());

    Configuration conf = getConf();
    JobSplitWriter.createSplitFiles(jobSubmitDir, conf, jobSubmitDir.getFileSystem(conf), splitArray);
    return newSplits;
}

From source file:org.apache.tez.mapreduce.hadoop.MRInputHelpers.java

License: Apache License

/**
 * Generate new-api mapreduce InputFormat splits
 * @param jobContext JobContext required by InputFormat
 * @param inputSplitDir Directory in which to generate splits information
 *
 * @return InputSplitInfo containing the split files' information and the
 * location hints for each split generated to be used to determining parallelism of
 * the map stage./*  ww  w  .ja v  a  2  s . c  o  m*/
 *
 * @throws IOException
 * @throws InterruptedException
 * @throws ClassNotFoundException
 */
private static InputSplitInfoDisk writeNewSplits(JobContext jobContext, Path inputSplitDir)
        throws IOException, InterruptedException, ClassNotFoundException {

    org.apache.hadoop.mapreduce.InputSplit[] splits = generateNewSplits(jobContext, false, 0);

    Configuration conf = jobContext.getConfiguration();

    JobSplitWriter.createSplitFiles(inputSplitDir, conf, inputSplitDir.getFileSystem(conf), splits);

    List<TaskLocationHint> locationHints = new ArrayList<TaskLocationHint>(splits.length);
    for (int i = 0; i < splits.length; ++i) {
        locationHints.add(TaskLocationHint
                .createTaskLocationHint(new HashSet<String>(Arrays.asList(splits[i].getLocations())), null));
    }

    return new InputSplitInfoDisk(JobSubmissionFiles.getJobSplitFile(inputSplitDir),
            JobSubmissionFiles.getJobSplitMetaFile(inputSplitDir), splits.length, locationHints,
            jobContext.getCredentials());
}

From source file:org.apache.tez.mapreduce.hadoop.MRInputHelpers.java

License: Apache License

/**
 * Generate old-api mapred InputFormat splits
 * @param jobConf JobConf required by InputFormat class
 * @param inputSplitDir Directory in which to generate splits information
 *
 * @return InputSplitInfo containing the split files' information and the
 * number of splits generated to be used to determining parallelism of
 * the map stage./*from w w w  .j a  v a  2  s  .  c o m*/
 *
 * @throws IOException
 */
private static InputSplitInfoDisk writeOldSplits(JobConf jobConf, Path inputSplitDir) throws IOException {

    org.apache.hadoop.mapred.InputSplit[] splits = generateOldSplits(jobConf, false, 0);

    JobSplitWriter.createSplitFiles(inputSplitDir, jobConf, inputSplitDir.getFileSystem(jobConf), splits);

    List<TaskLocationHint> locationHints = new ArrayList<TaskLocationHint>(splits.length);
    for (int i = 0; i < splits.length; ++i) {
        locationHints.add(TaskLocationHint
                .createTaskLocationHint(new HashSet<String>(Arrays.asList(splits[i].getLocations())), null));
    }

    return new InputSplitInfoDisk(JobSubmissionFiles.getJobSplitFile(inputSplitDir),
            JobSubmissionFiles.getJobSplitMetaFile(inputSplitDir), splits.length, locationHints,
            jobConf.getCredentials());
}