Example usage for org.apache.hadoop.mapreduce.lib.input CombineFileInputFormat getSplits

List of usage examples for org.apache.hadoop.mapreduce.lib.input CombineFileInputFormat getSplits

Introduction

On this page you can find example usages of org.apache.hadoop.mapreduce.lib.input CombineFileInputFormat getSplits.

Prototype

@Override
    public List<InputSplit> getSplits(JobContext job) throws IOException 

Source Link

Usage

From source file: com.linkedin.cubert.io.CubertInputFormat.java

License:Open Source License

/**
 * Builds the input splits for every multi-mapper configured on the job.
 *
 * <p>For each mapper index in {@code [0, confDiff.getNumDiffs())} the mapper-specific
 * configuration diff is applied, splits are generated (either through a
 * {@link CombineFileInputFormat} when {@code CubertStrings.COMBINED_INPUT} is set to
 * {@code true}, or through the actual input format otherwise), and each resulting split is
 * wrapped in a {@code MultiMapperSplit} tagged with the mapper index. The diff is undone
 * before moving on to the next mapper, even if split generation fails, so that the job
 * configuration is not left with a mapper-specific diff applied.
 *
 * @param context the job context whose configuration describes the multi-mappers
 * @return the wrapped splits of all multi-mappers, in mapper-index order
 * @throws IOException if the underlying input format fails to compute its splits
 * @throws InterruptedException if the underlying input format is interrupted
 */
@Override
public List<InputSplit> getSplits(JobContext context) throws IOException, InterruptedException {
    Configuration conf = context.getConfiguration();
    ConfigurationDiff confDiff = new ConfigurationDiff(conf);

    int numMultiMappers = confDiff.getNumDiffs();

    List<InputSplit> splits = new ArrayList<InputSplit>();

    for (int mapperIndex = 0; mapperIndex < numMultiMappers; mapperIndex++) {
        // reset conf to multimapper i
        confDiff.applyDiff(mapperIndex);
        try {
            // get the actual input format
            // NOTE(review): resolved even when combined splits are requested; kept in place
            // because it may validate the configured format -- confirm before moving it.
            InputFormat<K, V> actualInputFormat = getActualInputFormat(context);

            // The flag is read after the diff is applied, so it is evaluated per mapper.
            boolean combineSplit = conf.getBoolean(CubertStrings.COMBINED_INPUT, false);

            List<InputSplit> actualSplits;
            if (combineSplit) {
                // Only split computation is needed from this instance; it is never asked to
                // create record readers, hence the guard in createRecordReader.
                CombineFileInputFormat<K, V> cfif = new CombineFileInputFormat<K, V>() {
                    @Override
                    public RecordReader<K, V> createRecordReader(InputSplit split,
                            TaskAttemptContext taskContext) throws IOException {
                        throw new IllegalStateException("Should not be called");
                    }
                };

                actualSplits = cfif.getSplits(context);
            } else {
                actualSplits = actualInputFormat.getSplits(context);
            }

            // embed each split in MultiMapperSplit and add to list
            for (InputSplit actualSplit : actualSplits) {
                splits.add(new MultiMapperSplit(actualSplit, mapperIndex));
            }
        } finally {
            // undo the diff, also on failure, so conf is restored for the caller
            confDiff.undoDiff(mapperIndex);
        }
    }
    return splits;
}