Example usage for the org.apache.hadoop.mapreduce.lib.input.CombineFileInputFormat constructor

List of usage examples for the org.apache.hadoop.mapreduce.lib.input.CombineFileInputFormat constructor

Introduction

On this page you can find example usage of the org.apache.hadoop.mapreduce.lib.input.CombineFileInputFormat constructor.

Prototype

public CombineFileInputFormat() 

Source Link

Document

default constructor

Usage

From source file: com.linkedin.cubert.io.CubertInputFormat.java

License:Open Source License

/**
 * Collects the input splits for every multi-mapper described by the job's
 * {@code ConfigurationDiff}.
 *
 * <p>For each mapper index in {@code [0, confDiff.getNumDiffs())} the diff for that index is
 * applied, splits are obtained, each split is wrapped in a {@code MultiMapperSplit} carrying the
 * mapper index, and the diff is undone again. Splits come from an anonymous
 * {@code CombineFileInputFormat} when {@code CubertStrings.COMBINED_INPUT} is {@code true}
 * (default {@code false}), and from the format returned by {@code getActualInputFormat}
 * otherwise.
 *
 * <p>The diff is undone in a {@code finally} block, so it is reverted even when split
 * computation throws.
 *
 * @param context the job context; its configuration backs the {@code ConfigurationDiff}
 * @return the wrapped splits of all multi-mappers, in ascending mapper-index order
 * @throws IOException propagated from the underlying {@code getSplits} calls
 * @throws InterruptedException propagated from the underlying {@code getSplits} calls
 */
@Override
public List<InputSplit> getSplits(JobContext context) throws IOException, InterruptedException {
    Configuration conf = context.getConfiguration();
    ConfigurationDiff confDiff = new ConfigurationDiff(conf);

    int numMultiMappers = confDiff.getNumDiffs();

    List<InputSplit> splits = new ArrayList<InputSplit>();

    for (int mapperIndex = 0; mapperIndex < numMultiMappers; mapperIndex++) {
        // reset conf to multimapper i
        confDiff.applyDiff(mapperIndex);
        try {
            // Resolve the actual input format up front, as the original code did, so that any
            // failure in resolving it surfaces regardless of which branch is taken below.
            InputFormat<K, V> actualInputFormat = getActualInputFormat(context);

            // check if combined input split is requested
            boolean combineSplit = conf.getBoolean(CubertStrings.COMBINED_INPUT, false);

            List<InputSplit> actualSplits;
            if (combineSplit) {
                // Only getSplits() is used on this instance; createRecordReader is stubbed out
                // because CombineFileInputFormat requires an implementation.
                CombineFileInputFormat<K, V> cfif = new CombineFileInputFormat<K, V>() {
                    @Override
                    public RecordReader<K, V> createRecordReader(InputSplit split, TaskAttemptContext context)
                            throws IOException {
                        throw new IllegalStateException("Should not be called");
                    }
                };

                // get the splits
                actualSplits = cfif.getSplits(context);
            } else {
                actualSplits = actualInputFormat.getSplits(context);
            }

            // embed each split in MultiMapperSplit and add to list
            for (InputSplit actualSplit : actualSplits) {
                splits.add(new MultiMapperSplit(actualSplit, mapperIndex));
            }
        } finally {
            // undo the diff, even if computing the splits failed
            confDiff.undoDiff(mapperIndex);
        }
    }
    return splits;
}