List of usage examples for org.apache.hadoop.mapreduce.lib.input.CombineFileInputFormat.getSplits
@Override public List<InputSplit> getSplits(JobContext job) throws IOException
From source file: com.linkedin.cubert.io.CubertInputFormat.java
License:Open Source License
@Override public List<InputSplit> getSplits(JobContext context) throws IOException, InterruptedException { Configuration conf = context.getConfiguration(); ConfigurationDiff confDiff = new ConfigurationDiff(conf); int numMultiMappers = confDiff.getNumDiffs(); List<InputSplit> splits = new ArrayList<InputSplit>(); for (int mapperIndex = 0; mapperIndex < numMultiMappers; mapperIndex++) { // reset conf to multimapper i confDiff.applyDiff(mapperIndex); // get the actual input format InputFormat<K, V> actualInputFormat = getActualInputFormat(context); List<InputSplit> actualSplits = null; // check if combined input split is requested boolean combineSplit = conf.getBoolean(CubertStrings.COMBINED_INPUT, false); if (combineSplit) { // Create CombinedFileInputFormat CombineFileInputFormat<K, V> cfif = new CombineFileInputFormat<K, V>() { @Override//from w w w .j ava2s .c o m public RecordReader<K, V> createRecordReader(InputSplit split, TaskAttemptContext context) throws IOException { throw new IllegalStateException("Should not be called"); } }; // get the splits actualSplits = cfif.getSplits(context); } else { actualSplits = actualInputFormat.getSplits(context); } // embed each split in MultiMapperSplit and add to list for (InputSplit actualSplit : actualSplits) splits.add(new MultiMapperSplit(actualSplit, mapperIndex)); // undo the diff confDiff.undoDiff(mapperIndex); } return splits; }