List of usage examples for org.apache.hadoop.mapreduce.lib.input.NLineInputFormat (constructor NLineInputFormat())
NLineInputFormat
From source file:org.mrgeo.hdfs.vector.DelimitedVectorInputFormat.java
License:Apache License
@Override public List<InputSplit> getSplits(JobContext context) throws IOException, InterruptedException { boolean useNLineFormat = context.getConfiguration().getBoolean(USE_NLINE_FORMAT, false); if (useNLineFormat) { List<InputSplit> splits = new NLineInputFormat().getSplits(context); // This is a workaround to what appears to be a bug in in how NLineInputFormat // computes its splits. When there are multiple splits in a file, it seems // the start position in the last split is off by one. Note that this corrective // code needs to check the last split for each different file that appears // in the list of splits. for (int index = 2; index < splits.size(); index++) { FileSplit previousSplit = (FileSplit) splits.get(index - 1); FileSplit currSplit = (FileSplit) splits.get(index); // If this index is the last split, or we've moved on to splits from a different // file, then we need to adjust the last split for that file. int lastFileIndex = -1; if (index == splits.size() - 1) { lastFileIndex = index;/*from w w w.j ava 2 s . c o m*/ } else if (!currSplit.getPath().equals(previousSplit.getPath())) { lastFileIndex = index - 1; } if (lastFileIndex >= 2) { FileSplit lastFileSplit = (FileSplit) splits.get(lastFileIndex); FileSplit priorSplit = (FileSplit) splits.get(lastFileIndex - 1); if (lastFileSplit.getPath().equals(priorSplit.getPath())) { if (priorSplit.getPath().equals(lastFileSplit.getPath()) && priorSplit.getStart() + priorSplit.getLength() < lastFileSplit.getStart()) { // Adjust the start of previous split FileSplit replacement = new FileSplit(lastFileSplit.getPath(), priorSplit.getStart() + priorSplit.getLength(), lastFileSplit.getLength() + 1, lastFileSplit.getLocations()); log.info("Replacing split: " + lastFileSplit.toString()); log.info(" With split: " + replacement.toString()); splits.set(lastFileIndex, replacement); } } } } return splits; } else { List<InputSplit> splits = new TextInputFormat().getSplits(context); return splits; } }
From source file:org.mrgeo.hdfs.vector.HdfsVectorInputFormat.java
License:Apache License
@Override public List<InputSplit> getSplits(JobContext context) throws IOException, InterruptedException { boolean useNLineFormat = context.getConfiguration().getBoolean(USE_NLINE_FORMAT, false); if (useNLineFormat) { List<InputSplit> splits = new NLineInputFormat().getSplits(context); // This is a workaround to what appears to be a bug in in how NLineInputFormat // computes its splits. When there are multiple splits in a file, it seems // the start position in the last split is off by one. Note that this corrective // code needs to check the last split for each different file that appears // in the list of splits. for (int index = 2; index < splits.size(); index++) { FileSplit previousSplit = (FileSplit) splits.get(index - 1); FileSplit currSplit = (FileSplit) splits.get(index); // If this index is the last split, or we've moved on to splits from a different // file, then we need to adjust the last split for that file. int lastFileIndex = -1; if (index == splits.size() - 1) { lastFileIndex = index;// w w w . j a v a 2 s. c om } else if (!currSplit.getPath().equals(previousSplit.getPath())) { lastFileIndex = index - 1; } if (lastFileIndex >= 2) { FileSplit lastFileSplit = (FileSplit) splits.get(lastFileIndex); FileSplit priorSplit = (FileSplit) splits.get(lastFileIndex - 1); if (lastFileSplit.getPath().equals(priorSplit.getPath())) if (priorSplit.getPath().equals(lastFileSplit.getPath()) && priorSplit.getStart() + priorSplit.getLength() < lastFileSplit.getStart()) { // Adjust the start of previous split FileSplit replacement = new FileSplit(lastFileSplit.getPath(), priorSplit.getStart() + priorSplit.getLength(), lastFileSplit.getLength() + 1, lastFileSplit.getLocations()); log.info("Replacing split: " + lastFileSplit.toString()); log.info(" With split: " + replacement.toString()); splits.set(lastFileIndex, replacement); } } } return splits; } else { List<InputSplit> splits = new TextInputFormat().getSplits(context); return splits; } }