Example usage for org.apache.hadoop.mapreduce.lib.input NLineInputFormat NLineInputFormat

List of usage examples for org.apache.hadoop.mapreduce.lib.input NLineInputFormat NLineInputFormat

Introduction

On this page you can find example usages of the NLineInputFormat constructor of org.apache.hadoop.mapreduce.lib.input.NLineInputFormat.

Prototype

NLineInputFormat

Source Link

Usage

From source file:org.mrgeo.hdfs.vector.DelimitedVectorInputFormat.java

License:Apache License

/**
 * Computes the input splits for the job.
 *
 * <p>When the {@code USE_NLINE_FORMAT} configuration flag is set, the splits are produced by
 * {@link NLineInputFormat} and then post-processed to work around what appears to be an
 * off-by-one error in the start position of the last split of each multi-split file.
 * Otherwise the splits come straight from {@link TextInputFormat}.
 *
 * @param context the job context supplying the configuration and input paths
 * @return the (possibly corrected) list of input splits
 * @throws IOException if the underlying input format fails to compute splits
 * @throws InterruptedException declared by the overridden method's signature
 */
@Override
public List<InputSplit> getSplits(JobContext context) throws IOException, InterruptedException {
    boolean useNLineFormat = context.getConfiguration().getBoolean(USE_NLINE_FORMAT, false);
    if (!useNLineFormat) {
        return new TextInputFormat().getSplits(context);
    }

    List<InputSplit> splits = new NLineInputFormat().getSplits(context);
    // This is a workaround to what appears to be a bug in how NLineInputFormat
    // computes its splits. When there are multiple splits in a file, it seems
    // the start position in the last split is off by one. The correction has to be
    // applied to the last split of every file that appears in the list of splits,
    // wherever that file sits in the list and however many splits follow it.
    for (int index = 1; index < splits.size(); index++) {
        FileSplit currSplit = (FileSplit) splits.get(index);
        FileSplit priorSplit = (FileSplit) splits.get(index - 1);

        // A split without a predecessor in the same file has nothing to be aligned with.
        if (!currSplit.getPath().equals(priorSplit.getPath())) {
            continue;
        }

        // Only the last split of a file is affected: either the list ends here or the
        // next split belongs to a different file.
        boolean lastSplitOfFile = index == splits.size() - 1
                || !((FileSplit) splits.get(index + 1)).getPath().equals(currSplit.getPath());
        if (!lastSplitOfFile) {
            continue;
        }

        long priorEnd = priorSplit.getStart() + priorSplit.getLength();
        if (priorEnd < currSplit.getStart()) {
            // Move the start of the last split back to where the prior split ends and
            // grow it by one byte to compensate for the off-by-one gap.
            FileSplit replacement = new FileSplit(currSplit.getPath(), priorEnd,
                    currSplit.getLength() + 1, currSplit.getLocations());
            log.info("Replacing split: " + currSplit);
            log.info("  With split: " + replacement);
            splits.set(index, replacement);
        }
    }
    return splits;
}

From source file:org.mrgeo.hdfs.vector.HdfsVectorInputFormat.java

License:Apache License

/**
 * Computes the input splits for the job.
 *
 * <p>When the {@code USE_NLINE_FORMAT} configuration flag is set, the splits are produced by
 * {@link NLineInputFormat} and then post-processed to work around what appears to be an
 * off-by-one error in the start position of the last split of each multi-split file.
 * Otherwise the splits come straight from {@link TextInputFormat}.
 *
 * @param context the job context supplying the configuration and input paths
 * @return the (possibly corrected) list of input splits
 * @throws IOException if the underlying input format fails to compute splits
 * @throws InterruptedException declared by the overridden method's signature
 */
@Override
public List<InputSplit> getSplits(JobContext context) throws IOException, InterruptedException {
    boolean useNLineFormat = context.getConfiguration().getBoolean(USE_NLINE_FORMAT, false);
    if (!useNLineFormat) {
        return new TextInputFormat().getSplits(context);
    }

    List<InputSplit> splits = new NLineInputFormat().getSplits(context);
    // This is a workaround to what appears to be a bug in how NLineInputFormat
    // computes its splits. When there are multiple splits in a file, it seems
    // the start position in the last split is off by one. The correction has to be
    // applied to the last split of every file that appears in the list of splits,
    // wherever that file sits in the list and however many splits follow it.
    final int splitCount = splits.size();
    for (int index = 1; index < splitCount; index++) {
        FileSplit candidate = (FileSplit) splits.get(index);
        FileSplit predecessor = (FileSplit) splits.get(index - 1);

        // The candidate is the last split of its file when the list ends here or the
        // next split belongs to a different file.
        boolean endsFile = index == splitCount - 1
                || !((FileSplit) splits.get(index + 1)).getPath().equals(candidate.getPath());
        // It can only be aligned if the preceding split comes from the same file.
        boolean sameFileAsPredecessor = candidate.getPath().equals(predecessor.getPath());

        if (endsFile && sameFileAsPredecessor) {
            long predecessorEnd = predecessor.getStart() + predecessor.getLength();
            if (predecessorEnd < candidate.getStart()) {
                // Move the start of the last split back to where the preceding split
                // ends and grow it by one byte to compensate for the off-by-one gap.
                FileSplit replacement = new FileSplit(candidate.getPath(), predecessorEnd,
                        candidate.getLength() + 1, candidate.getLocations());
                log.info("Replacing split: " + candidate);
                log.info("  With split: " + replacement);
                splits.set(index, replacement);
            }
        }
    }
    return splits;
}