Example usage for org.apache.hadoop.mapred MultiFileSplit MultiFileSplit

List of usage examples for org.apache.hadoop.mapred MultiFileSplit MultiFileSplit

Introduction

On this page you can find example usage for the org.apache.hadoop.mapred.MultiFileSplit constructor MultiFileSplit.

Prototype

public MultiFileSplit(JobConf job, Path[] files, long[] lengths) 

Source Link

Usage

From source file: StorageEngineClient.MultiFilesFormatStorageInputFormat.java

License:Open Source License

/**
 * Groups the job's input files into {@link MultiFileSplit}s by packing
 * consecutive files together until their combined length reaches 80% of the
 * current file's block size.
 *
 * <p>Files are visited in the order returned by {@code listStatus(job)}. A file
 * is appended to the split being built while the running total stays below
 * {@code 0.8 * blockSize}; otherwise the pending split is emitted and the file
 * starts a new one. Splits are never emitted empty.
 *
 * <p>NOTE(review): files whose length exceeds their block size are skipped and
 * do not appear in any returned split. This mirrors the original behaviour,
 * which had an empty branch for that case -- confirm that such files are
 * handled elsewhere, otherwise their data is silently ignored.
 *
 * @param job       the job configuration, passed to {@code listStatus} and to
 *                  each created split
 * @param numSplits only used as a sizing hint for the result list
 * @return the generated splits; an empty array if no file was accepted
 * @throws IOException declared by the overridden method (presumably raised by
 *                     {@code listStatus} -- not visible from here)
 */
@Override
public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException {
    FileStatus[] fileStatuss = listStatus(job);

    // Clamp the capacity hint: a negative numSplits would make ArrayList throw.
    List<MultiFileSplit> splits = new ArrayList<MultiFileSplit>(
            Math.max(0, Math.min(numSplits, fileStatuss.length)));

    List<Path> paths = new ArrayList<Path>();
    List<Long> pathlengths = new ArrayList<Long>();
    // long, not int: file lengths are longs and "int += long" narrows silently.
    long splitSize = 0L;

    for (FileStatus status : fileStatuss) {
        long len = status.getLen();
        long blocksize = status.getBlockSize();

        if (len > blocksize) {
            // NOTE(review): files larger than one block are dropped here, as in
            // the original code. Verify this is intentional.
            continue;
        }

        if (splitSize + len >= blocksize * 0.8) {
            // The pending split is full enough: emit it and start over with
            // this file. The running size must restart from zero, otherwise
            // every later file would immediately trigger another flush.
            addPendingSplit(job, paths, pathlengths, splits);
            splitSize = 0L;
        }

        paths.add(status.getPath());
        pathlengths.add(len);
        splitSize += len;
    }

    // Emit whatever is left over after the last file.
    addPendingSplit(job, paths, pathlengths, splits);

    return splits.toArray(new MultiFileSplit[splits.size()]);
}

/**
 * Turns the pending paths/lengths into a {@link MultiFileSplit}, appends it to
 * {@code splits} and clears both pending lists. Does nothing when no path is
 * pending, so no empty split is ever produced.
 */
private static void addPendingSplit(JobConf job, List<Path> paths, List<Long> pathlengths,
        List<MultiFileSplit> splits) {
    if (paths.isEmpty()) {
        return;
    }
    Path[] splitPaths = paths.toArray(new Path[paths.size()]);
    long[] splitLengths = new long[pathlengths.size()];
    for (int j = 0; j < splitLengths.length; j++) {
        splitLengths[j] = pathlengths.get(j);
    }
    splits.add(new MultiFileSplit(job, splitPaths, splitLengths));
    paths.clear();
    pathlengths.clear();
}