Usage examples for the org.apache.hadoop.mapred MultiFileSplit constructor
public MultiFileSplit(JobConf job, Path[] files, long[] lengths)
From source file: StorageEngineClient.MultiFilesFormatStorageInputFormat.java
License: Open Source License
/**
 * Packs small input files into {@link MultiFileSplit}s so that the combined
 * length of each split stays below 80% of the file block size.
 *
 * <p>NOTE(review): files whose length exceeds their own block size are
 * silently skipped (the original had an empty branch for them) — presumably
 * they are handled by another code path; confirm before relying on this
 * method for oversized files.
 *
 * @param job       job configuration, passed through to each created split
 * @param numSplits split-count hint; used only to presize the result list
 * @return the generated splits; empty when no input files are listed
 * @throws IOException if listing the input files fails
 */
@Override
public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException {
    FileStatus[] fileStatuses = listStatus(job);
    List<MultiFileSplit> splits = new ArrayList<MultiFileSplit>(Math.min(numSplits, fileStatuses.length));
    if (fileStatuses.length != 0) {
        // long, not int: file lengths are long and an int accumulator could
        // silently truncate/overflow via the compound assignment below.
        long splitSize = 0;
        ArrayList<Path> paths = new ArrayList<Path>();
        ArrayList<Long> pathLengths = new ArrayList<Long>();
        for (int i = 0; i < fileStatuses.length; i++) {
            long blockSize = fileStatuses[i].getBlockSize();
            long len = fileStatuses[i].getLen();
            if (len > blockSize) {
                // Intentionally skipped — see the review note in the Javadoc.
            } else if (splitSize + len < blockSize * 0.8) {
                // Current batch still has room: accumulate this file.
                splitSize += len;
                paths.add(fileStatuses[i].getPath());
                pathLengths.add(len);
            } else {
                // Batch is full: emit it, then start a new batch with this file.
                addSplit(splits, job, paths, pathLengths);
                paths.clear();
                pathLengths.clear();
                // BUGFIX: reset the accumulator instead of continuing to grow it;
                // previously every file after the first flush got its own split.
                splitSize = len;
                paths.add(fileStatuses[i].getPath());
                pathLengths.add(len);
            }
        }
        // BUGFIX: only emit the trailing batch when non-empty; the original
        // could add an empty MultiFileSplit (e.g. when every file was oversized).
        if (!paths.isEmpty()) {
            addSplit(splits, job, paths, pathLengths);
        }
    }
    return splits.toArray(new MultiFileSplit[splits.size()]);
}

/** Converts an accumulated path/length batch into a MultiFileSplit and appends it. */
private static void addSplit(List<MultiFileSplit> splits, JobConf job,
        List<Path> paths, List<Long> lengths) throws IOException {
    Path[] splitPaths = paths.toArray(new Path[paths.size()]);
    long[] splitLengths = new long[lengths.size()];
    for (int j = 0; j < splitLengths.length; j++) {
        splitLengths[j] = lengths.get(j);
    }
    splits.add(new MultiFileSplit(job, splitPaths, splitLengths));
}