Example usage for org.apache.hadoop.mapreduce.lib.input CombineFileSplit getStartOffsets

Introduction

On this page you can find example usage of org.apache.hadoop.mapreduce.lib.input.CombineFileSplit.getStartOffsets().

Prototype

public long[] getStartOffsets() 

Document

Returns an array containing the start offsets of the files in the split.
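
The offsets line up index-for-index with the arrays returned by getPaths() and getLengths(). As a minimal sketch (not taken from either source file below; the class and method names are illustrative), the chunks packed into a combined split can be listed like this:

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.lib.input.CombineFileSplit;

public class StartOffsetsSketch {
    // getPaths(), getStartOffsets() and getLengths() are parallel arrays,
    // so index i describes the i-th file chunk packed into the split.
    static void printChunks(CombineFileSplit split) {
        Path[] paths = split.getPaths();
        long[] starts = split.getStartOffsets();
        long[] lengths = split.getLengths();
        for (int i = 0; i < split.getNumPaths(); i++) {
            System.out.println(paths[i] + " starts at byte " + starts[i]
                    + " and spans " + lengths[i] + " bytes");
        }
    }
}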

Usage

From source file: com.gemstone.gemfire.cache.hdfs.internal.hoplog.mapreduce.AbstractGFRecordReader.java

License: Apache License

@Override
public void initialize(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException {
    CombineFileSplit cSplit = (CombineFileSplit) split;
    // The combined split packs several file chunks; these three arrays are
    // parallel, so index i describes the i-th chunk.
    Path[] path = cSplit.getPaths();
    long[] start = cSplit.getStartOffsets();
    long[] len = cSplit.getLengths();

    Configuration conf = context.getConfiguration();
    FileSystem fs = cSplit.getPath(0).getFileSystem(conf);

    // Build an iterator over all file chunks belonging to this combined split.
    this.splitIterator = HDFSSplitIterator.newInstance(fs, path, start, len, 0L, 0L);
}
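
In this example getStartOffsets() supplies the per-file start positions that, together with the parallel getPaths() and getLengths() arrays, define the exact byte range of each file that belongs to the combined split; the three arrays are passed to HDFSSplitIterator so it reads only those chunks.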

From source file: gobblin.compaction.mapreduce.avro.AvroKeyRecursiveCombineFileInputFormat.java

License: Apache License

/**
 * Cap the number of locations in each split at SPLIT_MAX_NUM_LOCATIONS if it
 * exceeds that limit (MAPREDUCE-5186).
 */
private static List<InputSplit> cleanSplits(List<InputSplit> splits) throws IOException {
    if (VersionInfo.getVersion().compareTo("2.3.0") >= 0) {
        // This issue was fixed in 2.3.0, if newer version, no need to clean up splits
        return splits;
    }

    List<InputSplit> cleanedSplits = Lists.newArrayList();

    for (int i = 0; i < splits.size(); i++) {
        CombineFileSplit oldSplit = (CombineFileSplit) splits.get(i);
        String[] locations = oldSplit.getLocations();

        Preconditions.checkNotNull(locations, "CombineFileSplit.getLocations() returned null");

        if (locations.length > SPLIT_MAX_NUM_LOCATIONS) {
            locations = Arrays.copyOf(locations, SPLIT_MAX_NUM_LOCATIONS);
        }

        cleanedSplits.add(new CombineFileSplit(oldSplit.getPaths(), oldSplit.getStartOffsets(),
                oldSplit.getLengths(), locations));
    }
    return cleanedSplits;
}
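
Because getPaths(), getStartOffsets() and getLengths() expose everything needed to reconstruct a split, this example can rebuild each CombineFileSplit verbatim while only trimming the location list; the original start offset of every file chunk is carried over unchanged.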