Example usage for org.apache.hadoop.mapreduce.lib.input CombineFileSplit getLocations

List of usage examples for org.apache.hadoop.mapreduce.lib.input CombineFileSplit getLocations

Introduction

On this page you can find example usage for org.apache.hadoop.mapreduce.lib.input CombineFileSplit getLocations.

Prototype

public String[] getLocations() throws IOException 

Document

Returns all the hosts (locations) on which this input-split's data resides.
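
The returned host array is most often reused verbatim when a combined split is broken back into one FileSplit per contained file, which is the pattern in most of the examples below. Here is a minimal sketch of that pattern, assuming only the public CombineFileSplit and FileSplit APIs; the class name CombineSplitExpander and the method expand are illustrative and not taken from any of the projects shown:

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.mapreduce.lib.input.CombineFileSplit;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;

public class CombineSplitExpander {

    // Expands a CombineFileSplit into one FileSplit per contained file,
    // reusing the combined split's host list for every per-file split.
    public static List<FileSplit> expand(CombineFileSplit combined) throws IOException {
        // getLocations() may be empty, so callers should not assume
        // at least one host is present.
        String[] hosts = combined.getLocations();

        List<FileSplit> fileSplits = new ArrayList<>(combined.getNumPaths());
        for (int i = 0; i < combined.getNumPaths(); i++) {
            fileSplits.add(new FileSplit(
                    combined.getPath(i),   // path of the i-th file
                    combined.getOffset(i), // start offset within that file
                    combined.getLength(i), // number of bytes in this chunk
                    hosts));               // hosts shared by the whole combined split
        }
        return fileSplits;
    }
}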

Usage

From source file:com.alexholmes.hadooputils.combine.common.mapreduce.SplitMetricsCombineInputFormat.java

License:Apache License

public static String extractLocation(CombineFileSplit split) throws IOException {
    if (split.getLocations() == null || split.getLocations().length == 0) {
        return null;
    }
    return split.getLocations()[0];
}

From source file:edu.umn.cs.spatialHadoop.nasa.HDFRecordReader.java

License:Open Source License

public void initialize(InputSplit split, Configuration conf) throws IOException {
    this.conf = conf;
    String datasetName = conf.get("dataset");
    if (datasetName == null)
        throw new RuntimeException("Dataset name should be provided");
    if (split instanceof CombineFileSplit) {
        CombineFileSplit csplits = (CombineFileSplit) split;
        splits = new Vector<FileSplit>(csplits.getNumPaths());
        for (int i = 0; i < csplits.getNumPaths(); i++) {
            FileSplit fsplit = new FileSplit(csplits.getPath(i), csplits.getOffset(i), csplits.getLength(i),
                    csplits.getLocations());
            splits.add(fsplit);
        }
        this.initialize(splits.remove(splits.size() - 1), conf);
        return;
    }
    inFile = ((FileSplit) split).getPath();
    fs = inFile.getFileSystem(conf);
    if (fs instanceof HTTPFileSystem) {
        // For performance reasons, we don't open HDF files from HTTP
        inFile = new Path(FileUtil.copyFile(conf, inFile));
        fs = FileSystem.getLocal(conf);
        this.deleteOnEnd = true;
    }
    hdfFile = new HDFFile(fs.open(inFile));

    // Retrieve meta data
    String archiveMetadata = (String) hdfFile.findHeaderByName("ArchiveMetadata.0").getEntryAt(0);
    String coreMetadata = (String) hdfFile.findHeaderByName("CoreMetadata.0").getEntryAt(0);
    nasaDataset = new NASADataset(coreMetadata, archiveMetadata);

    // Retrieve the data array
    DDVGroup dataGroup = hdfFile.findGroupByName(datasetName);
    boolean fillValueFound = false;
    int resolution = 0;
    // Retrieve metadata
    int fillValuee = 0;
    for (DataDescriptor dd : dataGroup.getContents()) {
        if (dd instanceof DDVDataHeader) {
            DDVDataHeader vheader = (DDVDataHeader) dd;
            if (vheader.getName().equals("_FillValue")) {
                Object fillValue = vheader.getEntryAt(0);
                if (fillValue instanceof Integer)
                    fillValuee = (Integer) fillValue;
                else if (fillValue instanceof Short)
                    fillValuee = (Short) fillValue;
                else if (fillValue instanceof Byte)
                    fillValuee = (Byte) fillValue;
                else
                    throw new RuntimeException("Unsupported type: " + fillValue.getClass());
                fillValueFound = true;
            } else if (vheader.getName().equals("valid_range")) {
                Object minValue = vheader.getEntryAt(0);
                if (minValue instanceof Integer)
                    nasaDataset.minValue = (Integer) minValue;
                else if (minValue instanceof Byte)
                    nasaDataset.minValue = (Byte) minValue;
                Object maxValue = vheader.getEntryAt(1);
                if (maxValue instanceof Integer)
                    nasaDataset.maxValue = (Integer) maxValue;
                else if (maxValue instanceof Byte)
                    nasaDataset.maxValue = (Byte) maxValue;
            }
        }
    }
    // Retrieve data
    for (DataDescriptor dd : dataGroup.getContents()) {
        if (dd instanceof DDNumericDataGroup) {
            DDNumericDataGroup numericDataGroup = (DDNumericDataGroup) dd;
            valueSize = numericDataGroup.getDataSize();
            resolution = numericDataGroup.getDimensions()[0];
            unparsedDataArray = new byte[valueSize * resolution * resolution];
            if (fillValueFound) {
                fillValueBytes = new byte[valueSize];
                HDFConstants.writeAt(fillValueBytes, 0, fillValuee, valueSize);
                for (int i = 0; i < unparsedDataArray.length; i++)
                    unparsedDataArray[i] = fillValueBytes[i % valueSize];
            }
            numericDataGroup.getAsByteArray(unparsedDataArray, 0, unparsedDataArray.length);
        }
    }

    nasaDataset.resolution = resolution;
    if (!fillValueFound) {
        skipFillValue = false;
    } else {
        skipFillValue = conf.getBoolean("skipfill", true);
        // Whether we need to recover fill values or not
        boolean recoverFillValues = conf.getBoolean("recoverholes", true);
        if (recoverFillValues)
            recoverFillValues(conf);
    }
    this.nasaShape = (S) OperationsParams.getShape(conf, "shape", new NASARectangle());
    this.nasaShape.setTimestamp(nasaDataset.time);
    this.value = new NASAIterator();
}

From source file:gobblin.compaction.mapreduce.avro.AvroKeyRecursiveCombineFileInputFormat.java

License:Apache License

/**
 * Set the number of locations in the split to SPLIT_MAX_NUM_LOCATIONS if it is larger than
 * SPLIT_MAX_NUM_LOCATIONS (MAPREDUCE-5186).
 */
private static List<InputSplit> cleanSplits(List<InputSplit> splits) throws IOException {
    if (VersionInfo.getVersion().compareTo("2.3.0") >= 0) {
        // This issue was fixed in 2.3.0, if newer version, no need to clean up splits
        return splits;
    }

    List<InputSplit> cleanedSplits = Lists.newArrayList();

    for (int i = 0; i < splits.size(); i++) {
        CombineFileSplit oldSplit = (CombineFileSplit) splits.get(i);
        String[] locations = oldSplit.getLocations();

        Preconditions.checkNotNull(locations, "CombineFileSplit.getLocations() returned null");

        if (locations.length > SPLIT_MAX_NUM_LOCATIONS) {
            locations = Arrays.copyOf(locations, SPLIT_MAX_NUM_LOCATIONS);
        }

        cleanedSplits.add(new CombineFileSplit(oldSplit.getPaths(), oldSplit.getStartOffsets(),
                oldSplit.getLengths(), locations));
    }
    return cleanedSplits;
}
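
The same workaround can be applied to a single split. Below is a hedged sketch, assuming only the public CombineFileSplit API; the class name SplitLocationCapper and the MAX_LOCATIONS constant are illustrative (the Gobblin code above uses its own SPLIT_MAX_NUM_LOCATIONS):

import java.io.IOException;
import java.util.Arrays;

import org.apache.hadoop.mapreduce.lib.input.CombineFileSplit;

public class SplitLocationCapper {

    // Hypothetical cap on the number of hosts kept per split.
    private static final int MAX_LOCATIONS = 10;

    // Returns the split unchanged if its host list is already small enough,
    // otherwise rebuilds it with the host list truncated to MAX_LOCATIONS.
    public static CombineFileSplit cap(CombineFileSplit split) throws IOException {
        String[] locations = split.getLocations();
        if (locations == null || locations.length <= MAX_LOCATIONS) {
            return split;
        }
        return new CombineFileSplit(split.getPaths(), split.getStartOffsets(),
                split.getLengths(), Arrays.copyOf(locations, MAX_LOCATIONS));
    }
}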

From source file:org.apache.mahout.text.WholeFileRecordReader.java

License:Apache License

public WholeFileRecordReader(CombineFileSplit fileSplit, TaskAttemptContext taskAttemptContext, Integer idx)
        throws IOException {
    this.fileSplit = new FileSplit(fileSplit.getPath(idx), fileSplit.getOffset(idx), fileSplit.getLength(idx),
            fileSplit.getLocations());
    this.configuration = taskAttemptContext.getConfiguration();
    this.index = new IntWritable(idx);
    this.fileFilterClassName = this.configuration.get(FILE_FILTER_CLASS_OPTION[0]);
}

From source file:org.kitesdk.data.spi.filesystem.AbstractCombineFileRecordReader.java

License:Apache License

@Override
public void initialize(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException {
    if (delegate != null) {
        delegate.close();
    }
    if (split instanceof CombineFileSplit) {
        CombineFileSplit combineSplit = (CombineFileSplit) split;
        FileSplit fileSplit = new FileSplit(combineSplit.getPath(idx), combineSplit.getOffset(idx),
                combineSplit.getLength(idx), combineSplit.getLocations());
        delegate = getInputFormat().createRecordReader(fileSplit, context);
        delegate.initialize(fileSplit, context);
    } else {
        throw new DatasetOperationException("Split is not a CombineFileSplit: %s:%s",
                split.getClass().getCanonicalName(), split);
    }
}