Example usage for org.apache.hadoop.mapreduce.lib.input CombineFileSplit getLocations

List of usage examples for org.apache.hadoop.mapreduce.lib.input CombineFileSplit getLocations

Introduction

On this page you can find example usage for org.apache.hadoop.mapreduce.lib.input CombineFileSplit getLocations.

Prototype

public String[] getLocations() throws IOException 

Document

Returns all the hosts (locations) on which this input-split's data resides.
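
The returned host array is most often reused verbatim when a combined split is broken back into one FileSplit per contained file, which is the pattern in most of the examples below. Here is a minimal sketch of that pattern, assuming only the public CombineFileSplit and FileSplit APIs; the class name CombineSplitExpander and the method expand are illustrative and not taken from any of the projects shown:

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.mapreduce.lib.input.CombineFileSplit;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;

public class CombineSplitExpander {

    // Expands a CombineFileSplit into one FileSplit per contained file,
    // reusing the combined split's host list for every per-file split.
    public static List<FileSplit> expand(CombineFileSplit combined) throws IOException {
        // getLocations() may be empty, so callers should not assume
        // at least one host is present.
        String[] hosts = combined.getLocations();

        List<FileSplit> fileSplits = new ArrayList<>(combined.getNumPaths());
        for (int i = 0; i < combined.getNumPaths(); i++) {
            fileSplits.add(new FileSplit(
                    combined.getPath(i),   // path of the i-th file
                    combined.getOffset(i), // start offset within that file
                    combined.getLength(i), // number of bytes in this chunk
                    hosts));               // hosts shared by the whole combined split
        }
        return fileSplits;
    }
}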

Usage

From source file:com.alexholmes.hadooputils.combine.common.mapreduce.SplitMetricsCombineInputFormat.java

License:Apache License

public static String extractLocation(CombineFileSplit split) throws IOException {
    if (split.getLocations() == null || split.getLocations().length == 0) {
        return null;
    }
    return split.getLocations()[0];
}

From source file:edu.umn.cs.spatialHadoop.nasa.HDFRecordReader.java

License:Open Source License

public void initialize(InputSplit split, Configuration conf) throws IOException {
    this.conf = conf;
    String datasetName = conf.get("dataset");
    if (datasetName == null)
        throw new RuntimeException("Dataset name should be provided");
    if (split instanceof CombineFileSplit) {
        CombineFileSplit csplits = (CombineFileSplit) split;
        splits = new Vector<FileSplit>(csplits.getNumPaths());
        for (int i = 0; i < csplits.getNumPaths(); i++) {
            FileSplit fsplit = new FileSplit(csplits.getPath(i), csplits.getOffset(i), csplits.getLength(i),
                    csplits.getLocations());
            splits.add(fsplit);
        }
        this.initialize(splits.remove(splits.size() - 1), conf);
        return;
    }
    inFile = ((FileSplit) split).getPath();
    fs = inFile.getFileSystem(conf);
    if (fs instanceof HTTPFileSystem) {
        // For performance reasons, we don't open HDF files from HTTP
        inFile = new Path(FileUtil.copyFile(conf, inFile));
        fs = FileSystem.getLocal(conf);
        this.deleteOnEnd = true;
    }
    hdfFile = new HDFFile(fs.open(inFile));

    // Retrieve meta data
    String archiveMetadata = (String) hdfFile.findHeaderByName("ArchiveMetadata.0").getEntryAt(0);
    String coreMetadata = (String) hdfFile.findHeaderByName("CoreMetadata.0").getEntryAt(0);
    nasaDataset = new NASADataset(coreMetadata, archiveMetadata);

    // Retrieve the data array
    DDVGroup dataGroup = hdfFile.findGroupByName(datasetName);
    boolean fillValueFound = false;
    int resolution = 0;
    // Retrieve metadata
    int fillValuee = 0;
    for (DataDescriptor dd : dataGroup.getContents()) {
        if (dd instanceof DDVDataHeader) {
            DDVDataHeader vheader = (DDVDataHeader) dd;
            if (vheader.getName().equals("_FillValue")) {
                Object fillValue = vheader.getEntryAt(0);
                if (fillValue instanceof Integer)
                    fillValuee = (Integer) fillValue;
                else if (fillValue instanceof Short)
                    fillValuee = (Short) fillValue;
                else if (fillValue instanceof Byte)
                    fillValuee = (Byte) fillValue;
                else
                    throw new RuntimeException("Unsupported type: " + fillValue.getClass());
                fillValueFound = true;
            } else if (vheader.getName().equals("valid_range")) {
                Object minValue = vheader.getEntryAt(0);
                if (minValue instanceof Integer)
                    nasaDataset.minValue = (Integer) minValue;
                else if (minValue instanceof Byte)
                    nasaDataset.minValue = (Byte) minValue;
                Object maxValue = vheader.getEntryAt(1);
                if (maxValue instanceof Integer)
                    nasaDataset.maxValue = (Integer) maxValue;
                else if (maxValue instanceof Byte)
                    nasaDataset.maxValue = (Byte) maxValue;
            }
        }
    }
    // Retrieve data
    for (DataDescriptor dd : dataGroup.getContents()) {
        if (dd instanceof DDNumericDataGroup) {
            DDNumericDataGroup numericDataGroup = (DDNumericDataGroup) dd;
            valueSize = numericDataGroup.getDataSize();
            resolution = numericDataGroup.getDimensions()[0];
            unparsedDataArray = new byte[valueSize * resolution * resolution];
            if (fillValueFound) {
                fillValueBytes = new byte[valueSize];
                HDFConstants.writeAt(fillValueBytes, 0, fillValuee, valueSize);
                for (int i = 0; i < unparsedDataArray.length; i++)
                    unparsedDataArray[i] = fillValueBytes[i % valueSize];
            }
            numericDataGroup.getAsByteArray(unparsedDataArray, 0, unparsedDataArray.length);
        }
    }

    nasaDataset.resolution = resolution;
    if (!fillValueFound) {
        skipFillValue = false;
    } else {
        skipFillValue = conf.getBoolean("skipfill", true);
        // Whether we need to recover fill values or not
        boolean recoverFillValues = conf.getBoolean("recoverholes", true);
        if (recoverFillValues)
            recoverFillValues(conf);
    }
    this.nasaShape = (S) OperationsParams.getShape(conf, "shape", new NASARectangle());
    this.nasaShape.setTimestamp(nasaDataset.time);
    this.value = new NASAIterator();
}

From source file:gobblin.compaction.mapreduce.avro.AvroKeyRecursiveCombineFileInputFormat.java

License:Apache License

/**
 * Set the number of locations in the split to SPLIT_MAX_NUM_LOCATIONS if it is larger than
 * SPLIT_MAX_NUM_LOCATIONS (MAPREDUCE-5186).
 */
private static List<InputSplit> cleanSplits(List<InputSplit> splits) throws IOException {
    if (VersionInfo.getVersion().compareTo("2.3.0") >= 0) {
        // This issue was fixed in 2.3.0, if newer version, no need to clean up splits
        return splits;
    }

    List<InputSplit> cleanedSplits = Lists.newArrayList();

    for (int i = 0; i < splits.size(); i++) {
        CombineFileSplit oldSplit = (CombineFileSplit) splits.get(i);
        String[] locations = oldSplit.getLocations();

        Preconditions.checkNotNull(locations, "CombineFileSplit.getLocations() returned null");

        if (locations.length > SPLIT_MAX_NUM_LOCATIONS) {
            locations = Arrays.copyOf(locations, SPLIT_MAX_NUM_LOCATIONS);
        }

        cleanedSplits.add(new CombineFileSplit(oldSplit.getPaths(), oldSplit.getStartOffsets(),
                oldSplit.getLengths(), locations));
    }
    return cleanedSplits;
}
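
The same workaround can be applied to a single split. Below is a hedged sketch, assuming only the public CombineFileSplit API; the class name SplitLocationCapper and the MAX_LOCATIONS constant are illustrative (the Gobblin code above uses its own SPLIT_MAX_NUM_LOCATIONS):

import java.io.IOException;
import java.util.Arrays;

import org.apache.hadoop.mapreduce.lib.input.CombineFileSplit;

public class SplitLocationCapper {

    // Hypothetical cap on the number of hosts kept per split.
    private static final int MAX_LOCATIONS = 10;

    // Returns the split unchanged if its host list is already small enough,
    // otherwise rebuilds it with the host list truncated to MAX_LOCATIONS.
    public static CombineFileSplit cap(CombineFileSplit split) throws IOException {
        String[] locations = split.getLocations();
        if (locations == null || locations.length <= MAX_LOCATIONS) {
            return split;
        }
        return new CombineFileSplit(split.getPaths(), split.getStartOffsets(),
                split.getLengths(), Arrays.copyOf(locations, MAX_LOCATIONS));
    }
}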

From source file:org.apache.mahout.text.WholeFileRecordReader.java

License:Apache License

public WholeFileRecordReader(CombineFileSplit fileSplit, TaskAttemptContext taskAttemptContext, Integer idx)
        throws IOException {
    this.fileSplit = new FileSplit(fileSplit.getPath(idx), fileSplit.getOffset(idx), fileSplit.getLength(idx),
            fileSplit.getLocations());
    this.configuration = taskAttemptContext.getConfiguration();
    this.index = new IntWritable(idx);
    this.fileFilterClassName = this.configuration.get(FILE_FILTER_CLASS_OPTION[0]);
}

From source file:org.kitesdk.data.spi.filesystem.AbstractCombineFileRecordReader.java

License:Apache License

@Override
public void initialize(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException {
    if (delegate != null) {
        delegate.close();
    }
    if (split instanceof CombineFileSplit) {
        CombineFileSplit combineSplit = (CombineFileSplit) split;
        FileSplit fileSplit = new FileSplit(combineSplit.getPath(idx), combineSplit.getOffset(idx),
                combineSplit.getLength(idx), combineSplit.getLocations());
        delegate = getInputFormat().createRecordReader(fileSplit, context);
        delegate.initialize(fileSplit, context);
    } else {
        throw new DatasetOperationException("Split is not a CombineFileSplit: %s:%s",
                split.getClass().getCanonicalName(), split);
    }
}