Example usage for org.apache.hadoop.mapreduce.lib.input CombineFileSplit getPath

List of usage examples for org.apache.hadoop.mapreduce.lib.input CombineFileSplit getPath

Introduction

On this page you can find example usage for org.apache.hadoop.mapreduce.lib.input CombineFileSplit getPath.

Prototype

public Path getPath(int i) 

Document

Returns the ith Path
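As a minimal sketch of the common pattern (not taken from any of the projects below; the class and field names are illustrative only), a record reader constructor invoked by CombineFileRecordReader receives the CombineFileSplit, the task context, and a path index, and calls getPath(index) to resolve the file it should read:

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.input.CombineFileSplit;

// Hypothetical reader skeleton: resolves the i-th file of a CombineFileSplit
// inside a constructor that CombineFileRecordReader calls via reflection.
public class ExampleCombineFileReader {
    private final Path fileToRead;   // path at the given index
    private final long fileLength;   // length of that chunk of the split
    private final FileSystem fs;     // file system owning the path

    public ExampleCombineFileReader(CombineFileSplit split, TaskAttemptContext context, Integer index)
            throws IOException {
        Configuration conf = context.getConfiguration();
        // getPath(int i) returns the i-th Path contained in the combined split
        this.fileToRead = split.getPath(index);
        this.fileLength = split.getLength(index);
        this.fs = fileToRead.getFileSystem(conf);
    }
}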

Usage

From source file:com.gemstone.gemfire.cache.hdfs.internal.hoplog.mapreduce.AbstractGFRecordReader.java

License:Apache License

@Override
public void initialize(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException {
    CombineFileSplit cSplit = (CombineFileSplit) split;
    Path[] path = cSplit.getPaths();
    long[] start = cSplit.getStartOffsets();
    long[] len = cSplit.getLengths();

    Configuration conf = context.getConfiguration();
    FileSystem fs = cSplit.getPath(0).getFileSystem(conf);

    this.splitIterator = HDFSSplitIterator.newInstance(fs, path, start, len, 0L, 0L);
}

From source file:com.marcolotz.lung.io.inputFormat.MultipleFilesRecordReader.java

License:Creative Commons License

/**
 * Implementation detail: This constructor is built to be called via
 * reflection from within CombineFileRecordReader.
 * @param fileSplit
 *            The CombineFileSplit that this will read from.
 * @param context
 *            The context for this task.
 * @param pathToProcess
 *            The path index from the CombineFileSplit to process in this
 *            record.
 */
public MultipleFilesRecordReader(CombineFileSplit fileSplit, TaskAttemptContext context,
        Integer pathToProcess) {
    isProcessed = false;

    mFileToRead = fileSplit.getPath(pathToProcess);
    mFileLength = fileSplit.getLength(pathToProcess);

    mConf = context.getConfiguration();

    /* never used in production, just for code integrity */
    assert 0 == fileSplit.getOffset(pathToProcess);

    if (LOG.isDebugEnabled()) {
        LOG.debug("FileToRead is: " + mFileToRead.toString());
        LOG.debug("Processing path " + pathToProcess + " out of " + fileSplit.getNumPaths());

        try {
            FileSystem fs = FileSystem.get(mConf);

            /* never used in production, just for code integrity */
            assert fs.getFileStatus(mFileToRead).getLen() == mFileLength;
        } catch (IOException ioe) {
            LOG.debug("Problem in file length");
        }
    }

    fileContent = new BytesWritable();
}

From source file:com.moz.fiji.mapreduce.input.impl.WholeFileRecordReader.java

License:Apache License

/**
 * Implementation detail: This constructor is built to be called via
 * reflection from within CombineFileRecordReader.
 *
 * @param fileSplit The CombineFileSplit that this will read from.
 * @param context The context for this task.
 * @param pathToProcess The path index from the CombineFileSplit to process in this record.
 */
public WholeFileRecordReader(CombineFileSplit fileSplit, TaskAttemptContext context, Integer pathToProcess) {
    mProcessed = false;
    mFileToRead = fileSplit.getPath(pathToProcess);
    mFileLength = fileSplit.getLength(pathToProcess);
    mConf = context.getConfiguration();

    assert 0 == fileSplit.getOffset(pathToProcess);
    if (LOG.isDebugEnabled()) {
        LOG.debug("FileToRead is: " + mFileToRead.toString());
        LOG.debug("Processing path " + pathToProcess + " out of " + fileSplit.getNumPaths());

        try {
            final FileSystem fs = mFileToRead.getFileSystem(mConf);
            assert fs.getFileStatus(mFileToRead).getLen() == mFileLength;
        } catch (IOException ioe) {
            // oh well, I was just testing.
        }
    }

    mFileName = new Text();
    mFileText = new Text();
}

From source file:edu.gslis.streamcorpus.ThriftRecordReader.java

License:Apache License

public ThriftRecordReader(CombineFileSplit split, TaskAttemptContext context, Integer index)
        throws IOException {
    this.path = split.getPath(index);
    fs = this.path.getFileSystem(context.getConfiguration());
    this.startOffset = split.getOffset(index);
    this.end = startOffset + split.getLength(index);
    this.pos = startOffset;

    in = fs.open(path);

    if (path.toUri().toString().endsWith("xz"))
        tp = new TBinaryProtocol.Factory().getProtocol(new TIOStreamTransport(new XZInputStream(in)));
    else
        tp = new TBinaryProtocol.Factory().getProtocol(new TIOStreamTransport(in));

}

From source file:edu.umn.cs.spatialHadoop.mapreduce.SpatialInputFormat3.java

License:Open Source License

@Override
public RecordReader<K, Iterable<V>> createRecordReader(InputSplit split, TaskAttemptContext context)
        throws IOException, InterruptedException {
    Path path;
    String extension;
    if (split instanceof FileSplit) {
        FileSplit fsplit = (FileSplit) split;
        extension = FileUtil.getExtensionWithoutCompression(path = fsplit.getPath());
    } else if (split instanceof CombineFileSplit) {
        CombineFileSplit csplit = (CombineFileSplit) split;
        extension = FileUtil.getExtensionWithoutCompression(path = csplit.getPath(0));
    } else {
        throw new RuntimeException("Cannot process plits of type " + split.getClass());
    }
    // If this extension is for a compression, skip it and take the previous
    // extension
    if (extension.equals("hdf")) {
        // HDF File. Create HDFRecordReader
        return (RecordReader) new HDFRecordReader();
    }
    if (extension.equals("rtree")) {
        // File is locally indexed as RTree
        return (RecordReader) new RTreeRecordReader3<V>();
    }
    // For backward compatibility, check if the file is RTree indexed from
    // its signature
    Configuration conf = context != null ? context.getConfiguration() : new Configuration();
    if (SpatialSite.isRTree(path.getFileSystem(conf), path)) {
        return (RecordReader) new RTreeRecordReader3<V>();
    }
    // Check if a custom record reader is configured with this extension
    Class<?> recordReaderClass = conf.getClass("SpatialInputFormat." + extension + ".recordreader",
            SpatialRecordReader3.class);
    try {
        return (RecordReader<K, Iterable<V>>) recordReaderClass.newInstance();
    } catch (InstantiationException e) {
    } catch (IllegalAccessException e) {
    }
    // Use the default SpatialRecordReader if none of the above worked
    return (RecordReader) new SpatialRecordReader3<V>();
}

From source file:edu.umn.cs.spatialHadoop.nasa.HDFRecordReader.java

License:Open Source License

public void initialize(InputSplit split, Configuration conf) throws IOException {
    this.conf = conf;
    String datasetName = conf.get("dataset");
    if (datasetName == null)
        throw new RuntimeException("Dataset name should be provided");
    if (split instanceof CombineFileSplit) {
        CombineFileSplit csplits = (CombineFileSplit) split;
        splits = new Vector<FileSplit>(csplits.getNumPaths());
        for (int i = 0; i < csplits.getNumPaths(); i++) {
            FileSplit fsplit = new FileSplit(csplits.getPath(i), csplits.getOffset(i), csplits.getLength(i),
                    csplits.getLocations());
            splits.add(fsplit);
        }
        this.initialize(splits.remove(splits.size() - 1), conf);
        return;
    }
    inFile = ((FileSplit) split).getPath();
    fs = inFile.getFileSystem(conf);
    if (fs instanceof HTTPFileSystem) {
        // For performance reasons, we don't open HDF files from HTTP
        inFile = new Path(FileUtil.copyFile(conf, inFile));
        fs = FileSystem.getLocal(conf);
        this.deleteOnEnd = true;
    }
    hdfFile = new HDFFile(fs.open(inFile));

    // Retrieve meta data
    String archiveMetadata = (String) hdfFile.findHeaderByName("ArchiveMetadata.0").getEntryAt(0);
    String coreMetadata = (String) hdfFile.findHeaderByName("CoreMetadata.0").getEntryAt(0);
    nasaDataset = new NASADataset(coreMetadata, archiveMetadata);

    // Retrieve the data array
    DDVGroup dataGroup = hdfFile.findGroupByName(datasetName);
    boolean fillValueFound = false;
    int resolution = 0;
    // Retrieve metadata
    int fillValuee = 0;
    for (DataDescriptor dd : dataGroup.getContents()) {
        if (dd instanceof DDVDataHeader) {
            DDVDataHeader vheader = (DDVDataHeader) dd;
            if (vheader.getName().equals("_FillValue")) {
                Object fillValue = vheader.getEntryAt(0);
                if (fillValue instanceof Integer)
                    fillValuee = (Integer) fillValue;
                else if (fillValue instanceof Short)
                    fillValuee = (Short) fillValue;
                else if (fillValue instanceof Byte)
                    fillValuee = (Byte) fillValue;
                else
                    throw new RuntimeException("Unsupported type: " + fillValue.getClass());
                fillValueFound = true;
            } else if (vheader.getName().equals("valid_range")) {
                Object minValue = vheader.getEntryAt(0);
                if (minValue instanceof Integer)
                    nasaDataset.minValue = (Integer) minValue;
                else if (minValue instanceof Byte)
                    nasaDataset.minValue = (Byte) minValue;
                Object maxValue = vheader.getEntryAt(1);
                if (maxValue instanceof Integer)
                    nasaDataset.maxValue = (Integer) maxValue;
                else if (maxValue instanceof Byte)
                    nasaDataset.maxValue = (Byte) maxValue;
            }
        }
    }
    // Retrieve data
    for (DataDescriptor dd : dataGroup.getContents()) {
        if (dd instanceof DDNumericDataGroup) {
            DDNumericDataGroup numericDataGroup = (DDNumericDataGroup) dd;
            valueSize = numericDataGroup.getDataSize();
            resolution = numericDataGroup.getDimensions()[0];
            unparsedDataArray = new byte[valueSize * resolution * resolution];
            if (fillValueFound) {
                fillValueBytes = new byte[valueSize];
                HDFConstants.writeAt(fillValueBytes, 0, fillValuee, valueSize);
                for (int i = 0; i < unparsedDataArray.length; i++)
                    unparsedDataArray[i] = fillValueBytes[i % valueSize];
            }
            numericDataGroup.getAsByteArray(unparsedDataArray, 0, unparsedDataArray.length);
        }
    }

    nasaDataset.resolution = resolution;
    if (!fillValueFound) {
        skipFillValue = false;
    } else {
        skipFillValue = conf.getBoolean("skipfill", true);
        // Whether we need to recover fill values or not
        boolean recoverFillValues = conf.getBoolean("recoverholes", true);
        if (recoverFillValues)
            recoverFillValues(conf);
    }
    this.nasaShape = (S) OperationsParams.getShape(conf, "shape", new NASARectangle());
    this.nasaShape.setTimestamp(nasaDataset.time);
    this.value = new NASAIterator();
}

From source file:edu.umn.cs.sthadoop.mapreduce.SpatioTemporalInputFormat.java

License:Open Source License

@Override
public RecordReader<K, Iterable<V>> createRecordReader(InputSplit split, TaskAttemptContext context)
        throws IOException, InterruptedException {
    Path path;
    String extension;
    if (split instanceof FileSplit) {
        FileSplit fsplit = (FileSplit) split;
        extension = FileUtil.getExtensionWithoutCompression(path = fsplit.getPath());
    } else if (split instanceof CombineFileSplit) {
        CombineFileSplit csplit = (CombineFileSplit) split;
        extension = FileUtil.getExtensionWithoutCompression(path = csplit.getPath(0));
    } else {
        throw new RuntimeException("Cannot process plits of type " + split.getClass());
    }
    // If this extension is for a compression, skip it and take the previous
    // extension
    if (extension.equals("hdf")) {
        // HDF File. Create HDFRecordReader
        return (RecordReader) new HDFRecordReader();
    }
    if (extension.equals("rtree")) {
        // File is locally indexed as RTree
        return (RecordReader) new RTreeRecordReader3<V>();
    }
    // For backward compatibility, check if the file is RTree indexed from
    // its signature
    Configuration conf = context != null ? context.getConfiguration() : new Configuration();
    if (SpatialSite.isRTree(path.getFileSystem(conf), path)) {
        return (RecordReader) new RTreeRecordReader3<V>();
    }
    // Check if a custom record reader is configured with this extension
    Class<?> recordReaderClass = conf.getClass("SpatialInputFormat." + extension + ".recordreader",
            SpatioTemporalRecordReader.class);
    try {
        return (RecordReader<K, Iterable<V>>) recordReaderClass.newInstance();
    } catch (InstantiationException e) {
    } catch (IllegalAccessException e) {
    }
    // Use the default SpatioTemporalRecordReader if none of the above worked
    return (RecordReader) new SpatioTemporalRecordReader<V>();
}

From source file:fire.util.fileformats.combineimagefileinputformat.CombineFileImageRecordReader.java

License:Apache License

public CombineFileImageRecordReader(CombineFileSplit split, TaskAttemptContext context, Integer index)
        throws IOException {
    this.path = split.getPath(index);
    fs = this.path.getFileSystem(context.getConfiguration());
}

From source file:fire.util.fileformats.combinetextfileinputformat.CombineFileLineRecordReader.java

License:Apache License

public CombineFileLineRecordReader(CombineFileSplit split, TaskAttemptContext context, Integer index)
        throws IOException {

    this.path = split.getPath(index);
    fs = this.path.getFileSystem(context.getConfiguration());
    this.startOffset = split.getOffset(index);
    this.end = startOffset + split.getLength(index);
    boolean skipFirstLine = false;

    //open the file
    fileIn = fs.open(path);
    if (startOffset != 0) {
        skipFirstLine = true;
        --startOffset;
        fileIn.seek(startOffset);
    }
    reader = new LineReader(fileIn);
    if (skipFirstLine) { // skip first line and re-establish "startOffset".
        startOffset += reader.readLine(new Text(), 0,
                (int) Math.min((long) Integer.MAX_VALUE, end - startOffset));
    }
    this.pos = startOffset;
}

From source file:gobblin.compaction.mapreduce.avro.AvroKeyCombineFileRecordReader.java

License:Apache License

private static Schema getSchema(CombineFileSplit split, TaskAttemptContext cx, Integer idx) throws IOException {
    Schema schema = AvroJob.getInputKeySchema(cx.getConfiguration());
    if (schema != null) {
        return schema;
    }

    Path path = split.getPath(idx);
    FileSystem fs = path.getFileSystem(cx.getConfiguration());
    return AvroUtils.getSchemaFromDataFile(path, fs);
}