Example usage for org.apache.hadoop.mapreduce.lib.input CombineFileSplit getPath

List of usage examples for org.apache.hadoop.mapreduce.lib.input CombineFileSplit getPath

Introduction

On this page you can find example usage for org.apache.hadoop.mapreduce.lib.input CombineFileSplit getPath.

Prototype

public Path getPath(int i) 

Document

Returns the ith Path
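As a minimal sketch of the common pattern (not taken from any of the projects below; the class and field names are illustrative only), a record reader constructor invoked by CombineFileRecordReader receives the CombineFileSplit, the task context, and a path index, and calls getPath(index) to resolve the file it should read:

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.input.CombineFileSplit;

// Hypothetical reader skeleton: resolves the i-th file of a CombineFileSplit
// inside a constructor that CombineFileRecordReader calls via reflection.
public class ExampleCombineFileReader {
    private final Path fileToRead;   // path at the given index
    private final long fileLength;   // length of that chunk of the split
    private final FileSystem fs;     // file system owning the path

    public ExampleCombineFileReader(CombineFileSplit split, TaskAttemptContext context, Integer index)
            throws IOException {
        Configuration conf = context.getConfiguration();
        // getPath(int i) returns the i-th Path contained in the combined split
        this.fileToRead = split.getPath(index);
        this.fileLength = split.getLength(index);
        this.fs = fileToRead.getFileSystem(conf);
    }
}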

Usage

From source file:com.gemstone.gemfire.cache.hdfs.internal.hoplog.mapreduce.AbstractGFRecordReader.java

License:Apache License

@Override
public void initialize(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException {
    CombineFileSplit cSplit = (CombineFileSplit) split;
    Path[] path = cSplit.getPaths();
    long[] start = cSplit.getStartOffsets();
    long[] len = cSplit.getLengths();

    Configuration conf = context.getConfiguration();
    FileSystem fs = cSplit.getPath(0).getFileSystem(conf);

    this.splitIterator = HDFSSplitIterator.newInstance(fs, path, start, len, 0L, 0L);
}

From source file:com.marcolotz.lung.io.inputFormat.MultipleFilesRecordReader.java

License:Creative Commons License

/**
 * Implementation detail: This constructor is built to be called via
 * reflection from within CombineFileRecordReader.
 * @param fileSplit
 *            The CombineFileSplit that this will read from.
 * @param context
 *            The context for this task.
 * @param pathToProcess
 *            The path index from the CombineFileSplit to process in this
 *            record.
 */
public MultipleFilesRecordReader(CombineFileSplit fileSplit, TaskAttemptContext context,
        Integer pathToProcess) {
    isProcessed = false;

    mFileToRead = fileSplit.getPath(pathToProcess);
    mFileLength = fileSplit.getLength(pathToProcess);

    mConf = context.getConfiguration();

    /* never used in production, just for code integrity */
    assert 0 == fileSplit.getOffset(pathToProcess);

    if (LOG.isDebugEnabled()) {
        LOG.debug("FileToRead is: " + mFileToRead.toString());
        LOG.debug("Processing path " + pathToProcess + " out of " + fileSplit.getNumPaths());

        try {
            FileSystem fs = FileSystem.get(mConf);

            /* never used in production, just for code integrity */
            assert fs.getFileStatus(mFileToRead).getLen() == mFileLength;
        } catch (IOException ioe) {
            LOG.debug("Problem in file length");
        }
    }

    fileContent = new BytesWritable();
}

From source file:com.moz.fiji.mapreduce.input.impl.WholeFileRecordReader.java

License:Apache License

/**
 * Implementation detail: This constructor is built to be called via
 * reflection from within CombineFileRecordReader.
 *
 * @param fileSplit The CombineFileSplit that this will read from.
 * @param context The context for this task.
 * @param pathToProcess The path index from the CombineFileSplit to process in this record.
 */
public WholeFileRecordReader(CombineFileSplit fileSplit, TaskAttemptContext context, Integer pathToProcess) {
    mProcessed = false;
    mFileToRead = fileSplit.getPath(pathToProcess);
    mFileLength = fileSplit.getLength(pathToProcess);
    mConf = context.getConfiguration();

    assert 0 == fileSplit.getOffset(pathToProcess);
    if (LOG.isDebugEnabled()) {
        LOG.debug("FileToRead is: " + mFileToRead.toString());
        LOG.debug("Processing path " + pathToProcess + " out of " + fileSplit.getNumPaths());

        try {
            final FileSystem fs = mFileToRead.getFileSystem(mConf);
            assert fs.getFileStatus(mFileToRead).getLen() == mFileLength;
        } catch (IOException ioe) {
            // oh well, I was just testing.
        }
    }

    mFileName = new Text();
    mFileText = new Text();
}

From source file:edu.gslis.streamcorpus.ThriftRecordReader.java

License:Apache License

public ThriftRecordReader(CombineFileSplit split, TaskAttemptContext context, Integer index)
        throws IOException {
    this.path = split.getPath(index);
    fs = this.path.getFileSystem(context.getConfiguration());
    this.startOffset = split.getOffset(index);
    this.end = startOffset + split.getLength(index);
    this.pos = startOffset;

    in = fs.open(path);

    if (path.toUri().toString().endsWith("xz"))
        tp = new TBinaryProtocol.Factory().getProtocol(new TIOStreamTransport(new XZInputStream(in)));
    else
        tp = new TBinaryProtocol.Factory().getProtocol(new TIOStreamTransport(in));

}

From source file:edu.umn.cs.spatialHadoop.mapreduce.SpatialInputFormat3.java

License:Open Source License

@Override
public RecordReader<K, Iterable<V>> createRecordReader(InputSplit split, TaskAttemptContext context)
        throws IOException, InterruptedException {
    Path path;
    String extension;
    if (split instanceof FileSplit) {
        FileSplit fsplit = (FileSplit) split;
        extension = FileUtil.getExtensionWithoutCompression(path = fsplit.getPath());
    } else if (split instanceof CombineFileSplit) {
        CombineFileSplit csplit = (CombineFileSplit) split;
        extension = FileUtil.getExtensionWithoutCompression(path = csplit.getPath(0));
    } else {
        throw new RuntimeException("Cannot process plits of type " + split.getClass());
    }
    // If this extension is for a compression, skip it and take the previous
    // extension
    if (extension.equals("hdf")) {
        // HDF File. Create HDFRecordReader
        return (RecordReader) new HDFRecordReader();
    }
    if (extension.equals("rtree")) {
        // File is locally indexed as RTree
        return (RecordReader) new RTreeRecordReader3<V>();
    }
    // For backward compatibility, check if the file is RTree indexed from
    // its signature
    Configuration conf = context != null ? context.getConfiguration() : new Configuration();
    if (SpatialSite.isRTree(path.getFileSystem(conf), path)) {
        return (RecordReader) new RTreeRecordReader3<V>();
    }
    // Check if a custom record reader is configured with this extension
    Class<?> recordReaderClass = conf.getClass("SpatialInputFormat." + extension + ".recordreader",
            SpatialRecordReader3.class);
    try {
        return (RecordReader<K, Iterable<V>>) recordReaderClass.newInstance();
    } catch (InstantiationException e) {
    } catch (IllegalAccessException e) {
    }
    // Use the default SpatialRecordReader if none of the above worked
    return (RecordReader) new SpatialRecordReader3<V>();
}

From source file:edu.umn.cs.spatialHadoop.nasa.HDFRecordReader.java

License:Open Source License

public void initialize(InputSplit split, Configuration conf) throws IOException {
    this.conf = conf;
    String datasetName = conf.get("dataset");
    if (datasetName == null)
        throw new RuntimeException("Dataset name should be provided");
    if (split instanceof CombineFileSplit) {
        CombineFileSplit csplits = (CombineFileSplit) split;
        splits = new Vector<FileSplit>(csplits.getNumPaths());
        for (int i = 0; i < csplits.getNumPaths(); i++) {
            FileSplit fsplit = new FileSplit(csplits.getPath(i), csplits.getOffset(i), csplits.getLength(i),
                    csplits.getLocations());
            splits.add(fsplit);
        }
        this.initialize(splits.remove(splits.size() - 1), conf);
        return;
    }
    inFile = ((FileSplit) split).getPath();
    fs = inFile.getFileSystem(conf);
    if (fs instanceof HTTPFileSystem) {
        // For performance reasons, we don't open HDF files from HTTP
        inFile = new Path(FileUtil.copyFile(conf, inFile));
        fs = FileSystem.getLocal(conf);
        this.deleteOnEnd = true;
    }
    hdfFile = new HDFFile(fs.open(inFile));

    // Retrieve meta data
    String archiveMetadata = (String) hdfFile.findHeaderByName("ArchiveMetadata.0").getEntryAt(0);
    String coreMetadata = (String) hdfFile.findHeaderByName("CoreMetadata.0").getEntryAt(0);
    nasaDataset = new NASADataset(coreMetadata, archiveMetadata);

    // Retrieve the data array
    DDVGroup dataGroup = hdfFile.findGroupByName(datasetName);
    boolean fillValueFound = false;
    int resolution = 0;
    // Retrieve metadata
    int fillValuee = 0;
    for (DataDescriptor dd : dataGroup.getContents()) {
        if (dd instanceof DDVDataHeader) {
            DDVDataHeader vheader = (DDVDataHeader) dd;
            if (vheader.getName().equals("_FillValue")) {
                Object fillValue = vheader.getEntryAt(0);
                if (fillValue instanceof Integer)
                    fillValuee = (Integer) fillValue;
                else if (fillValue instanceof Short)
                    fillValuee = (Short) fillValue;
                else if (fillValue instanceof Byte)
                    fillValuee = (Byte) fillValue;
                else
                    throw new RuntimeException("Unsupported type: " + fillValue.getClass());
                fillValueFound = true;
            } else if (vheader.getName().equals("valid_range")) {
                Object minValue = vheader.getEntryAt(0);
                if (minValue instanceof Integer)
                    nasaDataset.minValue = (Integer) minValue;
                else if (minValue instanceof Byte)
                    nasaDataset.minValue = (Byte) minValue;
                Object maxValue = vheader.getEntryAt(1);
                if (maxValue instanceof Integer)
                    nasaDataset.maxValue = (Integer) maxValue;
                else if (maxValue instanceof Byte)
                    nasaDataset.maxValue = (Byte) maxValue;
            }
        }
    }
    // Retrieve data
    for (DataDescriptor dd : dataGroup.getContents()) {
        if (dd instanceof DDNumericDataGroup) {
            DDNumericDataGroup numericDataGroup = (DDNumericDataGroup) dd;
            valueSize = numericDataGroup.getDataSize();
            resolution = numericDataGroup.getDimensions()[0];
            unparsedDataArray = new byte[valueSize * resolution * resolution];
            if (fillValueFound) {
                fillValueBytes = new byte[valueSize];
                HDFConstants.writeAt(fillValueBytes, 0, fillValuee, valueSize);
                for (int i = 0; i < unparsedDataArray.length; i++)
                    unparsedDataArray[i] = fillValueBytes[i % valueSize];
            }
            numericDataGroup.getAsByteArray(unparsedDataArray, 0, unparsedDataArray.length);
        }
    }

    nasaDataset.resolution = resolution;
    if (!fillValueFound) {
        skipFillValue = false;
    } else {
        skipFillValue = conf.getBoolean("skipfill", true);
        // Whether we need to recover fill values or not
        boolean recoverFillValues = conf.getBoolean("recoverholes", true);
        if (recoverFillValues)
            recoverFillValues(conf);
    }
    this.nasaShape = (S) OperationsParams.getShape(conf, "shape", new NASARectangle());
    this.nasaShape.setTimestamp(nasaDataset.time);
    this.value = new NASAIterator();
}

From source file:edu.umn.cs.sthadoop.mapreduce.SpatioTemporalInputFormat.java

License:Open Source License

@Override
public RecordReader<K, Iterable<V>> createRecordReader(InputSplit split, TaskAttemptContext context)
        throws IOException, InterruptedException {
    Path path;
    String extension;
    if (split instanceof FileSplit) {
        FileSplit fsplit = (FileSplit) split;
        extension = FileUtil.getExtensionWithoutCompression(path = fsplit.getPath());
    } else if (split instanceof CombineFileSplit) {
        CombineFileSplit csplit = (CombineFileSplit) split;
        extension = FileUtil.getExtensionWithoutCompression(path = csplit.getPath(0));
    } else {
        throw new RuntimeException("Cannot process plits of type " + split.getClass());
    }
    // If this extension is for a compression, skip it and take the previous
    // extension
    if (extension.equals("hdf")) {
        // HDF File. Create HDFRecordReader
        return (RecordReader) new HDFRecordReader();
    }
    if (extension.equals("rtree")) {
        // File is locally indexed as RTree
        return (RecordReader) new RTreeRecordReader3<V>();
    }
    // For backward compatibility, check if the file is RTree indexed from
    // its signature
    Configuration conf = context != null ? context.getConfiguration() : new Configuration();
    if (SpatialSite.isRTree(path.getFileSystem(conf), path)) {
        return (RecordReader) new RTreeRecordReader3<V>();
    }
    // Check if a custom record reader is configured with this extension
    Class<?> recordReaderClass = conf.getClass("SpatialInputFormat." + extension + ".recordreader",
            SpatioTemporalRecordReader.class);
    try {
        return (RecordReader<K, Iterable<V>>) recordReaderClass.newInstance();
    } catch (InstantiationException e) {
    } catch (IllegalAccessException e) {
    }
    // Use the default SpatioTemporalRecordReader if none of the above worked
    return (RecordReader) new SpatioTemporalRecordReader<V>();
}

From source file:fire.util.fileformats.combineimagefileinputformat.CombineFileImageRecordReader.java

License:Apache License

public CombineFileImageRecordReader(CombineFileSplit split, TaskAttemptContext context, Integer index)
        throws IOException {
    this.path = split.getPath(index);
    fs = this.path.getFileSystem(context.getConfiguration());
}

From source file:fire.util.fileformats.combinetextfileinputformat.CombineFileLineRecordReader.java

License:Apache License

public CombineFileLineRecordReader(CombineFileSplit split, TaskAttemptContext context, Integer index)
        throws IOException {

    this.path = split.getPath(index);
    fs = this.path.getFileSystem(context.getConfiguration());
    this.startOffset = split.getOffset(index);
    this.end = startOffset + split.getLength(index);
    boolean skipFirstLine = false;

    //open the file
    fileIn = fs.open(path);
    if (startOffset != 0) {
        skipFirstLine = true;
        --startOffset;
        fileIn.seek(startOffset);
    }
    reader = new LineReader(fileIn);
    if (skipFirstLine) { // skip first line and re-establish "startOffset".
        startOffset += reader.readLine(new Text(), 0,
                (int) Math.min((long) Integer.MAX_VALUE, end - startOffset));
    }
    this.pos = startOffset;
}

From source file:gobblin.compaction.mapreduce.avro.AvroKeyCombineFileRecordReader.java

License:Apache License

private static Schema getSchema(CombineFileSplit split, TaskAttemptContext cx, Integer idx) throws IOException {
    Schema schema = AvroJob.getInputKeySchema(cx.getConfiguration());
    if (schema != null) {
        return schema;
    }

    Path path = split.getPath(idx);
    FileSystem fs = path.getFileSystem(cx.getConfiguration());
    return AvroUtils.getSchemaFromDataFile(path, fs);
}