Example usage for org.apache.hadoop.mapreduce.lib.input CombineFileSplit getOffset

Introduction

This page collects example usages of org.apache.hadoop.mapreduce.lib.input.CombineFileSplit.getOffset.

Prototype

public long getOffset(int i) 

Document

Returns the start offset of the i-th Path.
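
All of the examples below follow one pattern: a record reader, constructed reflectively by CombineFileRecordReader, uses getOffset(i) together with getPath(i) and getLength(i) to locate the region of the i-th file that belongs to the combined split. The following minimal sketch illustrates that pattern; the class and field names are illustrative and not taken from any of the projects below.

import java.io.IOException;

import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.input.CombineFileSplit;

public class OffsetSketchReader {
    private final Path path;          // the index-th file in the combined split
    private final long start;         // start offset of that file's region
    private final long end;           // exclusive end; a full reader would stop here
    private final FSDataInputStream in;

    public OffsetSketchReader(CombineFileSplit split, TaskAttemptContext context, Integer index)
            throws IOException {
        path = split.getPath(index);
        // getOffset(index) is where this reader's region begins within
        // the index-th file, not within the combined split.
        start = split.getOffset(index);
        end = start + split.getLength(index);

        FileSystem fs = path.getFileSystem(context.getConfiguration());
        in = fs.open(path);
        in.seek(start); // position the stream at the region's start
    }
}

Readers that consume files whole (several of the examples below) instead assert that getOffset(pathToProcess) is zero, since a whole file always starts at offset zero.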

Usage

From source file: com.marcolotz.lung.io.inputFormat.MultipleFilesRecordReader.java

License: Creative Commons License

/**
 * Implementation detail: This constructor is built to be called via
 * reflection from within CombineFileRecordReader.
 *
 * @param fileSplit
 *            The CombineFileSplit that this will read from.
 * @param context
 *            The context for this task.
 * @param pathToProcess
 *            The path index from the CombineFileSplit to process in this
 *            record.
 */
public MultipleFilesRecordReader(CombineFileSplit fileSplit, TaskAttemptContext context,
        Integer pathToProcess) {
    isProcessed = false;

    mFileToRead = fileSplit.getPath(pathToProcess);
    mFileLength = fileSplit.getLength(pathToProcess);

    mConf = context.getConfiguration();

    /* never used in production, just for code integrity */
    assert 0 == fileSplit.getOffset(pathToProcess);

    if (LOG.isDebugEnabled()) {
        LOG.debug("FileToRead is: " + mFileToRead.toString());
        LOG.debug("Processing path " + pathToProcess + " out of " + fileSplit.getNumPaths());

        try {
            FileSystem fs = FileSystem.get(mConf);

            /* never used in production, just for code integrity */
            assert fs.getFileStatus(mFileToRead).getLen() == mFileLength;
        } catch (IOException ioe) {
            LOG.debug("Problem in file length");
        }
    }

    fileContent = new BytesWritable();
}

From source file: com.moz.fiji.mapreduce.input.impl.WholeFileRecordReader.java

License: Apache License

/**
 * Implementation detail: This constructor is built to be called via
 * reflection from within CombineFileRecordReader.
 *
 * @param fileSplit The CombineFileSplit that this will read from.
 * @param context The context for this task.
 * @param pathToProcess The path index from the CombineFileSplit to process in this record.
 */
public WholeFileRecordReader(CombineFileSplit fileSplit, TaskAttemptContext context, Integer pathToProcess) {
    mProcessed = false;
    mFileToRead = fileSplit.getPath(pathToProcess);
    mFileLength = fileSplit.getLength(pathToProcess);
    mConf = context.getConfiguration();

    assert 0 == fileSplit.getOffset(pathToProcess);
    if (LOG.isDebugEnabled()) {
        LOG.debug("FileToRead is: " + mFileToRead.toString());
        LOG.debug("Processing path " + pathToProcess + " out of " + fileSplit.getNumPaths());

        try {
            final FileSystem fs = mFileToRead.getFileSystem(mConf);
            assert fs.getFileStatus(mFileToRead).getLen() == mFileLength;
        } catch (IOException ioe) {
            // oh well, I was just testing.
        }
    }

    mFileName = new Text();
    mFileText = new Text();
}

From source file: edu.gslis.streamcorpus.ThriftRecordReader.java

License: Apache License

public ThriftRecordReader(CombineFileSplit split, TaskAttemptContext context, Integer index)
        throws IOException {
    this.path = split.getPath(index);
    fs = this.path.getFileSystem(context.getConfiguration());
    this.startOffset = split.getOffset(index);
    this.end = startOffset + split.getLength(index);
    this.pos = startOffset;

    in = fs.open(path);

    if (path.toUri().toString().endsWith("xz"))
        tp = new TBinaryProtocol.Factory().getProtocol(new TIOStreamTransport(new XZInputStream(in)));
    else
        tp = new TBinaryProtocol.Factory().getProtocol(new TIOStreamTransport(in));

}

From source file: edu.umn.cs.spatialHadoop.nasa.HDFRecordReader.java

License: Open Source License

public void initialize(InputSplit split, Configuration conf) throws IOException {
    this.conf = conf;
    String datasetName = conf.get("dataset");
    if (datasetName == null)
        throw new RuntimeException("Dataset name should be provided");
    if (split instanceof CombineFileSplit) {
        CombineFileSplit csplits = (CombineFileSplit) split;
        splits = new Vector<FileSplit>(csplits.getNumPaths());
        for (int i = 0; i < csplits.getNumPaths(); i++) {
            FileSplit fsplit = new FileSplit(csplits.getPath(i), csplits.getOffset(i), csplits.getLength(i),
                    csplits.getLocations());
            splits.add(fsplit);
        }
        this.initialize(splits.remove(splits.size() - 1), conf);
        return;
    }
    inFile = ((FileSplit) split).getPath();
    fs = inFile.getFileSystem(conf);
    if (fs instanceof HTTPFileSystem) {
        // For performance reasons, we don't open HDF files from HTTP
        inFile = new Path(FileUtil.copyFile(conf, inFile));
        fs = FileSystem.getLocal(conf);
        this.deleteOnEnd = true;
    }
    hdfFile = new HDFFile(fs.open(inFile));

    // Retrieve meta data
    String archiveMetadata = (String) hdfFile.findHeaderByName("ArchiveMetadata.0").getEntryAt(0);
    String coreMetadata = (String) hdfFile.findHeaderByName("CoreMetadata.0").getEntryAt(0);
    nasaDataset = new NASADataset(coreMetadata, archiveMetadata);

    // Retrieve the data array
    DDVGroup dataGroup = hdfFile.findGroupByName(datasetName);
    boolean fillValueFound = false;
    int resolution = 0;
    // Retrieve metadata
    int fillValuee = 0;
    for (DataDescriptor dd : dataGroup.getContents()) {
        if (dd instanceof DDVDataHeader) {
            DDVDataHeader vheader = (DDVDataHeader) dd;
            if (vheader.getName().equals("_FillValue")) {
                Object fillValue = vheader.getEntryAt(0);
                if (fillValue instanceof Integer)
                    fillValuee = (Integer) fillValue;
                else if (fillValue instanceof Short)
                    fillValuee = (Short) fillValue;
                else if (fillValue instanceof Byte)
                    fillValuee = (Byte) fillValue;
                else
                    throw new RuntimeException("Unsupported type: " + fillValue.getClass());
                fillValueFound = true;
            } else if (vheader.getName().equals("valid_range")) {
                Object minValue = vheader.getEntryAt(0);
                if (minValue instanceof Integer)
                    nasaDataset.minValue = (Integer) minValue;
                else if (minValue instanceof Byte)
                    nasaDataset.minValue = (Byte) minValue;
                Object maxValue = vheader.getEntryAt(1);
                if (maxValue instanceof Integer)
                    nasaDataset.maxValue = (Integer) maxValue;
                else if (maxValue instanceof Byte)
                    nasaDataset.maxValue = (Byte) maxValue;
            }
        }
    }
    // Retrieve data
    for (DataDescriptor dd : dataGroup.getContents()) {
        if (dd instanceof DDNumericDataGroup) {
            DDNumericDataGroup numericDataGroup = (DDNumericDataGroup) dd;
            valueSize = numericDataGroup.getDataSize();
            resolution = numericDataGroup.getDimensions()[0];
            unparsedDataArray = new byte[valueSize * resolution * resolution];
            if (fillValueFound) {
                fillValueBytes = new byte[valueSize];
                HDFConstants.writeAt(fillValueBytes, 0, fillValuee, valueSize);
                for (int i = 0; i < unparsedDataArray.length; i++)
                    unparsedDataArray[i] = fillValueBytes[i % valueSize];
            }
            numericDataGroup.getAsByteArray(unparsedDataArray, 0, unparsedDataArray.length);
        }
    }

    nasaDataset.resolution = resolution;
    if (!fillValueFound) {
        skipFillValue = false;
    } else {
        skipFillValue = conf.getBoolean("skipfill", true);
        // Whether we need to recover fill values or not
        boolean recoverFillValues = conf.getBoolean("recoverholes", true);
        if (recoverFillValues)
            recoverFillValues(conf);
    }
    this.nasaShape = (S) OperationsParams.getShape(conf, "shape", new NASARectangle());
    this.nasaShape.setTimestamp(nasaDataset.time);
    this.value = new NASAIterator();
}

From source file: fire.util.fileformats.combinetextfileinputformat.CombineFileLineRecordReader.java

License: Apache License

public CombineFileLineRecordReader(CombineFileSplit split, TaskAttemptContext context, Integer index)
        throws IOException {

    this.path = split.getPath(index);
    fs = this.path.getFileSystem(context.getConfiguration());
    this.startOffset = split.getOffset(index);
    this.end = startOffset + split.getLength(index);
    boolean skipFirstLine = false;

    //open the file
    fileIn = fs.open(path);
    if (startOffset != 0) {
        skipFirstLine = true;
        --startOffset;
        fileIn.seek(startOffset);
    }
    reader = new LineReader(fileIn);
    if (skipFirstLine) { // skip first line and re-establish "startOffset".
        startOffset += reader.readLine(new Text(), 0,
                (int) Math.min((long) Integer.MAX_VALUE, end - startOffset));
    }
    this.pos = startOffset;
}

From source file: org.apache.mahout.text.WholeFileRecordReader.java

License: Apache License

public WholeFileRecordReader(CombineFileSplit fileSplit, TaskAttemptContext taskAttemptContext, Integer idx)
        throws IOException {
    this.fileSplit = new FileSplit(fileSplit.getPath(idx), fileSplit.getOffset(idx), fileSplit.getLength(idx),
            fileSplit.getLocations());
    this.configuration = taskAttemptContext.getConfiguration();
    this.index = new IntWritable(idx);
    this.fileFilterClassName = this.configuration.get(FILE_FILTER_CLASS_OPTION[0]);
}

From source file: org.kiji.mapreduce.input.impl.WholeFileRecordReader.java

License: Apache License

/**
 * Implementation detail: This constructor is built to be called via
 * reflection from within CombineFileRecordReader.
 *
 * @param fileSplit The CombineFileSplit that this will read from.
 * @param context The context for this task.
 * @param pathToProcess The path index from the CombineFileSplit to process in this record.
 */
public WholeFileRecordReader(CombineFileSplit fileSplit, TaskAttemptContext context, Integer pathToProcess) {
    mProcessed = false;
    mFileToRead = fileSplit.getPath(pathToProcess);
    mFileLength = fileSplit.getLength(pathToProcess);
    mConf = context.getConfiguration();

    assert 0 == fileSplit.getOffset(pathToProcess);
    if (LOG.isDebugEnabled()) {
        LOG.debug("FileToRead is: " + mFileToRead.toString());
        LOG.debug("Processing path " + pathToProcess + " out of " + fileSplit.getNumPaths());

        try {
            FileSystem fs = FileSystem.get(mConf);
            assert fs.getFileStatus(mFileToRead).getLen() == mFileLength;
        } catch (IOException ioe) {
            // oh well, I was just testing.
        }
    }

    mFileName = new Text();
    mFileText = new Text();
}

From source file: org.kitesdk.data.spi.filesystem.AbstractCombineFileRecordReader.java

License: Apache License

@Override
public void initialize(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException {
    if (delegate != null) {
        delegate.close();
    }
    if (split instanceof CombineFileSplit) {
        CombineFileSplit combineSplit = (CombineFileSplit) split;
        FileSplit fileSplit = new FileSplit(combineSplit.getPath(idx), combineSplit.getOffset(idx),
                combineSplit.getLength(idx), combineSplit.getLocations());
        delegate = getInputFormat().createRecordReader(fileSplit, context);
        delegate.initialize(fileSplit, context);
    } else {
        throw new DatasetOperationException("Split is not a CombineFileSplit: %s:%s",
                split.getClass().getCanonicalName(), split);
    }
}
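
Each reader above is instantiated reflectively by CombineFileRecordReader, which iterates over the paths in a CombineFileSplit and hands each reader its path index. The sketch below shows that wiring under stated assumptions: the input format class name is hypothetical, and the key/value types follow the WholeFileRecordReader examples above.

import java.io.IOException;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.input.CombineFileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.CombineFileRecordReader;
import org.apache.hadoop.mapreduce.lib.input.CombineFileSplit;

public class WholeFileCombineInputFormat extends CombineFileInputFormat<Text, Text> {
    @Override
    public RecordReader<Text, Text> createRecordReader(InputSplit split, TaskAttemptContext context)
            throws IOException {
        // CombineFileRecordReader constructs one reader per path in the
        // split via the (CombineFileSplit, TaskAttemptContext, Integer)
        // constructor shown in the examples above.
        return new CombineFileRecordReader<Text, Text>((CombineFileSplit) split, context,
                WholeFileRecordReader.class);
    }

    @Override
    protected boolean isSplitable(JobContext context, Path file) {
        return false; // whole-file readers must see the entire file
    }
}

Returning false from isSplitable ensures each file lands in exactly one split starting at its beginning, which is why the assert 0 == fileSplit.getOffset(pathToProcess) checks in the whole-file readers above hold.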