Example usage for org.apache.hadoop.io IOUtils readFully

Introduction

This page collects example usages of org.apache.hadoop.io.IOUtils#readFully from open source projects.

Prototype

public static void readFully(InputStream in, byte[] buf, int off, int len) throws IOException 

Document

Reads len bytes in a loop.
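The call blocks until exactly len bytes have been read into buf starting at offset off, and throws an IOException if the stream ends before that. A minimal, self-contained sketch of the call, reading an entire file into a byte array (the helper name and the FileSystem/Path setup are illustrative, not taken from the examples below):

public static byte[] readWholeFile(FileSystem fs, Path file) throws IOException {
    FSDataInputStream in = null;
    try {
        in = fs.open(file);
        int len = (int) fs.getFileStatus(file).getLen();
        byte[] buf = new byte[len];
        // Loops internally until len bytes have been read; throws IOException on premature EOF
        IOUtils.readFully(in, buf, 0, len);
        return buf;
    } finally {
        IOUtils.closeStream(in);
    }
}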

Usage

From source file: kogiri.common.hadoop.io.reader.fasta.FastaReadDescriptionReader.java

License: Open Source License

@Override
public void initialize(InputSplit genericSplit, TaskAttemptContext context)
        throws IOException, InterruptedException {

    FileSplit split = (FileSplit) genericSplit;
    Configuration job = context.getConfiguration();
    this.maxLineLength = job.getInt("mapred.linerecordreader.maxlength", Integer.MAX_VALUE);
    this.start = split.getStart();
    this.end = this.start + split.getLength();
    final Path file = split.getPath();
    this.compressionCodecs = new CompressionCodecFactory(job);
    final CompressionCodec codec = this.compressionCodecs.getCodec(file);

    this.filename = file.getName();

    this.firstRead = true;

    // open the file and seek to the start of the split
    FileSystem fs = file.getFileSystem(job);

    // get uncompressed length
    if (codec instanceof GzipCodec) {
        this.isCompressed = true;

        FSDataInputStream fileInCheckSize = fs.open(file);
        byte[] len = new byte[4];
        try {
            LOG.info("compressed input : " + file.getName());
            LOG.info("compressed file size : " + this.end);
            fileInCheckSize.skip(this.end - 4);
            IOUtils.readFully(fileInCheckSize, len, 0, len.length);
            this.uncompressedSize = (len[3] << 24) | (len[2] << 16) | (len[1] << 8) | len[0];
            if (this.uncompressedSize < 0) {
                this.uncompressedSize = this.end;
            }
            LOG.info("uncompressed file size : " + this.uncompressedSize);
        } finally {
            fileInCheckSize.close();
        }

        this.end = Long.MAX_VALUE;
    } else if (codec != null) {
        this.isCompressed = true;
        this.end = Long.MAX_VALUE;
        this.uncompressedSize = Long.MAX_VALUE;
    } else {
        this.isCompressed = false;
    }

    // get inputstream
    FSDataInputStream fileIn = fs.open(file);

    if (codec != null) {
        this.in = new LineReader(codec.createInputStream(fileIn), job);
    } else {
        if (this.start != 0) {
            fileIn.seek(this.start);
        }
        this.in = new LineReader(fileIn, job);
    }

    // skip lines until we meet new record start
    while (this.start < this.end) {
        Text skipText = new Text();
        long newSize = this.in.readLine(skipText, this.maxLineLength,
                Math.max((int) Math.min(Integer.MAX_VALUE, this.end - this.start), this.maxLineLength));
        if (newSize == 0) {
            // EOF
            this.hasNextRecord = false;
            this.pos = this.end;
            break;
        }

        if (skipText.getLength() > 0 && skipText.charAt(0) == READ_DELIMITER) {
            this.prevLine = skipText;
            this.prevSize = newSize;
            this.hasNextRecord = true;
            this.pos = this.start;
            break;
        }

        this.start += newSize;

        if (this.start >= this.end) {
            // EOF
            this.hasNextRecord = false;
            this.pos = this.end;
            break;
        }
    }

    this.key = null;
    this.value = null;
}
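The gzip branch above reads the uncompressed size from the last four bytes of the file: a gzip stream ends with the ISIZE field, the uncompressed length modulo 2^32 stored as a little-endian 32-bit integer. A sketch of that read in isolation, assuming fs, file, and compressedLength (this.end in the example) are already defined; masking each byte with 0xff avoids sign extension of negative Java bytes:

FSDataInputStream sizeIn = fs.open(file);
try {
    byte[] trailer = new byte[4];
    sizeIn.skip(compressedLength - 4); // position at the gzip ISIZE field
    IOUtils.readFully(sizeIn, trailer, 0, trailer.length);
    // little-endian, unsigned bytes: uncompressed length modulo 2^32
    long uncompressedSize = ((trailer[3] & 0xffL) << 24) | ((trailer[2] & 0xffL) << 16)
            | ((trailer[1] & 0xffL) << 8) | (trailer[0] & 0xffL);
} finally {
    sizeIn.close();
}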

From source file: libra.common.hadoop.io.reader.fasta.FastaKmerReader.java

License: Apache License

@Override
public void initialize(InputSplit genericSplit, TaskAttemptContext context)
        throws IOException, InterruptedException {
    FileSplit split = (FileSplit) genericSplit;
    Configuration conf = context.getConfiguration();
    this.kmersize = FastaKmerInputFormat.getKmerSize(conf);
    this.maxLineLength = conf.getInt("mapred.linerecordreader.maxlength", Integer.MAX_VALUE);
    this.start = split.getStart();
    this.end = this.start + split.getLength();
    final Path file = split.getPath();
    this.compressionCodecs = new CompressionCodecFactory(conf);
    final CompressionCodec codec = this.compressionCodecs.getCodec(file);

    // open the file and seek to the start of the split
    FileSystem fs = file.getFileSystem(conf);

    // get uncompressed length
    if (codec instanceof GzipCodec) {
        this.isCompressed = true;

        FSDataInputStream fileInCheckSize = fs.open(file);
        byte[] len = new byte[4];
        try {
            LOG.info("compressed input : " + file.getName());
            LOG.info("compressed file size : " + this.end);
            fileInCheckSize.skip(this.end - 4);
            IOUtils.readFully(fileInCheckSize, len, 0, len.length);
            this.uncompressedSize = (len[3] << 24) | (len[2] << 16) | (len[1] << 8) | len[0];
            if (this.uncompressedSize < 0) {
                this.uncompressedSize = this.end;
            }
            LOG.info("uncompressed file size : " + this.uncompressedSize);
        } finally {
            fileInCheckSize.close();
        }

        this.end = Long.MAX_VALUE;
    } else if (codec != null) {
        this.isCompressed = true;
        this.end = Long.MAX_VALUE;
        this.uncompressedSize = Long.MAX_VALUE;
    } else {
        this.isCompressed = false;
    }

    // get inputstream
    FSDataInputStream fileIn = fs.open(file);
    boolean inTheMiddle = false;
    if (codec != null) {
        this.in = new LineReader(codec.createInputStream(fileIn), conf);
    } else {
        if (this.start != 0) {
            this.start--;
            fileIn.seek(this.start);

            inTheMiddle = true;
        }
        this.in = new LineReader(fileIn, conf);
    }

    this.buffer = new Text();

    if (inTheMiddle) {
        // find new start line
        this.start += this.in.readLine(new Text(), 0,
                (int) Math.min((long) Integer.MAX_VALUE, this.end - this.start));

        // back off
        FSDataInputStream fileIn2 = fs.open(file);
        fileIn2.seek(this.start - 1000);

        LineReader in2 = new LineReader(fileIn2, conf);
        Text tempLine = new Text();
        long curpos = this.start - 1000;
        while (curpos < this.start) {
            curpos += in2.readLine(tempLine, 0, (int) (this.start - curpos));
        }

        if (tempLine.charAt(0) == READ_DELIMITER) {
            // clean start
            this.buffer.clear();
        } else {
            // leave k-1 seq in the buffer
            String seq = tempLine.toString().trim();
            String left = seq.substring(seq.length() - this.kmersize + 1);
            this.buffer.set(left);
        }

        in2.close();
    }

    this.pos = this.start;

    this.key = null;
    this.value = null;
}

From source file: org.acacia.partitioner.java.WholeFileRecordReader.java

License: Apache License

@Override
public boolean next(NullWritable key, BytesWritable value) throws IOException {
    if (!processed) {
        byte[] contents = new byte[(int) fileSplit.getLength()];
        Path file = fileSplit.getPath();
        FileSystem fs = file.getFileSystem(conf);
        FSDataInputStream in = null;
        try {
            in = fs.open(file);
            IOUtils.readFully(in, contents, 0, contents.length);
            value.set(contents, 0, contents.length);
        } finally {
            IOUtils.closeStream(in);
        }
        processed = true;
        return true;
    }
    return false;
}

From source file: org.apache.falcon.resource.AbstractEntityManager.java

License: Apache License

private String getAsString(InputStream xmlStream) throws IOException {
    byte[] data = new byte[XML_DEBUG_LEN];
    IOUtils.readFully(xmlStream, data, 0, XML_DEBUG_LEN);
    return new String(data);
}

From source file: org.apache.mahout.text.WholeFileRecordReader.java

License: Apache License

@Override
public boolean nextKeyValue() throws IOException {
    if (!processed) {
        byte[] contents = new byte[(int) fileSplit.getLength()];
        Path file = fileSplit.getPath();
        FileSystem fs = file.getFileSystem(this.configuration);

        if (!fs.isFile(file)) {
            return false;
        }

        FileStatus[] fileStatuses;
        if (pathFilter != null) {
            fileStatuses = fs.listStatus(file, pathFilter);
        } else {
            fileStatuses = fs.listStatus(file);
        }

        FSDataInputStream in = null;
        if (fileStatuses.length == 1) {
            try {
                in = fs.open(fileStatuses[0].getPath());
                IOUtils.readFully(in, contents, 0, contents.length);
                value.setCapacity(contents.length);
                value.set(contents, 0, contents.length);
            } finally {
                Closeables.close(in, false);
            }
            processed = true;
            return true;
        }
    }
    return false;
}

From source file: org.apache.tez.engine.common.shuffle.impl.Fetcher.java

License: Apache License

private void shuffleToMemory(MapHost host, MapOutput mapOutput, InputStream input, int decompressedLength,
        int compressedLength) throws IOException {
    IFileInputStream checksumIn = new IFileInputStream(input, compressedLength, job);

    input = checksumIn;

    // Are map-outputs compressed?
    if (codec != null) {
        decompressor.reset();
        input = codec.createInputStream(input, decompressor);
    }

    // Copy map-output into an in-memory buffer
    byte[] shuffleData = mapOutput.getMemory();

    try {
        IOUtils.readFully(input, shuffleData, 0, shuffleData.length);
        metrics.inputBytes(shuffleData.length);
        reporter.progress();
        LOG.info("Read " + shuffleData.length + " bytes from map-output for " + mapOutput.getMapId());
    } catch (IOException ioe) {
        // Close the streams
        IOUtils.cleanup(LOG, input);

        // Re-throw
        throw ioe;
    }

}

From source file: org.apache.tez.engine.common.sort.impl.IFileInputStream.java

License: Apache License

private int doRead(byte[] b, int off, int len) throws IOException {

    // If we are trying to read past the end of data, just read
    // the left over data
    if (currentOffset + len > dataLength) {
        len = (int) dataLength - (int) currentOffset;
    }

    int bytesRead = in.read(b, off, len);

    if (bytesRead < 0) {
        throw new ChecksumException("Checksum Error", 0);
    }

    checksum(b, off, bytesRead);

    currentOffset += bytesRead;

    if (disableChecksumValidation) {
        return bytesRead;
    }

    if (currentOffset == dataLength) {
        // The last four bytes are checksum. Strip them and verify
        sum.update(buffer, 0, offset);
        csum = new byte[checksumSize];
        IOUtils.readFully(in, csum, 0, checksumSize);
        if (!sum.compare(csum, 0)) {
            throw new ChecksumException("Checksum Error", 0);
        }
    }
    return bytesRead;
}

From source file: org.apache.tez.engine.common.sort.impl.TezSpillRecord.java

License: Apache License

public TezSpillRecord(Path indexFileName, Configuration job, Checksum crc, String expectedIndexOwner)
        throws IOException {

    final FileSystem rfs = FileSystem.getLocal(job).getRaw();
    final FSDataInputStream in = rfs.open(indexFileName);
    try {
        final long length = rfs.getFileStatus(indexFileName).getLen();
        final int partitions = (int) length / Constants.MAP_OUTPUT_INDEX_RECORD_LENGTH;
        final int size = partitions * Constants.MAP_OUTPUT_INDEX_RECORD_LENGTH;

        buf = ByteBuffer.allocate(size);
        if (crc != null) {
            crc.reset();
            CheckedInputStream chk = new CheckedInputStream(in, crc);
            IOUtils.readFully(chk, buf.array(), 0, size);
            if (chk.getChecksum().getValue() != in.readLong()) {
                throw new ChecksumException("Checksum error reading spill index: " + indexFileName, -1);
            }
        } else {
            IOUtils.readFully(in, buf.array(), 0, size);
        }
        entries = buf.asLongBuffer();
    } finally {
        in.close();
    }
}

From source file: org.apache.tez.runtime.library.broadcast.output.FileBasedKVWriter.java

License: Apache License

public byte[] getData() throws IOException {
    Preconditions.checkState(closed, "Only available after the Writer has been closed");
    FSDataInputStream inStream = null;
    byte[] buf = null;
    try {
        inStream = rfs.open(outputPath);
        buf = new byte[(int) getCompressedLength()];
        IOUtils.readFully(inStream, buf, 0, (int) getCompressedLength());
    } finally {
        if (inStream != null) {
            inStream.close();
        }
    }
    return buf;
}

From source file: org.apache.tez.runtime.library.common.shuffle.impl.Fetcher.java

License: Apache License

private void shuffleToMemory(MapHost host, MapOutput mapOutput, InputStream input, int decompressedLength,
        int compressedLength) throws IOException {
    IFileInputStream checksumIn = new IFileInputStream(input, compressedLength, ifileReadAhead,
            ifileReadAheadLength);

    input = checksumIn;

    // Are map-outputs compressed?
    if (codec != null) {
        decompressor.reset();
        input = codec.createInputStream(input, decompressor);
    }

    // Copy map-output into an in-memory buffer
    byte[] shuffleData = mapOutput.getMemory();

    try {
        IOUtils.readFully(input, shuffleData, 0, shuffleData.length);
        metrics.inputBytes(shuffleData.length);
        LOG.info("Read " + shuffleData.length + " bytes from map-output for "
                + mapOutput.getAttemptIdentifier());
    } catch (IOException ioe) {
        // Close the streams
        IOUtils.cleanup(LOG, input);

        // Re-throw
        throw ioe;
    }

}