Example usage for org.apache.hadoop.io IOUtils readFully

Introduction

This page collects example usages of org.apache.hadoop.io.IOUtils#readFully from open source projects.

Prototype

public static void readFully(InputStream in, byte[] buf, int off, int len) throws IOException 

Document

Reads len bytes in a loop.
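The call blocks until exactly len bytes have been read into buf starting at offset off, and throws an IOException if the stream ends before that. A minimal, self-contained sketch of the call, reading an entire file into a byte array (the helper name and the FileSystem/Path setup are illustrative, not taken from the examples below):

public static byte[] readWholeFile(FileSystem fs, Path file) throws IOException {
    FSDataInputStream in = null;
    try {
        in = fs.open(file);
        int len = (int) fs.getFileStatus(file).getLen();
        byte[] buf = new byte[len];
        // Loops internally until len bytes have been read; throws IOException on premature EOF
        IOUtils.readFully(in, buf, 0, len);
        return buf;
    } finally {
        IOUtils.closeStream(in);
    }
}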

Usage

From source file: kogiri.common.hadoop.io.reader.fasta.FastaReadDescriptionReader.java

License: Open Source License

@Override
public void initialize(InputSplit genericSplit, TaskAttemptContext context)
        throws IOException, InterruptedException {

    FileSplit split = (FileSplit) genericSplit;
    Configuration job = context.getConfiguration();
    this.maxLineLength = job.getInt("mapred.linerecordreader.maxlength", Integer.MAX_VALUE);
    this.start = split.getStart();
    this.end = this.start + split.getLength();
    final Path file = split.getPath();
    this.compressionCodecs = new CompressionCodecFactory(job);
    final CompressionCodec codec = this.compressionCodecs.getCodec(file);

    this.filename = file.getName();

    this.firstRead = true;

    // open the file and seek to the start of the split
    FileSystem fs = file.getFileSystem(job);

    // get uncompressed length
    if (codec instanceof GzipCodec) {
        this.isCompressed = true;

        FSDataInputStream fileInCheckSize = fs.open(file);
        byte[] len = new byte[4];
        try {
            LOG.info("compressed input : " + file.getName());
            LOG.info("compressed file size : " + this.end);
            fileInCheckSize.skip(this.end - 4);
            IOUtils.readFully(fileInCheckSize, len, 0, len.length);
            this.uncompressedSize = (len[3] << 24) | (len[2] << 16) | (len[1] << 8) | len[0];
            if (this.uncompressedSize < 0) {
                this.uncompressedSize = this.end;
            }
            LOG.info("uncompressed file size : " + this.uncompressedSize);
        } finally {
            fileInCheckSize.close();
        }

        this.end = Long.MAX_VALUE;
    } else if (codec != null) {
        this.isCompressed = true;
        this.end = Long.MAX_VALUE;
        this.uncompressedSize = Long.MAX_VALUE;
    } else {
        this.isCompressed = false;
    }

    // get inputstream
    FSDataInputStream fileIn = fs.open(file);

    if (codec != null) {
        this.in = new LineReader(codec.createInputStream(fileIn), job);
    } else {
        if (this.start != 0) {
            fileIn.seek(this.start);
        }
        this.in = new LineReader(fileIn, job);
    }

    // skip lines until we meet new record start
    while (this.start < this.end) {
        Text skipText = new Text();
        long newSize = this.in.readLine(skipText, this.maxLineLength,
                Math.max((int) Math.min(Integer.MAX_VALUE, this.end - this.start), this.maxLineLength));
        if (newSize == 0) {
            // EOF
            this.hasNextRecord = false;
            this.pos = this.end;
            break;
        }

        if (skipText.getLength() > 0 && skipText.charAt(0) == READ_DELIMITER) {
            this.prevLine = skipText;
            this.prevSize = newSize;
            this.hasNextRecord = true;
            this.pos = this.start;
            break;
        }

        this.start += newSize;

        if (this.start >= this.end) {
            // EOF
            this.hasNextRecord = false;
            this.pos = this.end;
            break;
        }
    }

    this.key = null;
    this.value = null;
}
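The gzip branch above reads the uncompressed size from the last four bytes of the file: a gzip stream ends with the ISIZE field, the uncompressed length modulo 2^32 stored as a little-endian 32-bit integer. A sketch of that read in isolation, assuming fs, file, and compressedLength (this.end in the example) are already defined; masking each byte with 0xff avoids sign extension of negative Java bytes:

FSDataInputStream sizeIn = fs.open(file);
try {
    byte[] trailer = new byte[4];
    sizeIn.skip(compressedLength - 4); // position at the gzip ISIZE field
    IOUtils.readFully(sizeIn, trailer, 0, trailer.length);
    // little-endian, unsigned bytes: uncompressed length modulo 2^32
    long uncompressedSize = ((trailer[3] & 0xffL) << 24) | ((trailer[2] & 0xffL) << 16)
            | ((trailer[1] & 0xffL) << 8) | (trailer[0] & 0xffL);
} finally {
    sizeIn.close();
}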

From source file: libra.common.hadoop.io.reader.fasta.FastaKmerReader.java

License: Apache License

@Override
public void initialize(InputSplit genericSplit, TaskAttemptContext context)
        throws IOException, InterruptedException {
    FileSplit split = (FileSplit) genericSplit;
    Configuration conf = context.getConfiguration();
    this.kmersize = FastaKmerInputFormat.getKmerSize(conf);
    this.maxLineLength = conf.getInt("mapred.linerecordreader.maxlength", Integer.MAX_VALUE);
    this.start = split.getStart();
    this.end = this.start + split.getLength();
    final Path file = split.getPath();
    this.compressionCodecs = new CompressionCodecFactory(conf);
    final CompressionCodec codec = this.compressionCodecs.getCodec(file);

    // open the file and seek to the start of the split
    FileSystem fs = file.getFileSystem(conf);

    // get uncompressed length
    if (codec instanceof GzipCodec) {
        this.isCompressed = true;

        FSDataInputStream fileInCheckSize = fs.open(file);
        byte[] len = new byte[4];
        try {
            LOG.info("compressed input : " + file.getName());
            LOG.info("compressed file size : " + this.end);
            fileInCheckSize.skip(this.end - 4);
            IOUtils.readFully(fileInCheckSize, len, 0, len.length);
            this.uncompressedSize = (len[3] << 24) | (len[2] << 16) | (len[1] << 8) | len[0];
            if (this.uncompressedSize < 0) {
                this.uncompressedSize = this.end;
            }
            LOG.info("uncompressed file size : " + this.uncompressedSize);
        } finally {
            fileInCheckSize.close();
        }

        this.end = Long.MAX_VALUE;
    } else if (codec != null) {
        this.isCompressed = true;
        this.end = Long.MAX_VALUE;
        this.uncompressedSize = Long.MAX_VALUE;
    } else {
        this.isCompressed = false;
    }

    // get inputstream
    FSDataInputStream fileIn = fs.open(file);
    boolean inTheMiddle = false;
    if (codec != null) {
        this.in = new LineReader(codec.createInputStream(fileIn), conf);
    } else {
        if (this.start != 0) {
            this.start--;
            fileIn.seek(this.start);

            inTheMiddle = true;
        }
        this.in = new LineReader(fileIn, conf);
    }

    this.buffer = new Text();

    if (inTheMiddle) {
        // find new start line
        this.start += this.in.readLine(new Text(), 0,
                (int) Math.min((long) Integer.MAX_VALUE, this.end - this.start));

        // back off
        FSDataInputStream fileIn2 = fs.open(file);
        fileIn2.seek(this.start - 1000);

        LineReader in2 = new LineReader(fileIn2, conf);
        Text tempLine = new Text();
        long curpos = this.start - 1000;
        while (curpos < this.start) {
            curpos += in2.readLine(tempLine, 0, (int) (this.start - curpos));
        }

        if (tempLine.charAt(0) == READ_DELIMITER) {
            // clean start
            this.buffer.clear();
        } else {
            // leave k-1 seq in the buffer
            String seq = tempLine.toString().trim();
            String left = seq.substring(seq.length() - this.kmersize + 1);
            this.buffer.set(left);
        }

        in2.close();
    }

    this.pos = this.start;

    this.key = null;
    this.value = null;
}

From source file: org.acacia.partitioner.java.WholeFileRecordReader.java

License: Apache License

@Override
public boolean next(NullWritable key, BytesWritable value) throws IOException {
    if (!processed) {
        byte[] contents = new byte[(int) fileSplit.getLength()];
        Path file = fileSplit.getPath();
        FileSystem fs = file.getFileSystem(conf);
        FSDataInputStream in = null;
        try {
            in = fs.open(file);
            IOUtils.readFully(in, contents, 0, contents.length);
            value.set(contents, 0, contents.length);
        } finally {
            IOUtils.closeStream(in);
        }
        processed = true;
        return true;
    }
    return false;
}

From source file: org.apache.falcon.resource.AbstractEntityManager.java

License: Apache License

private String getAsString(InputStream xmlStream) throws IOException {
    byte[] data = new byte[XML_DEBUG_LEN];
    IOUtils.readFully(xmlStream, data, 0, XML_DEBUG_LEN);
    return new String(data);
}

From source file: org.apache.mahout.text.WholeFileRecordReader.java

License: Apache License

@Override
public boolean nextKeyValue() throws IOException {
    if (!processed) {
        byte[] contents = new byte[(int) fileSplit.getLength()];
        Path file = fileSplit.getPath();
        FileSystem fs = file.getFileSystem(this.configuration);

        if (!fs.isFile(file)) {
            return false;
        }

        FileStatus[] fileStatuses;
        if (pathFilter != null) {
            fileStatuses = fs.listStatus(file, pathFilter);
        } else {
            fileStatuses = fs.listStatus(file);
        }

        FSDataInputStream in = null;
        if (fileStatuses.length == 1) {
            try {
                in = fs.open(fileStatuses[0].getPath());
                IOUtils.readFully(in, contents, 0, contents.length);
                value.setCapacity(contents.length);
                value.set(contents, 0, contents.length);
            } finally {
                Closeables.close(in, false);
            }
            processed = true;
            return true;
        }
    }
    return false;
}

From source file: org.apache.tez.engine.common.shuffle.impl.Fetcher.java

License: Apache License

private void shuffleToMemory(MapHost host, MapOutput mapOutput, InputStream input, int decompressedLength,
        int compressedLength) throws IOException {
    IFileInputStream checksumIn = new IFileInputStream(input, compressedLength, job);

    input = checksumIn;

    // Are map-outputs compressed?
    if (codec != null) {
        decompressor.reset();
        input = codec.createInputStream(input, decompressor);
    }

    // Copy map-output into an in-memory buffer
    byte[] shuffleData = mapOutput.getMemory();

    try {
        IOUtils.readFully(input, shuffleData, 0, shuffleData.length);
        metrics.inputBytes(shuffleData.length);
        reporter.progress();
        LOG.info("Read " + shuffleData.length + " bytes from map-output for " + mapOutput.getMapId());
    } catch (IOException ioe) {
        // Close the streams
        IOUtils.cleanup(LOG, input);

        // Re-throw
        throw ioe;
    }

}

From source file: org.apache.tez.engine.common.sort.impl.IFileInputStream.java

License: Apache License

private int doRead(byte[] b, int off, int len) throws IOException {

    // If we are trying to read past the end of data, just read
    // the left over data
    if (currentOffset + len > dataLength) {
        len = (int) dataLength - (int) currentOffset;
    }

    int bytesRead = in.read(b, off, len);

    if (bytesRead < 0) {
        throw new ChecksumException("Checksum Error", 0);
    }

    checksum(b, off, bytesRead);

    currentOffset += bytesRead;

    if (disableChecksumValidation) {
        return bytesRead;
    }

    if (currentOffset == dataLength) {
        // The last four bytes are checksum. Strip them and verify
        sum.update(buffer, 0, offset);
        csum = new byte[checksumSize];
        IOUtils.readFully(in, csum, 0, checksumSize);
        if (!sum.compare(csum, 0)) {
            throw new ChecksumException("Checksum Error", 0);
        }
    }
    return bytesRead;
}

From source file: org.apache.tez.engine.common.sort.impl.TezSpillRecord.java

License: Apache License

public TezSpillRecord(Path indexFileName, Configuration job, Checksum crc, String expectedIndexOwner)
        throws IOException {

    final FileSystem rfs = FileSystem.getLocal(job).getRaw();
    final FSDataInputStream in = rfs.open(indexFileName);
    try {
        final long length = rfs.getFileStatus(indexFileName).getLen();
        final int partitions = (int) length / Constants.MAP_OUTPUT_INDEX_RECORD_LENGTH;
        final int size = partitions * Constants.MAP_OUTPUT_INDEX_RECORD_LENGTH;

        buf = ByteBuffer.allocate(size);
        if (crc != null) {
            crc.reset();
            CheckedInputStream chk = new CheckedInputStream(in, crc);
            IOUtils.readFully(chk, buf.array(), 0, size);
            if (chk.getChecksum().getValue() != in.readLong()) {
                throw new ChecksumException("Checksum error reading spill index: " + indexFileName, -1);
            }
        } else {
            IOUtils.readFully(in, buf.array(), 0, size);
        }
        entries = buf.asLongBuffer();
    } finally {
        in.close();
    }
}

From source file: org.apache.tez.runtime.library.broadcast.output.FileBasedKVWriter.java

License: Apache License

public byte[] getData() throws IOException {
    Preconditions.checkState(closed, "Only available after the Writer has been closed");
    FSDataInputStream inStream = null;
    byte[] buf = null;
    try {
        inStream = rfs.open(outputPath);
        buf = new byte[(int) getCompressedLength()];
        IOUtils.readFully(inStream, buf, 0, (int) getCompressedLength());
    } finally {
        if (inStream != null) {
            inStream.close();
        }
    }
    return buf;
}

From source file: org.apache.tez.runtime.library.common.shuffle.impl.Fetcher.java

License: Apache License

private void shuffleToMemory(MapHost host, MapOutput mapOutput, InputStream input, int decompressedLength,
        int compressedLength) throws IOException {
    IFileInputStream checksumIn = new IFileInputStream(input, compressedLength, ifileReadAhead,
            ifileReadAheadLength);

    input = checksumIn;

    // Are map-outputs compressed?
    if (codec != null) {
        decompressor.reset();
        input = codec.createInputStream(input, decompressor);
    }

    // Copy map-output into an in-memory buffer
    byte[] shuffleData = mapOutput.getMemory();

    try {
        IOUtils.readFully(input, shuffleData, 0, shuffleData.length);
        metrics.inputBytes(shuffleData.length);
        LOG.info("Read " + shuffleData.length + " bytes from map-output for "
                + mapOutput.getAttemptIdentifier());
    } catch (IOException ioe) {
        // Close the streams
        IOUtils.cleanup(LOG, input);

        // Re-throw
        throw ioe;
    }

}