Example usage for org.apache.hadoop.io IOUtils readFully

Introduction

On this page you can find example usages of org.apache.hadoop.io.IOUtils.readFully.

Prototype

public static void readFully(InputStream in, byte[] buf, int off, int len) throws IOException

Source Link

Document

Reads len bytes in a loop.

Usage

From source file:com.moz.fiji.mapreduce.input.impl.WholeFileRecordReader.java

License:Apache License

/**
 * Loads the whole file into memory on the first call and reports whether it did so.
 *
 * <p>On the first call the file contents are stored in the value returned by
 * getCurrentValue() and the qualified path of the file is stored in the key
 * returned by getCurrentKey(). Every later call returns false and leaves the
 * internal state untouched.</p>
 *
 * @return true if this call read the file, false if it had already been read.
 * @throws IOException if the file is longer than Integer.MAX_VALUE bytes or cannot be read.
 * @throws InterruptedException declared by the overridden method.
 */
@Override
public boolean nextKeyValue() throws IOException, InterruptedException {
    // The single record was already produced: nothing left to do.
    if (mProcessed) {
        return false;
    }

    // A Java array cannot hold more than Integer.MAX_VALUE bytes.
    if (mFileLength > (long) Integer.MAX_VALUE) {
        throw new IOException("File is longer than Integer.MAX_VALUE.");
    }

    final byte[] buffer = new byte[(int) mFileLength];
    final FileSystem fileSystem = mFileToRead.getFileSystem(mConf);

    FSDataInputStream stream = null;
    try {
        // Value: the raw bytes of the file.
        stream = fileSystem.open(mFileToRead);
        IOUtils.readFully(stream, buffer, 0, buffer.length);
        mFileText.set(buffer, 0, buffer.length);

        // Key: the fully qualified path of the file.
        mFileName.set(mFileToRead.makeQualified(fileSystem).toString());
    } finally {
        IOUtils.closeStream(stream);
    }

    mProcessed = true;
    return true;
}

From source file:com.twitter.hraven.mapreduce.JobFileRawLoaderMapper.java

License:Apache License

/**
 * Reads the complete contents of a job file into memory.
 *
 * @param fileStatus
 *          status of the job file; supplies both its path and its length
 * @return the contents of the job file.
 * @throws IOException
 *           when the file is too large to fit into a single byte array, or
 *           when bad things happen during reading
 */
private byte[] readJobFile(FileStatus fileStatus) throws IOException {
    long fileLength = fileStatus.getLen();
    // Narrowing a long above Integer.MAX_VALUE to int would wrap around and
    // either produce a negative array size or silently truncate the read.
    if (fileLength > (long) Integer.MAX_VALUE) {
        throw new IOException("Job file " + fileStatus.getPath() + " is too large to read into memory: "
                + fileLength + " bytes");
    }

    byte[] rawBytes = new byte[(int) fileLength];
    FSDataInputStream fsdis = null;
    try {
        fsdis = hdfs.open(fileStatus.getPath());
        IOUtils.readFully(fsdis, rawBytes, 0, rawBytes.length);
    } finally {
        // Null-safe close that does not mask an exception thrown while reading.
        IOUtils.closeStream(fsdis);
    }
    return rawBytes;
}

From source file:crunch.MaxTemperature.java

License:Apache License

/**
 * Reads the entire file behind the split into the current value on the first call.
 *
 * @return true if this call read the file, false if it had already been processed.
 * @throws IOException if the split is longer than Integer.MAX_VALUE bytes or cannot be read.
 * @throws InterruptedException declared by the overridden method.
 */
@Override
public boolean nextKeyValue() throws IOException, InterruptedException {
    if (processed) {
        return false;
    }

    Path file = fileSplit.getPath();
    long length = fileSplit.getLength();
    // Casting a length above Integer.MAX_VALUE to int would wrap around and
    // yield a negative array size or a silently truncated read.
    if (length > (long) Integer.MAX_VALUE) {
        throw new IOException("File " + file + " is longer than Integer.MAX_VALUE bytes.");
    }

    byte[] contents = new byte[(int) length];
    FileSystem fs = file.getFileSystem(conf);
    FSDataInputStream in = null;
    try {
        in = fs.open(file);
        IOUtils.readFully(in, contents, 0, contents.length);
        value.set(contents, 0, contents.length);
    } finally {
        IOUtils.closeStream(in);
    }
    processed = true;
    return true;
}

From source file:crunch.MaxTemperature.java

License:Apache License

/**
 * Reads the entire file behind the split into {@code value} on the first call.
 *
 * @param key unused by this method.
 * @param value receives the bytes of the file.
 * @return true if this call read the file, false if it had already been processed.
 * @throws IOException if the split is longer than Integer.MAX_VALUE bytes or cannot be read.
 */
@Override
public boolean next(NullWritable key, BytesWritable value) throws IOException {
    if (processed) {
        return false;
    }

    Path file = fileSplit.getPath();
    long length = fileSplit.getLength();
    // Casting a length above Integer.MAX_VALUE to int would wrap around and
    // yield a negative array size or a silently truncated read.
    if (length > (long) Integer.MAX_VALUE) {
        throw new IOException("File " + file + " is longer than Integer.MAX_VALUE bytes.");
    }

    byte[] contents = new byte[(int) length];
    FileSystem fs = file.getFileSystem(conf);
    FSDataInputStream in = null;
    try {
        in = fs.open(file);
        IOUtils.readFully(in, contents, 0, contents.length);
        value.set(contents, 0, contents.length);
    } finally {
        IOUtils.closeStream(in);
    }
    processed = true;
    return true;
}

From source file:de.l3s.common.hadoop.WholeFileRecordReader.java

License:Apache License

/**
 * Reads the entire file behind the split into the current value on the first call.
 *
 * @return true if this call read the file, false if it had already been processed.
 * @throws IOException if the split is longer than Integer.MAX_VALUE bytes or cannot be read.
 */
@Override
public boolean nextKeyValue() throws IOException {
    if (processed) {
        return false;
    }

    Path file = fileSplit.getPath();
    long length = fileSplit.getLength();
    // Casting a length above Integer.MAX_VALUE to int would wrap around and
    // yield a negative array size or a silently truncated read.
    if (length > (long) Integer.MAX_VALUE) {
        throw new IOException("File " + file + " is longer than Integer.MAX_VALUE bytes.");
    }

    byte[] contents = new byte[(int) length];
    FileSystem fs = file.getFileSystem(conf);

    FSDataInputStream in = null;
    try {
        in = fs.open(file);
        IOUtils.readFully(in, contents, 0, contents.length);
        value.set(contents, 0, contents.length);
    } finally {
        IOUtils.closeStream(in);
    }
    processed = true;
    return true;
}

From source file:edu.usc.pgroup.louvain.hadoop.GraphPartitionRecordReader.java

License:Apache License

/**
 * Reads the entire file behind the split as a single record on the first call.
 *
 * <p>The current key is set to the file name (last path component) and the
 * current value to the bytes of the file.</p>
 *
 * @return true if this call read the file, false if the reader had already finished.
 * @throws IOException if the split is longer than Integer.MAX_VALUE bytes or cannot be read.
 * @throws InterruptedException declared by the overridden method.
 */
@Override
public boolean nextKeyValue() throws IOException, InterruptedException {

    if (isFinished) {
        return false;
    }

    Path path = split.getPath();
    long length = split.getLength();
    // Casting a length above Integer.MAX_VALUE to int would wrap around and
    // yield a negative array size or a silently truncated read.
    if (length > (long) Integer.MAX_VALUE) {
        throw new IOException("File " + path + " is longer than Integer.MAX_VALUE bytes.");
    }
    int fileLength = (int) length;
    byte[] result = new byte[fileLength];

    // Resolve the file system from the path itself so that a split living on a
    // file system other than the configured default can still be opened.
    FileSystem fs = path.getFileSystem(conf);
    currentKey = new Text(path.getName());
    FSDataInputStream in = null;
    try {
        in = fs.open(path);
        IOUtils.readFully(in, result, 0, fileLength);
        currentValue.set(result, 0, fileLength);
    } finally {
        IOUtils.closeStream(in);
    }

    this.isFinished = true;
    return true;
}

From source file:eu.edisonproject.utility.commons.WholeFileRecordReader.java

License:Apache License

/**
 * Reads the entire file behind the split on the first call and stores its
 * contents, decoded as UTF-8 text, in the current value.
 *
 * @return true if this call read the file, false if it had already been processed.
 * @throws IOException if the split is longer than Integer.MAX_VALUE bytes or cannot be read.
 */
@Override
public boolean nextKeyValue() throws IOException {
    if (processed) {
        return false;
    }

    Path file = fileSplit.getPath();
    long length = fileSplit.getLength();
    // Casting a length above Integer.MAX_VALUE to int would wrap around and
    // yield a negative array size or a silently truncated read.
    if (length > (long) Integer.MAX_VALUE) {
        throw new IOException("File " + file + " is longer than Integer.MAX_VALUE bytes.");
    }

    byte[] contents = new byte[(int) length];
    FileSystem fs = file.getFileSystem(conf);

    FSDataInputStream in = null;
    try {
        in = fs.open(file);
        IOUtils.readFully(in, contents, 0, contents.length);
        // Decode with an explicit charset: the no-charset String constructor
        // depends on the platform default, which may differ between nodes.
        // NOTE(review): assumes the input files are UTF-8 encoded - confirm.
        value.set(new String(contents, "UTF-8"));
    } finally {
        IOUtils.closeStream(in);
    }
    processed = true;
    return true;
}

From source file:fi.tkk.ics.hadoop.bam.cli.plugins.Cat.java

License:Open Source License

/**
 * Concatenates the given SAM or BAM inputs into a single output file.
 *
 * <p>The first remaining argument is the output path; all following arguments
 * are input paths or glob patterns. The format and the header are taken from
 * the first input. For BAM, only the first BGZF block of each input is
 * decompressed (to strip its header); all further blocks are copied verbatim.</p>
 *
 * @param parser the parsed command line.
 * @return 0 on success; 3 for invalid arguments or when no input file was
 *         found; 4 if the format could not be determined; 5 if the header
 *         could not be read; 6 if the output could not be created; 7 if
 *         writing the header failed; 8 if copying records failed; 9 if
 *         finishing the output failed.
 */
@Override
protected int run(final CmdLineParser parser) {
    final List<String> args = parser.getRemainingArgs();
    if (args.isEmpty()) {
        System.err.println("cat :: OUTPATH not given.");
        return 3;
    }
    if (args.size() == 1) {
        System.err.println("cat :: no INPATHs given.");
        return 3;
    }

    final Path outPath = new Path(args.get(0));

    final List<String> ins = args.subList(1, args.size());

    final boolean verbose = parser.getBoolean(verboseOpt);

    final SAMFileReader.ValidationStringency stringency = Utils.toStringency(parser.getOptionValue(
            stringencyOpt, SAMFileReader.ValidationStringency.DEFAULT_STRINGENCY.toString()), "cat");
    if (stringency == null)
        return 3;

    final Configuration conf = getConf();

    // Expand the glob patterns.

    final List<Path> inputs = new ArrayList<Path>(ins.size());
    for (final String in : ins) {
        try {
            final Path p = new Path(in);
            final FileStatus[] matches = p.getFileSystem(conf).globStatus(p);
            // globStatus yields null for a plain (non-glob) path that does not
            // exist; treat that like a pattern without matches.
            if (matches == null)
                continue;
            for (final FileStatus fstat : matches)
                inputs.add(fstat.getPath());
        } catch (IOException e) {
            System.err.printf("cat :: Could not expand glob pattern '%s': %s\n", in, e.getMessage());
        }
    }

    // Everything below relies on there being a first input.
    if (inputs.isEmpty()) {
        System.err.println("cat :: no input files found.");
        return 3;
    }

    final Path input0 = inputs.get(0);

    // Infer the format from the first input path or contents.

    SAMFormat format = SAMFormat.inferFromFilePath(input0);
    if (format == null) {
        try {
            format = SAMFormat.inferFromData(input0.getFileSystem(conf).open(input0));
        } catch (IOException e) {
            System.err.printf("cat :: Could not read input '%s': %s\n", input0, e.getMessage());
            return 4;
        }
        if (format == null) {
            System.err.printf("cat :: Unknown SAM format in input '%s'\n", inputs.get(0));
            return 4;
        }
    }

    // Choose the header.

    final SAMFileHeader header;
    try {
        final SAMFileReader r = new SAMFileReader(input0.getFileSystem(conf).open(input0));

        header = r.getFileHeader();
        r.close();
    } catch (IOException e) {
        System.err.printf("cat :: Could not read input '%s': %s\n", input0, e.getMessage());
        return 5;
    }

    // Open the output.

    final OutputStream out;

    try {
        out = outPath.getFileSystem(conf).create(outPath);
    } catch (IOException e) {
        System.err.printf("cat :: Could not create output file: %s\n", e.getMessage());
        return 6;
    }

    // Output the header.

    try {
        // Don't use the returned stream, because we're concatenating directly
        // and don't want to apply another layer of compression to BAM.
        new SAMOutputPreparer().prepareForRecords(out, format, header);

    } catch (IOException e) {
        System.err.printf("cat :: Outputting header failed: %s\n", e.getMessage());
        IOUtils.closeStream(out);
        return 7;
    }

    // Output the records from each file in the order given, converting if
    // necessary.

    int inIdx = 1;
    try {
        for (final Path inPath : inputs) {
            if (verbose) {
                System.out.printf("cat :: Concatenating path %d of %d...\n", inIdx++, inputs.size());
            }
            switch (format) {
            case SAM: {
                final InputStream in = inPath.getFileSystem(conf).open(inPath);
                try {
                    // Use SAMFileReader to grab the header, but ignore it, thus
                    // ensuring that the header has been skipped.
                    new SAMFileReader(in).getFileHeader();

                    IOUtils.copyBytes(in, out, conf, false);
                    in.close();
                } finally {
                    // Redundant after a successful close above; releases the
                    // stream when an exception aborted the copy.
                    IOUtils.closeStream(in);
                }
                break;
            }
            case BAM: {
                final FSDataInputStream in = inPath.getFileSystem(conf).open(inPath);
                try {
                    // Find the block length, thankfully given to us by the BGZF
                    // format. We need it in order to know how much gzipped data to
                    // read after skipping the BAM header, so that we can only read
                    // that much and then simply copy the remaining gzip blocks
                    // directly.

                    final ByteBuffer block = ByteBuffer.wrap(new byte[0xffff]).order(ByteOrder.LITTLE_ENDIAN);

                    // Don't use readFully here, since EOF is fine. Stop as soon as
                    // the buffer is full: asking for zero more bytes would return
                    // 0 forever and never terminate the loop.
                    int read = 0;
                    while (read < block.capacity()) {
                        final int n = in.read(block.array(), read, block.capacity() - read);
                        // EOF is fine.
                        if (n == -1)
                            break;
                        read += n;
                    }

                    // Find the BGZF subfield and extract the length from it.
                    int blockLength = 0;
                    for (int xlen = (int) block.getShort(10) & 0xffff, i = 12, end = i + xlen; i < end;) {
                        final int slen = (int) block.getShort(i + 2) & 0xffff;
                        if (block.getShort(i) == 0x4342 && slen == 2) {
                            blockLength = ((int) block.getShort(i + 4) & 0xffff) + 1;
                            break;
                        }
                        i += 4 + slen;
                    }
                    if (blockLength == 0)
                        throw new IOException("BGZF extra field not found in " + inPath);

                    if (verbose) {
                        System.err.printf("cat ::   first block length %d\n", blockLength);
                    }

                    // Skip the BAM header. Can't use SAMFileReader because it'll
                    // use its own BlockCompressedInputStream.

                    final ByteArrayInputStream blockIn = new ByteArrayInputStream(block.array(), 0, blockLength);

                    final BlockCompressedInputStream bin = new BlockCompressedInputStream(blockIn);

                    // Theoretically we could write into the ByteBuffer we already
                    // had, since BlockCompressedInputStream needs to read the
                    // header before it can decompress any data and thereafter we
                    // can freely overwrite the first 8 bytes of the header... but
                    // that's a bit too nasty, so let's not.
                    final ByteBuffer buf = ByteBuffer.wrap(new byte[8]).order(ByteOrder.LITTLE_ENDIAN);

                    // Read the BAM magic number and the SAM header length, verify
                    // the magic, and skip the SAM header.

                    IOUtils.readFully(bin, buf.array(), 0, 8);

                    final int magic = buf.getInt(0), headerLen = buf.getInt(4);

                    if (magic != 0x014d4142)
                        throw new IOException("bad BAM magic number in " + inPath);

                    IOUtils.skipFully(bin, headerLen);

                    // Skip the reference sequences.

                    IOUtils.readFully(bin, buf.array(), 0, 4);

                    for (int i = buf.getInt(0); i-- > 0;) {
                        // Read the reference name length and skip it along with the
                        // reference length.
                        IOUtils.readFully(bin, buf.array(), 0, 4);
                        IOUtils.skipFully(bin, buf.getInt(0) + 4);
                    }

                    // Recompress the rest of this gzip block.

                    final int remaining = bin.available();

                    if (verbose)
                        System.err.printf("cat ::   %d bytes to bgzip\n", remaining);

                    if (remaining > 0) {
                        // The overload of IOUtils.copyBytes that takes "long length"
                        // was added only in Hadoop 0.20.205.0, which we don't want
                        // to depend on, so copy manually.
                        final byte[] remBuf = new byte[remaining];
                        IOUtils.readFully(bin, remBuf, 0, remBuf.length);

                        final BlockCompressedOutputStream bout = new BlockCompressedOutputStream(out, null);

                        bout.write(remBuf);
                        bout.flush();
                    }

                    // Just copy the raw bytes comprising the remaining blocks.

                    in.seek(blockLength);
                    IOUtils.copyBytes(in, out, conf, false);
                    in.close();
                } finally {
                    // Redundant after a successful close above; releases the
                    // stream when an exception aborted the copy.
                    IOUtils.closeStream(in);
                }
                break;
            }
            }
        }
    } catch (IOException e) {
        System.err.printf("cat :: Outputting records failed: %s\n", e.getMessage());
        IOUtils.closeStream(out);
        return 8;
    }

    // For BAM, output the BGZF terminator.

    try {
        if (format == SAMFormat.BAM)
            out.write(BlockCompressedStreamConstants.EMPTY_GZIP_BLOCK);

        out.close();
    } catch (IOException e) {
        System.err.printf("cat :: Finishing output failed: %s\n", e.getMessage());
        IOUtils.closeStream(out);
        return 9;
    }
    return 0;
}

From source file:io.hops.erasure_coding.RaidUtils.java

License:Apache License

/**
 * Copies exactly {@code count} bytes from {@code in} to {@code out}, using
 * {@code buf} as the transfer buffer.
 *
 * @param in stream to read from.
 * @param out stream to write to.
 * @param buf scratch buffer; its length bounds the size of each transfer.
 * @param count number of bytes to copy; nothing is copied when it is not positive.
 * @throws IOException if reading or writing fails.
 * @throws IllegalArgumentException if {@code buf} is empty while {@code count} is positive.
 */
public static void copyBytes(InputStream in, OutputStream out, byte[] buf, long count) throws IOException {
    // With an empty buffer every iteration would move zero bytes and the loop
    // would never finish.
    if (count > 0 && buf.length == 0) {
        throw new IllegalArgumentException("buf must not be empty when count > 0");
    }
    for (long bytesRead = 0; bytesRead < count;) {
        // Take the minimum in long arithmetic: narrowing the remaining count to
        // int first would overflow for remainders above Integer.MAX_VALUE.
        int toRead = (int) Math.min((long) buf.length, count - bytesRead);
        IOUtils.readFully(in, buf, 0, toRead);
        bytesRead += toRead;
        out.write(buf, 0, toRead);
    }
}

From source file:kogiri.common.hadoop.io.reader.fasta.FastaRawReadReader.java

License:Open Source License

/**
 * Prepares this reader for the given split.
 *
 * <p>Records the split boundaries, detects compression from the file name,
 * opens a line reader on the file and then skips lines until the first line
 * starting with READ_DELIMITER is found (or the end of the split is reached).
 * For gzip input the uncompressed size is taken from the last four bytes of
 * the file, which are read as a little-endian 32-bit value.</p>
 *
 * @param genericSplit the split to read; cast to FileSplit.
 * @param context task context supplying the job configuration.
 * @throws IOException if the file cannot be opened or read.
 * @throws InterruptedException declared by the overridden method.
 */
@Override
public void initialize(InputSplit genericSplit, TaskAttemptContext context)
        throws IOException, InterruptedException {

    FileSplit split = (FileSplit) genericSplit;
    Configuration job = context.getConfiguration();
    this.maxLineLength = job.getInt("mapred.linerecordreader.maxlength", Integer.MAX_VALUE);
    this.start = split.getStart();
    this.end = this.start + split.getLength();
    final Path file = split.getPath();
    this.compressionCodecs = new CompressionCodecFactory(job);
    final CompressionCodec codec = this.compressionCodecs.getCodec(file);

    this.filename = file.getName();

    this.firstRead = true;

    // open the file and seek to the start of the split
    FileSystem fs = file.getFileSystem(job);

    // get uncompressed length
    if (codec instanceof GzipCodec) {
        this.isCompressed = true;

        FSDataInputStream fileInCheckSize = fs.open(file);
        byte[] len = new byte[4];
        try {
            LOG.info("compressed input : " + file.getName());
            LOG.info("compressed file size : " + this.end);
            // Position exactly on the 4-byte trailer; skip() may legally skip
            // fewer bytes than requested, seek() may not.
            fileInCheckSize.seek(this.end - 4);
            IOUtils.readFully(fileInCheckSize, len, 0, len.length);
            // Mask each byte before shifting: bytes are signed, so an unmasked
            // value >= 0x80 would sign-extend and overwrite the higher bytes.
            this.uncompressedSize = ((len[3] & 0xff) << 24) | ((len[2] & 0xff) << 16)
                    | ((len[1] & 0xff) << 8) | (len[0] & 0xff);
            if (this.uncompressedSize < 0) {
                // The 32-bit value does not fit a signed int; fall back to the
                // compressed size.
                this.uncompressedSize = this.end;
            }
            LOG.info("uncompressed file size : " + this.uncompressedSize);
        } finally {
            fileInCheckSize.close();
        }

        this.end = Long.MAX_VALUE;
    } else if (codec != null) {
        this.isCompressed = true;
        this.end = Long.MAX_VALUE;
        this.uncompressedSize = Long.MAX_VALUE;
    } else {
        this.isCompressed = false;
    }

    // get inputstream
    FSDataInputStream fileIn = fs.open(file);

    if (codec != null) {
        this.in = new LineReader(codec.createInputStream(fileIn), job);
    } else {
        if (this.start != 0) {
            fileIn.seek(this.start);
        }
        this.in = new LineReader(fileIn, job);
    }

    // skip lines until we meet new read start
    while (this.start < this.end) {
        Text skipText = new Text();
        long newSize = this.in.readLine(skipText, this.maxLineLength,
                Math.max((int) Math.min(Integer.MAX_VALUE, this.end - this.start), this.maxLineLength));
        if (newSize == 0) {
            // EOF
            this.hasNextRead = false;
            this.pos = this.end;
            break;
        }

        if (skipText.getLength() > 0 && skipText.charAt(0) == READ_DELIMITER) {
            // Found the first read header: remember it for the first record.
            this.prevLine = skipText;
            this.prevSize = newSize;
            this.hasNextRead = true;
            this.pos = this.start;
            break;
        }

        this.start += newSize;

        if (this.start >= this.end) {
            // EOF
            this.hasNextRead = false;
            this.pos = this.end;
            break;
        }
    }

    this.key = null;
    this.value = null;
}