Example usage for org.apache.hadoop.io IOUtils readFully

List of usage examples for org.apache.hadoop.io IOUtils readFully

Introduction

On this page you can find example usage for org.apache.hadoop.io IOUtils readFully.

Prototype

public static void readFully(InputStream in, byte[] buf, int off, int len) throws IOException 

Document

Reads len bytes in a loop.
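
For orientation, here is a minimal, self-contained sketch of calling readFully directly. The path and buffer size are hypothetical; readFully blocks until exactly len bytes have been read and throws an IOException if the stream ends first.

import java.io.IOException;
import java.io.InputStream;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;

public class ReadFullyExample {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        Path path = new Path("/tmp/example.bin"); // hypothetical input file
        FileSystem fs = path.getFileSystem(conf);

        byte[] header = new byte[16];
        InputStream in = null;
        try {
            in = fs.open(path);
            // Blocks until all 16 bytes are read; fails on premature EOF.
            IOUtils.readFully(in, header, 0, header.length);
        } finally {
            IOUtils.closeStream(in);
        }
    }
}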

Usage

From source file:com.moz.fiji.mapreduce.input.impl.WholeFileRecordReader.java

License:Apache License

/**
 * <p>If the file has not already been read, this reads it into memory, so that a call
 * to getCurrentValue() will return the entire contents of this file as Text,
 * and getCurrentKey() will return the qualified path to this file as Text.  Then, returns
 * true.  If it has already been read, then returns false without updating any internal state.</p>
 *
 * @return Whether the file was read or not.
 * @throws IOException if there is an error reading the file.
 * @throws InterruptedException if there is an error.
 */
@Override
public boolean nextKeyValue() throws IOException, InterruptedException {
    if (!mProcessed) {
        if (mFileLength > (long) Integer.MAX_VALUE) {
            throw new IOException("File is longer than Integer.MAX_VALUE.");
        }
        byte[] contents = new byte[(int) mFileLength];

        FileSystem fs = mFileToRead.getFileSystem(mConf);
        FSDataInputStream in = null;
        try {
            // Set the contents of this file.
            in = fs.open(mFileToRead);
            IOUtils.readFully(in, contents, 0, contents.length);
            mFileText.set(contents, 0, contents.length);

            // Set the name of this file.
            String fileName = mFileToRead.makeQualified(fs).toString();
            mFileName.set(fileName);
        } finally {
            IOUtils.closeStream(in);
        }
        mProcessed = true;
        return true;
    }
    return false;
}

From source file:com.twitter.hraven.mapreduce.JobFileRawLoaderMapper.java

License:Apache License

/**
 * Get the raw bytes and the last modification millis for this JobFile
 *
 * @return the contents of the job file.
 * @throws IOException
 *           when bad things happen during reading
 */
private byte[] readJobFile(FileStatus fileStatus) throws IOException {
    byte[] rawBytes = null;
    FSDataInputStream fsdis = null;
    try {
        long fileLength = fileStatus.getLen();
        if (fileLength > Integer.MAX_VALUE) {
            // Guard against silent truncation in the (int) cast below.
            throw new IOException("File is longer than Integer.MAX_VALUE: " + fileStatus.getPath());
        }
        int fileLengthInt = (int) fileLength;
        rawBytes = new byte[fileLengthInt];
        fsdis = hdfs.open(fileStatus.getPath());
        IOUtils.readFully(fsdis, rawBytes, 0, fileLengthInt);
    } finally {
        IOUtils.closeStream(fsdis);
    }
    return rawBytes;
}

From source file:crunch.MaxTemperature.java

License:Apache License

@Override
public boolean nextKeyValue() throws IOException, InterruptedException {
    if (!processed) {
        byte[] contents = new byte[(int) fileSplit.getLength()];
        Path file = fileSplit.getPath();
        FileSystem fs = file.getFileSystem(conf);
        FSDataInputStream in = null;
        try {
            in = fs.open(file);
            IOUtils.readFully(in, contents, 0, contents.length);
            value.set(contents, 0, contents.length);
        } finally {
            IOUtils.closeStream(in);
        }
        processed = true;
        return true;
    }
    return false;
}

From source file:crunch.MaxTemperature.java

License:Apache License

@Override
public boolean next(NullWritable key, BytesWritable value) throws IOException {
    if (!processed) {
        byte[] contents = new byte[(int) fileSplit.getLength()];
        Path file = fileSplit.getPath();
        FileSystem fs = file.getFileSystem(conf);
        FSDataInputStream in = null;
        try {
            in = fs.open(file);
            IOUtils.readFully(in, contents, 0, contents.length);
            value.set(contents, 0, contents.length);
        } finally {
            IOUtils.closeStream(in);
        }
        processed = true;
        return true;
    }
    return false;
}

From source file:de.l3s.common.hadoop.WholeFileRecordReader.java

License:Apache License

@Override
public boolean nextKeyValue() throws IOException {
    if (!processed) {
        byte[] contents = new byte[(int) fileSplit.getLength()];

        Path file = fileSplit.getPath();
        FileSystem fs = file.getFileSystem(conf);

    FSDataInputStream in = null;
        try {
            in = fs.open(file);
            IOUtils.readFully(in, contents, 0, contents.length);
            value.set(contents, 0, contents.length);
        } finally {
            IOUtils.closeStream(in);
        }
        processed = true;
        return true;
    }
    return false;
}

From source file:edu.usc.pgroup.louvain.hadoop.GraphPartitionRecordReader.java

License:Apache License

@Override
public boolean nextKeyValue() throws IOException, InterruptedException {

    if (isFinished) {
        return false;
    }

    int fileLength = (int) split.getLength();
    byte[] result = new byte[fileLength];

    FileSystem fs = FileSystem.get(conf);
    Path path = split.getPath();
    currentKey = new Text(path.getName());
    FSDataInputStream in = null;
    try {
        in = fs.open(split.getPath());
        IOUtils.readFully(in, result, 0, fileLength);
        currentValue.set(result, 0, fileLength);

    } finally {
        IOUtils.closeStream(in);
    }

    this.isFinished = true;
    return true;
}

From source file:eu.edisonproject.utility.commons.WholeFileRecordReader.java

License:Apache License

@Override
public boolean nextKeyValue() throws IOException {
    if (!processed) {
        byte[] contents = new byte[(int) fileSplit.getLength()];

        Path file = fileSplit.getPath();
        FileSystem fs = file.getFileSystem(conf);

        FSDataInputStream in = null;
        try {
            in = fs.open(file);
            IOUtils.readFully(in, contents, 0, contents.length);
            // Decodes with the platform default charset; pass an explicit
            // charset to the String constructor if the encoding is known.
            value.set(new String(contents));
        } finally {
            IOUtils.closeStream(in);
        }
        processed = true;
        return true;
    }
    return false;
}

From source file:fi.tkk.ics.hadoop.bam.cli.plugins.Cat.java

License:Open Source License

@Override
protected int run(final CmdLineParser parser) {
    final List<String> args = parser.getRemainingArgs();
    if (args.isEmpty()) {
        System.err.println("cat :: OUTPATH not given.");
        return 3;
    }
    if (args.size() == 1) {
        System.err.println("cat :: no INPATHs given.");
        return 3;
    }

    final Path outPath = new Path(args.get(0));

    final List<String> ins = args.subList(1, args.size());

    final boolean verbose = parser.getBoolean(verboseOpt);

    final SAMFileReader.ValidationStringency stringency = Utils.toStringency(parser.getOptionValue(
            stringencyOpt, SAMFileReader.ValidationStringency.DEFAULT_STRINGENCY.toString()), "cat");
    if (stringency == null)
        return 3;

    final Configuration conf = getConf();

    // Expand the glob patterns.

    final List<Path> inputs = new ArrayList<Path>(ins.size());
    for (final String in : ins) {
        try {
            final Path p = new Path(in);
            for (final FileStatus fstat : p.getFileSystem(conf).globStatus(p))
                inputs.add(fstat.getPath());
        } catch (IOException e) {
            System.err.printf("cat :: Could not expand glob pattern '%s': %s\n", in, e.getMessage());
        }
    }

    final Path input0 = inputs.get(0);

    // Infer the format from the first input path or contents.

    SAMFormat format = SAMFormat.inferFromFilePath(input0);
    if (format == null) {
        try {
            format = SAMFormat.inferFromData(input0.getFileSystem(conf).open(input0));
        } catch (IOException e) {
            System.err.printf("cat :: Could not read input '%s': %s\n", input0, e.getMessage());
            return 4;
        }
        if (format == null) {
            System.err.printf("cat :: Unknown SAM format in input '%s'\n", inputs.get(0));
            return 4;
        }
    }

    // Choose the header.

    final SAMFileHeader header;
    try {
        final SAMFileReader r = new SAMFileReader(input0.getFileSystem(conf).open(input0));

        header = r.getFileHeader();
        r.close();
    } catch (IOException e) {
        System.err.printf("cat :: Could not read input '%s': %s\n", input0, e.getMessage());
        return 5;
    }

    // Open the output.

    final OutputStream out;

    try {
        out = outPath.getFileSystem(conf).create(outPath);
    } catch (IOException e) {
        System.err.printf("cat :: Could not create output file: %s\n", e.getMessage());
        return 6;
    }

    // Output the header.

    try {
        // Don't use the returned stream, because we're concatenating directly
        // and don't want to apply another layer of compression to BAM.
        new SAMOutputPreparer().prepareForRecords(out, format, header);

    } catch (IOException e) {
        System.err.printf("cat :: Outputting header failed: %s\n", e.getMessage());
        return 7;
    }

    // Output the records from each file in the order given, converting if
    // necessary.

    int inIdx = 1;
    try {
        for (final Path inPath : inputs) {
            if (verbose) {
                System.out.printf("cat :: Concatenating path %d of %d...\n", inIdx++, inputs.size());
            }
            switch (format) {
            case SAM: {
                final InputStream in = inPath.getFileSystem(conf).open(inPath);

                // Use SAMFileReader to grab the header, but ignore it, thus
                // ensuring that the header has been skipped.
                new SAMFileReader(in).getFileHeader();

                IOUtils.copyBytes(in, out, conf, false);
                in.close();
                break;
            }
            case BAM: {
                final FSDataInputStream in = inPath.getFileSystem(conf).open(inPath);

                // Find the block length, thankfully given to us by the BGZF
                // format. We need it in order to know how much gzipped data to
                // read after skipping the BAM header, so that we can only read
                // that much and then simply copy the remaining gzip blocks
                // directly.

                final ByteBuffer block = ByteBuffer.wrap(new byte[0xffff]).order(ByteOrder.LITTLE_ENDIAN);

                // Don't use readFully here, since EOF is fine.
                for (int read = 0; read < block.capacity();) {
                    final int prev = in.read(block.array(), read, block.capacity() - read);
                    if (prev == -1)
                        break; // EOF is fine.
                    read += prev;
                }

                // Find the BGZF subfield and extract the length from it.
                int blockLength = 0;
                for (int xlen = (int) block.getShort(10) & 0xffff, i = 12, end = i + xlen; i < end;) {
                    final int slen = (int) block.getShort(i + 2) & 0xffff;
                    if (block.getShort(i) == 0x4342 && slen == 2) {
                        blockLength = ((int) block.getShort(i + 4) & 0xffff) + 1;
                        break;
                    }
                    i += 4 + slen;
                }
                if (blockLength == 0)
                    throw new IOException("BGZF extra field not found in " + inPath);

                if (verbose) {
                    System.err.printf("cat ::   first block length %d\n", blockLength);
                }

                // Skip the BAM header. Can't use SAMFileReader because it'll
                // use its own BlockCompressedInputStream.

                final ByteArrayInputStream blockIn = new ByteArrayInputStream(block.array(), 0, blockLength);

                final BlockCompressedInputStream bin = new BlockCompressedInputStream(blockIn);

                // Theoretically we could write into the ByteBuffer we already
                // had, since BlockCompressedInputStream needs to read the
                // header before it can decompress any data and thereafter we
                // can freely overwrite the first 8 bytes of the header... but
                // that's a bit too nasty, so let's not.
                final ByteBuffer buf = ByteBuffer.wrap(new byte[8]).order(ByteOrder.LITTLE_ENDIAN);

                // Read the BAM magic number and the SAM header length, verify
                // the magic, and skip the SAM header.

                IOUtils.readFully(bin, buf.array(), 0, 8);

                final int magic = buf.getInt(0), headerLen = buf.getInt(4);

                if (magic != 0x014d4142)
                    throw new IOException("bad BAM magic number in " + inPath);

                IOUtils.skipFully(bin, headerLen);

                // Skip the reference sequences.

                IOUtils.readFully(bin, buf.array(), 0, 4);

                for (int i = buf.getInt(0); i-- > 0;) {
                    // Read the reference name length and skip it along with the
                    // reference length.
                    IOUtils.readFully(bin, buf.array(), 0, 4);
                    IOUtils.skipFully(bin, buf.getInt(0) + 4);
                }

                // Recompress the rest of this gzip block.

                final int remaining = bin.available();

                if (verbose)
                    System.err.printf("cat ::   %d bytes to bgzip\n", remaining);

                if (remaining > 0) {
                    // The overload of IOUtils.copyBytes that takes "long length"
                    // was added only in Hadoop 0.20.205.0, which we don't want
                    // to depend on, so copy manually.
                    final byte[] remBuf = new byte[remaining];
                    IOUtils.readFully(bin, remBuf, 0, remBuf.length);

                    final BlockCompressedOutputStream bout = new BlockCompressedOutputStream(out, null);

                    bout.write(remBuf);
                    bout.flush();
                }

                // Just copy the raw bytes comprising the remaining blocks.

                in.seek(blockLength);
                IOUtils.copyBytes(in, out, conf, false);
                in.close();
                break;
            }
            }
        }
    } catch (IOException e) {
        System.err.printf("cat :: Outputting records failed: %s\n", e.getMessage());
        return 8;
    }

    // For BAM, output the BGZF terminator.

    try {
        if (format == SAMFormat.BAM)
            out.write(BlockCompressedStreamConstants.EMPTY_GZIP_BLOCK);

        out.close();
    } catch (IOException e) {
        System.err.printf("cat :: Finishing output failed: %s\n", e.getMessage());
        return 9;
    }
    return 0;
}

From source file:io.hops.erasure_coding.RaidUtils.java

License:Apache License

/**
 * Copies exactly count bytes from in to out, filling buf on each pass.
 */
public static void copyBytes(InputStream in, OutputStream out, byte[] buf, long count) throws IOException {
    for (long bytesRead = 0; bytesRead < count;) {
        // Take the min in long arithmetic so counts above Integer.MAX_VALUE stay safe.
        int toRead = (int) Math.min((long) buf.length, count - bytesRead);
        IOUtils.readFully(in, buf, 0, toRead);
        bytesRead += toRead;
        out.write(buf, 0, toRead);
    }
}
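
For context, a hypothetical call site for this helper; the stream variables and fileLength are assumptions for illustration, not part of the original source:

byte[] buf = new byte[64 * 1024];
// Copies exactly fileLength bytes; readFully fails if the input ends early.
RaidUtils.copyBytes(in, out, buf, fileLength);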

From source file:kogiri.common.hadoop.io.reader.fasta.FastaRawReadReader.java

License:Open Source License

@Override
public void initialize(InputSplit genericSplit, TaskAttemptContext context)
        throws IOException, InterruptedException {

    FileSplit split = (FileSplit) genericSplit;
    Configuration job = context.getConfiguration();
    this.maxLineLength = job.getInt("mapred.linerecordreader.maxlength", Integer.MAX_VALUE);
    this.start = split.getStart();
    this.end = this.start + split.getLength();
    final Path file = split.getPath();
    this.compressionCodecs = new CompressionCodecFactory(job);
    final CompressionCodec codec = this.compressionCodecs.getCodec(file);

    this.filename = file.getName();

    this.firstRead = true;

    // open the file and seek to the start of the split
    FileSystem fs = file.getFileSystem(job);

    // get uncompressed length
    if (codec instanceof GzipCodec) {
        this.isCompressed = true;

        FSDataInputStream fileInCheckSize = fs.open(file);
        byte[] len = new byte[4];
        try {
            LOG.info("compressed input : " + file.getName());
            LOG.info("compressed file size : " + this.end);
            // The gzip ISIZE field (last 4 bytes, little-endian) stores the
            // uncompressed size modulo 2^32; seek rather than skip, since
            // skip may stop short of the requested offset.
            fileInCheckSize.seek(this.end - 4);
            IOUtils.readFully(fileInCheckSize, len, 0, len.length);
            // Mask each byte so sign extension cannot corrupt the result.
            this.uncompressedSize = ((long) (len[3] & 0xff) << 24) | ((len[2] & 0xff) << 16)
                    | ((len[1] & 0xff) << 8) | (len[0] & 0xff);
            if (this.uncompressedSize < 0) {
                this.uncompressedSize = this.end;
            }
            LOG.info("uncompressed file size : " + this.uncompressedSize);
        } finally {
            fileInCheckSize.close();
        }

        this.end = Long.MAX_VALUE;
    } else if (codec != null) {
        this.isCompressed = true;
        this.end = Long.MAX_VALUE;
        this.uncompressedSize = Long.MAX_VALUE;
    } else {
        this.isCompressed = false;
    }

    // get inputstream
    FSDataInputStream fileIn = fs.open(file);

    if (codec != null) {
        this.in = new LineReader(codec.createInputStream(fileIn), job);
    } else {
        if (this.start != 0) {
            fileIn.seek(this.start);
        }
        this.in = new LineReader(fileIn, job);
    }

    // skip lines until we meet new read start
    while (this.start < this.end) {
        Text skipText = new Text();
        long newSize = this.in.readLine(skipText, this.maxLineLength,
                Math.max((int) Math.min(Integer.MAX_VALUE, this.end - this.start), this.maxLineLength));
        if (newSize == 0) {
            // EOF
            this.hasNextRead = false;
            this.pos = this.end;
            break;
        }

        if (skipText.getLength() > 0 && skipText.charAt(0) == READ_DELIMITER) {
            this.prevLine = skipText;
            this.prevSize = newSize;
            this.hasNextRead = true;
            this.pos = this.start;
            break;
        }

        this.start += newSize;

        if (this.start >= this.end) {
            // EOF
            this.hasNextRead = false;
            this.pos = this.end;
            break;
        }
    }

    this.key = null;
    this.value = null;
}