Example usage for org.apache.hadoop.io IOUtils readFully

Introduction

On this page you can find example usage of org.apache.hadoop.io.IOUtils.readFully.

Prototype

public static void readFully(InputStream in, byte[] buf, int off, int len) throws IOException 

Document

Reads len bytes in a loop into buf starting at offset off, throwing an IOException if the stream ends before len bytes have been read.
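
Before the extracted examples below, here is a minimal, self-contained sketch of calling readFully directly. The class name, file name, and buffer size are illustrative assumptions and do not come from any of the source files listed under Usage.

import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;

import org.apache.hadoop.io.IOUtils;

public class ReadFullyExample {
    public static void main(String[] args) throws IOException {
        // Hypothetical local file and fixed-size buffer, for illustration only.
        byte[] buf = new byte[1024];
        try (InputStream in = new FileInputStream("data.bin")) {
            // Loops internally until exactly buf.length bytes have been read,
            // or throws an IOException if the stream ends first.
            IOUtils.readFully(in, buf, 0, buf.length);
        }
        // buf now holds the first 1024 bytes of data.bin.
    }
}

Unlike a single InputStream.read call, which may return fewer bytes than requested, readFully either fills the requested range completely or fails with an exception. That is why the examples below use it to load whole files and fixed-length records in one call.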

Usage

From source file:org.apache.tez.runtime.library.common.sort.impl.IFileInputStream.java

License:Apache License

private int doRead(byte[] b, int off, int len) throws IOException {

    // If we are trying to read past the end of data, just read
    // the left over data
    int origLen = len;
    if (currentOffset + len > dataLength) {
        len = (int) (dataLength - currentOffset);
    }

    int bytesRead = in.read(b, off, len);

    if (bytesRead < 0) {
        String mesg = " CurrentOffset=" + currentOffset + ", offset=" + offset + ", off=" + off
                + ", dataLength=" + dataLength + ", origLen=" + origLen + ", len=" + len + ", length=" + length
                + ", checksumSize=" + checksumSize;
        LOG.info(mesg);
        throw new ChecksumException("Checksum Error: " + mesg, 0);
    }

    checksum(b, off, bytesRead);

    currentOffset += bytesRead;

    if (disableChecksumValidation) {
        return bytesRead;
    }

    if (currentOffset == dataLength) {
        // The last four bytes are checksum. Strip them and verify
        sum.update(buffer, 0, offset);
        csum = new byte[checksumSize];
        IOUtils.readFully(in, csum, 0, checksumSize);
        if (!sum.compare(csum, 0)) {
            String mesg = "CurrentOffset=" + currentOffset + ", off=" + offset + ", dataLength=" + dataLength
                    + ", origLen=" + origLen + ", len=" + len + ", length=" + length + ", checksumSize="
                    + checksumSize + ", csum=" + Arrays.toString(csum) + ", sum=" + sum;
            LOG.info(mesg);

            throw new ChecksumException("Checksum Error: " + mesg, 0);
        }
    }
    return bytesRead;
}

From source file:org.apache.tez.runtime.library.shuffle.common.ShuffleUtils.java

License:Apache License

@SuppressWarnings("resource")
public static void shuffleToMemory(MemoryFetchedInput fetchedInput, InputStream input, int decompressedLength,
        int compressedLength, CompressionCodec codec, boolean ifileReadAhead, int ifileReadAheadLength, Log LOG)
        throws IOException {
    IFileInputStream checksumIn = new IFileInputStream(input, compressedLength, ifileReadAhead,
            ifileReadAheadLength);

    input = checksumIn;

    // Are map-outputs compressed?
    if (codec != null) {
        Decompressor decompressor = CodecPool.getDecompressor(codec);
        decompressor.reset();
        input = codec.createInputStream(input, decompressor);
    }
    // Copy map-output into an in-memory buffer
    byte[] shuffleData = fetchedInput.getBytes();

    try {
        IOUtils.readFully(input, shuffleData, 0, shuffleData.length);
        // metrics.inputBytes(shuffleData.length);
        LOG.info("Read " + shuffleData.length + " bytes from input for "
                + fetchedInput.getInputAttemptIdentifier());
    } catch (IOException ioe) {
        // Close the streams
        IOUtils.cleanup(LOG, input);
        // Re-throw
        throw ioe;
    }
}

From source file:org.geotools.WholeFile.WholeFileRecordReader.java

License:Apache License

@Override
public boolean next(Text key, BytesWritable value) throws IOException {
    if (!processed) {
        byte[] contents = new byte[(int) fileSplit.getLength()];
        Path file = fileSplit.getPath();

        String fileName = file.getName();
        key.set(fileName);

        FileSystem fs = file.getFileSystem(conf);
        FSDataInputStream in = null;
        try {
            in = fs.open(file);
            IOUtils.readFully(in, contents, 0, contents.length);
            value.set(contents, 0, contents.length);
        } finally {
            IOUtils.closeStream(in);
        }
        processed = true;
        return true;
    }
    return false;
}

From source file:org.geotools.WholeFile.WholeFileRecordReader_NewAPI.java

License:Apache License

public boolean nextKeyValue() throws IOException {
    if (!processed) {
        byte[] contents = new byte[(int) fileSplit.getLength()];

        Path file = fileSplit.getPath();
        FileSystem fs = file.getFileSystem(conf);

        FSDataInputStream in = null;
        try {
            in = fs.open(file);
            IOUtils.readFully(in, contents, 0, contents.length);
            value.set(contents, 0, contents.length);
        } finally {
            IOUtils.closeStream(in);
        }
        processed = true;
        return true;
    }
    return false;
}

From source file:org.interactiverobotics.source_code_crawler.step6.TextFileRecordReader.java

License:Open Source License

@Override
public boolean nextKeyValue() throws IOException, InterruptedException {
    if (!processed) {
        final byte[] contents = new byte[(int) fileSplit.getLength()];
        final Path file = fileSplit.getPath();
        final FileSystem fileSystem = file.getFileSystem(configuration);
        FSDataInputStream in = null;
        try {
            in = fileSystem.open(file);
            IOUtils.readFully(in, contents, 0, contents.length);
            key.set(file.toString());
            value.set(contents, 0, contents.length);
        } finally {
            IOUtils.closeStream(in);
        }
        processed = true;
        return true;
    }
    return false;
}

From source file:org.kitesdk.cli.commands.TarImportCommand.java

License:Apache License

@Override
public int run() throws IOException {
    Preconditions.checkArgument(targets != null && targets.size() == 2,
            "Tar path and target dataset URI are required.");

    Preconditions.checkArgument(SUPPORTED_TAR_COMPRESSION_TYPES.contains(compressionType),
            "Compression type " + compressionType + " is not supported");

    String source = targets.get(0);
    String datasetUri = targets.get(1);

    long blockSize = getConf().getLong("dfs.blocksize", DEFAULT_BLOCK_SIZE);

    int success = 0;

    View<TarFileEntry> targetDataset;
    if (Datasets.exists(datasetUri)) {
        console.debug("Using existing dataset: {}", datasetUri);
        targetDataset = Datasets.load(datasetUri, TarFileEntry.class);
    } else {
        console.info("Creating new dataset: {}", datasetUri);
        DatasetDescriptor.Builder descriptorBuilder = new DatasetDescriptor.Builder();
        descriptorBuilder.format(Formats.AVRO);
        descriptorBuilder.schema(TarFileEntry.class);
        targetDataset = Datasets.create(datasetUri, descriptorBuilder.build(), TarFileEntry.class);
    }

    DatasetWriter<TarFileEntry> writer = targetDataset.newWriter();

    // Create a Tar input stream wrapped in appropriate decompressor
    // TODO: Enhancement would be to use native compression libs
    TarArchiveInputStream tis;
    CompressionType tarCompressionType = CompressionType.NONE;

    if (compressionType.isEmpty()) {
        if (source.endsWith(".tar")) {
            tarCompressionType = CompressionType.NONE;
        } else if (source.endsWith(".tar.gz")) {
            tarCompressionType = CompressionType.GZIP;
        } else if (source.endsWith(".tar.bz2")) {
            tarCompressionType = CompressionType.BZIP2;
        }
    } else if (compressionType.equals("gzip")) {
        tarCompressionType = CompressionType.GZIP;
    } else if (compressionType.equals("bzip2")) {
        tarCompressionType = CompressionType.BZIP2;
    } else {
        tarCompressionType = CompressionType.NONE;
    }

    console.info("Using {} compression", tarCompressionType);

    switch (tarCompressionType) {
    case GZIP:
        tis = new TarArchiveInputStream(new GzipCompressorInputStream(open(source)));
        break;
    case BZIP2:
        tis = new TarArchiveInputStream(new BZip2CompressorInputStream(open(source)));
        break;
    case NONE:
    default:
        tis = new TarArchiveInputStream(open(source));
    }

    TarArchiveEntry entry;

    try {
        int count = 0;
        while ((entry = tis.getNextTarEntry()) != null) {
            if (!entry.isDirectory()) {
                long size = entry.getSize();
                if (size >= blockSize) {
                    console.warn(
                            "Entry \"{}\" (size {}) is larger than the "
                                    + "HDFS block size of {}. This may result in remote block reads",
                            new Object[] { entry.getName(), size, blockSize });
                }

                byte[] buf = new byte[(int) size];
                try {
                    IOUtils.readFully(tis, buf, 0, (int) size);
                } catch (IOException e) {
                    console.error("Did not read entry {} successfully (entry size {})", entry.getName(), size);
                    success = 1;
                    throw e;
                }
                writer.write(TarFileEntry.newBuilder().setFilename(entry.getName())
                        .setFilecontent(ByteBuffer.wrap(buf)).build());
                count++;
            }
        }
        console.info("Added {} records to \"{}\"", count, targetDataset.getDataset().getName());
    } finally {
        IOUtils.closeStream(writer);
        IOUtils.closeStream(tis);
    }

    return success;
}

From source file:org.seqdoop.hadoop_bam.cli.plugins.Cat.java

License:Open Source License

@Override
protected int run(final CmdLineParser parser) {
    final List<String> args = parser.getRemainingArgs();
    if (args.isEmpty()) {
        System.err.println("cat :: OUTPATH not given.");
        return 3;
    }
    if (args.size() == 1) {
        System.err.println("cat :: no INPATHs given.");
        return 3;
    }

    final Path outPath = new Path(args.get(0));

    final List<String> ins = args.subList(1, args.size());

    final boolean verbose = parser.getBoolean(verboseOpt);

    final ValidationStringency stringency = Utils.toStringency(
            parser.getOptionValue(stringencyOpt, ValidationStringency.DEFAULT_STRINGENCY.toString()), "cat");
    if (stringency == null)
        return 3;

    final Configuration conf = getConf();

    // Expand the glob patterns.

    final List<Path> inputs = new ArrayList<Path>(ins.size());
    for (final String in : ins) {
        try {
            final Path p = new Path(in);
            for (final FileStatus fstat : p.getFileSystem(conf).globStatus(p))
                inputs.add(fstat.getPath());
        } catch (IOException e) {
            System.err.printf("cat :: Could not expand glob pattern '%s': %s\n", in, e.getMessage());
        }
    }

    final Path input0 = inputs.get(0);

    // Infer the format from the first input path or contents.

    SAMFormat format = SAMFormat.inferFromFilePath(input0);
    if (format == null) {
        try {
            format = SAMFormat.inferFromData(input0.getFileSystem(conf).open(input0));
        } catch (IOException e) {
            System.err.printf("cat :: Could not read input '%s': %s\n", input0, e.getMessage());
            return 4;
        }
        if (format == null) {
            System.err.printf("cat :: Unknown SAM format in input '%s'\n", inputs.get(0));
            return 4;
        }
    }

    // Choose the header.

    final SAMFileHeader header;
    try {
        final SAMFileReader r = new SAMFileReader(input0.getFileSystem(conf).open(input0));

        header = r.getFileHeader();
        r.close();
    } catch (IOException e) {
        System.err.printf("cat :: Could not read input '%s': %s\n", input0, e.getMessage());
        return 5;
    }

    // Open the output.

    final OutputStream out;

    try {
        out = outPath.getFileSystem(conf).create(outPath);
    } catch (IOException e) {
        System.err.printf("cat :: Could not create output file: %s\n", e.getMessage());
        return 6;
    }

    // Output the header.

    try {
        // Don't use the returned stream, because we're concatenating directly
        // and don't want to apply another layer of compression to BAM.
        new SAMOutputPreparer().prepareForRecords(out, format, header);

    } catch (IOException e) {
        System.err.printf("cat :: Outputting header failed: %s\n", e.getMessage());
        return 7;
    }

    // Output the records from each file in the order given, converting if
    // necessary.

    int inIdx = 1;
    try {
        for (final Path inPath : inputs) {
            if (verbose) {
                System.out.printf("cat :: Concatenating path %d of %d...\n", inIdx++, inputs.size());
            }
            switch (format) {
            case SAM: {
                final InputStream in = inPath.getFileSystem(conf).open(inPath);

                // Use SAMFileReader to grab the header, but ignore it, thus
                // ensuring that the header has been skipped.
                new SAMFileReader(in).getFileHeader();

                IOUtils.copyBytes(in, out, conf, false);
                in.close();
                break;
            }
            case BAM: {
                final FSDataInputStream in = inPath.getFileSystem(conf).open(inPath);

                // Find the block length, thankfully given to us by the BGZF
                // format. We need it in order to know how much gzipped data to
                // read after skipping the BAM header, so that we can only read
                // that much and then simply copy the remaining gzip blocks
                // directly.

                final ByteBuffer block = ByteBuffer.wrap(new byte[0xffff]).order(ByteOrder.LITTLE_ENDIAN);

                // Don't use readFully here, since EOF is fine.
                for (int read = 0, prev; (prev = in.read(block.array(), read, block.capacity() - read)) < block
                        .capacity();) {
                    // EOF is fine.
                    if (prev == -1)
                        break;
                    read += prev;
                }

                // Find the BGZF subfield and extract the length from it.
                int blockLength = 0;
                for (int xlen = (int) block.getShort(10) & 0xffff, i = 12, end = i + xlen; i < end;) {
                    final int slen = (int) block.getShort(i + 2) & 0xffff;
                    if (block.getShort(i) == 0x4342 && slen == 2) {
                        blockLength = ((int) block.getShort(i + 4) & 0xffff) + 1;
                        break;
                    }
                    i += 4 + slen;
                }
                if (blockLength == 0)
                    throw new IOException("BGZF extra field not found in " + inPath);

                if (verbose) {
                    System.err.printf("cat ::   first block length %d\n", blockLength);
                }

                // Skip the BAM header. Can't use SAMFileReader because it'll
                // use its own BlockCompressedInputStream.

                final ByteArrayInputStream blockIn = new ByteArrayInputStream(block.array(), 0, blockLength);

                final BlockCompressedInputStream bin = new BlockCompressedInputStream(blockIn);

                // Theoretically we could write into the ByteBuffer we already
                // had, since BlockCompressedInputStream needs to read the
                // header before it can decompress any data and thereafter we
                // can freely overwrite the first 8 bytes of the header... but
                // that's a bit too nasty, so let's not.
                final ByteBuffer buf = ByteBuffer.wrap(new byte[8]).order(ByteOrder.LITTLE_ENDIAN);

                // Read the BAM magic number and the SAM header length, verify
                // the magic, and skip the SAM header.

                IOUtils.readFully(bin, buf.array(), 0, 8);

                final int magic = buf.getInt(0), headerLen = buf.getInt(4);

                if (magic != 0x014d4142)
                    throw new IOException("bad BAM magic number in " + inPath);

                IOUtils.skipFully(bin, headerLen);

                // Skip the reference sequences.

                IOUtils.readFully(bin, buf.array(), 0, 4);

                for (int i = buf.getInt(0); i-- > 0;) {
                    // Read the reference name length and skip it along with the
                    // reference length.
                    IOUtils.readFully(bin, buf.array(), 0, 4);
                    IOUtils.skipFully(bin, buf.getInt(0) + 4);
                }

                // Recompress the rest of this gzip block.

                final int remaining = bin.available();

                if (verbose)
                    System.err.printf("cat ::   %d bytes to bgzip\n", remaining);

                if (remaining > 0) {
                    // The overload of IOUtils.copyBytes that takes "long length"
                    // was added only in Hadoop 0.20.205.0, which we don't want
                    // to depend on, so copy manually.
                    final byte[] remBuf = new byte[remaining];
                    IOUtils.readFully(bin, remBuf, 0, remBuf.length);

                    final BlockCompressedOutputStream bout = new BlockCompressedOutputStream(out, null);

                    bout.write(remBuf);
                    bout.flush();
                }

                // Just copy the raw bytes comprising the remaining blocks.

                in.seek(blockLength);
                IOUtils.copyBytes(in, out, conf, false);
                in.close();
                break;
            }
            }
        }
    } catch (IOException e) {
        System.err.printf("cat :: Outputting records failed: %s\n", e.getMessage());
        return 8;
    }

    // For BAM, output the BGZF terminator.

    try {
        if (format == SAMFormat.BAM)
            out.write(BlockCompressedStreamConstants.EMPTY_GZIP_BLOCK);

        out.close();
    } catch (IOException e) {
        System.err.printf("cat :: Finishing output failed: %s\n", e.getMessage());
        return 9;
    }
    return 0;
}

From source file:org.shaf.core.io.hadoop.WholeFileRecordReader.java

License:Apache License

@Override
public boolean nextKeyValue() throws IOException, InterruptedException {
    if (fileProcessed) {
        return false;
    }

    int fileLength = (int) split.getLength();
    byte[] result = new byte[fileLength];

    FileSystem fs = FileSystem.get(config);
    FSDataInputStream in = null;
    try {
        this.key.set(split.getPath().toString());
        in = fs.open(split.getPath());
        IOUtils.readFully(in, result, 0, fileLength);
        value.set(new String(result, 0, fileLength));

    } finally {
        IOUtils.closeStream(in);
    }
    this.fileProcessed = true;
    return true;
}