Example usage for org.apache.hadoop.io IOUtils readFully

Introduction

On this page you can find example usage of org.apache.hadoop.io.IOUtils.readFully.

Prototype

public static void readFully(InputStream in, byte[] buf, int off, int len) throws IOException 

Document

Reads len bytes in a loop into buf starting at offset off, throwing an IOException if the stream ends before len bytes have been read.
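
Before the extracted examples below, here is a minimal, self-contained sketch of calling readFully directly. The class name, file name, and buffer size are illustrative assumptions and do not come from any of the source files listed under Usage.

import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;

import org.apache.hadoop.io.IOUtils;

public class ReadFullyExample {
    public static void main(String[] args) throws IOException {
        // Hypothetical local file and fixed-size buffer, for illustration only.
        byte[] buf = new byte[1024];
        try (InputStream in = new FileInputStream("data.bin")) {
            // Loops internally until exactly buf.length bytes have been read,
            // or throws an IOException if the stream ends first.
            IOUtils.readFully(in, buf, 0, buf.length);
        }
        // buf now holds the first 1024 bytes of data.bin.
    }
}

Unlike a single InputStream.read call, which may return fewer bytes than requested, readFully either fills the requested range completely or fails with an exception. That is why the examples below use it to load whole files and fixed-length records in one call.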

Usage

From source file:org.apache.tez.runtime.library.common.sort.impl.IFileInputStream.java

License:Apache License

private int doRead(byte[] b, int off, int len) throws IOException {

    // If we are trying to read past the end of data, just read
    // the left over data
    int origLen = len;
    if (currentOffset + len > dataLength) {
        len = (int) (dataLength - currentOffset);
    }

    int bytesRead = in.read(b, off, len);

    if (bytesRead < 0) {
        String mesg = " CurrentOffset=" + currentOffset + ", offset=" + offset + ", off=" + off
                + ", dataLength=" + dataLength + ", origLen=" + origLen + ", len=" + len + ", length=" + length
                + ", checksumSize=" + checksumSize;
        LOG.info(mesg);
        throw new ChecksumException("Checksum Error: " + mesg, 0);
    }

    checksum(b, off, bytesRead);

    currentOffset += bytesRead;

    if (disableChecksumValidation) {
        return bytesRead;
    }

    if (currentOffset == dataLength) {
        // The last four bytes are checksum. Strip them and verify
        sum.update(buffer, 0, offset);
        csum = new byte[checksumSize];
        IOUtils.readFully(in, csum, 0, checksumSize);
        if (!sum.compare(csum, 0)) {
            String mesg = "CurrentOffset=" + currentOffset + ", off=" + offset + ", dataLength=" + dataLength
                    + ", origLen=" + origLen + ", len=" + len + ", length=" + length + ", checksumSize="
                    + checksumSize + ", csum=" + Arrays.toString(csum) + ", sum=" + sum;
            LOG.info(mesg);

            throw new ChecksumException("Checksum Error: " + mesg, 0);
        }
    }
    return bytesRead;
}

From source file:org.apache.tez.runtime.library.shuffle.common.ShuffleUtils.java

License:Apache License

@SuppressWarnings("resource")
public static void shuffleToMemory(MemoryFetchedInput fetchedInput, InputStream input, int decompressedLength,
        int compressedLength, CompressionCodec codec, boolean ifileReadAhead, int ifileReadAheadLength, Log LOG)
        throws IOException {
    IFileInputStream checksumIn = new IFileInputStream(input, compressedLength, ifileReadAhead,
            ifileReadAheadLength);

    input = checksumIn;

    // Are map-outputs compressed?
    if (codec != null) {
        Decompressor decompressor = CodecPool.getDecompressor(codec);
        decompressor.reset();
        input = codec.createInputStream(input, decompressor);
    }
    // Copy map-output into an in-memory buffer
    byte[] shuffleData = fetchedInput.getBytes();

    try {
        IOUtils.readFully(input, shuffleData, 0, shuffleData.length);
        // metrics.inputBytes(shuffleData.length);
        LOG.info("Read " + shuffleData.length + " bytes from input for "
                + fetchedInput.getInputAttemptIdentifier());
    } catch (IOException ioe) {
        // Close the streams
        IOUtils.cleanup(LOG, input);
        // Re-throw
        throw ioe;
    }
}

From source file:org.geotools.WholeFile.WholeFileRecordReader.java

License:Apache License

@Override
public boolean next(Text key, BytesWritable value) throws IOException {
    if (!processed) {
        byte[] contents = new byte[(int) fileSplit.getLength()];
        Path file = fileSplit.getPath();

        String fileName = file.getName();
        key.set(fileName);

        FileSystem fs = file.getFileSystem(conf);
        FSDataInputStream in = null;
        try {
            in = fs.open(file);
            IOUtils.readFully(in, contents, 0, contents.length);
            value.set(contents, 0, contents.length);
        } finally {
            IOUtils.closeStream(in);
        }
        processed = true;
        return true;
    }
    return false;
}

From source file:org.geotools.WholeFile.WholeFileRecordReader_NewAPI.java

License:Apache License

public boolean nextKeyValue() throws IOException {
    if (!processed) {
        byte[] contents = new byte[(int) fileSplit.getLength()];

        Path file = fileSplit.getPath();
        FileSystem fs = file.getFileSystem(conf);

        FSDataInputStream in = null;
        try {
            in = fs.open(file);
            IOUtils.readFully(in, contents, 0, contents.length);
            value.set(contents, 0, contents.length);
        } finally {
            IOUtils.closeStream(in);
        }
        processed = true;
        return true;
    }
    return false;
}

From source file:org.interactiverobotics.source_code_crawler.step6.TextFileRecordReader.java

License:Open Source License

@Override
public boolean nextKeyValue() throws IOException, InterruptedException {
    if (!processed) {
        final byte[] contents = new byte[(int) fileSplit.getLength()];
        final Path file = fileSplit.getPath();
        final FileSystem fileSystem = file.getFileSystem(configuration);
        FSDataInputStream in = null;
        try {
            in = fileSystem.open(file);
            IOUtils.readFully(in, contents, 0, contents.length);
            key.set(file.toString());
            value.set(contents, 0, contents.length);
        } finally {
            IOUtils.closeStream(in);
        }
        processed = true;
        return true;
    }
    return false;
}

From source file:org.kitesdk.cli.commands.TarImportCommand.java

License:Apache License

@Override
public int run() throws IOException {
    Preconditions.checkArgument(targets != null && targets.size() == 2,
            "Tar path and target dataset URI are required.");

    Preconditions.checkArgument(SUPPORTED_TAR_COMPRESSION_TYPES.contains(compressionType),
            "Compression type " + compressionType + " is not supported");

    String source = targets.get(0);
    String datasetUri = targets.get(1);

    long blockSize = getConf().getLong("dfs.blocksize", DEFAULT_BLOCK_SIZE);

    int success = 0;

    View<TarFileEntry> targetDataset;
    if (Datasets.exists(datasetUri)) {
        console.debug("Using existing dataset: {}", datasetUri);
        targetDataset = Datasets.load(datasetUri, TarFileEntry.class);
    } else {
        console.info("Creating new dataset: {}", datasetUri);
        DatasetDescriptor.Builder descriptorBuilder = new DatasetDescriptor.Builder();
        descriptorBuilder.format(Formats.AVRO);
        descriptorBuilder.schema(TarFileEntry.class);
        targetDataset = Datasets.create(datasetUri, descriptorBuilder.build(), TarFileEntry.class);
    }

    DatasetWriter<TarFileEntry> writer = targetDataset.newWriter();

    // Create a Tar input stream wrapped in appropriate decompressor
    // TODO: Enhancement would be to use native compression libs
    TarArchiveInputStream tis;
    CompressionType tarCompressionType = CompressionType.NONE;

    if (compressionType.isEmpty()) {
        if (source.endsWith(".tar")) {
            tarCompressionType = CompressionType.NONE;
        } else if (source.endsWith(".tar.gz")) {
            tarCompressionType = CompressionType.GZIP;
        } else if (source.endsWith(".tar.bz2")) {
            tarCompressionType = CompressionType.BZIP2;
        }
    } else if (compressionType.equals("gzip")) {
        tarCompressionType = CompressionType.GZIP;
    } else if (compressionType.equals("bzip2")) {
        tarCompressionType = CompressionType.BZIP2;
    } else {
        tarCompressionType = CompressionType.NONE;
    }

    console.info("Using {} compression", tarCompressionType);

    switch (tarCompressionType) {
    case GZIP:
        tis = new TarArchiveInputStream(new GzipCompressorInputStream(open(source)));
        break;
    case BZIP2:
        tis = new TarArchiveInputStream(new BZip2CompressorInputStream(open(source)));
        break;
    case NONE:
    default:
        tis = new TarArchiveInputStream(open(source));
    }

    TarArchiveEntry entry;

    try {
        int count = 0;
        while ((entry = tis.getNextTarEntry()) != null) {
            if (!entry.isDirectory()) {
                long size = entry.getSize();
                if (size >= blockSize) {
                    console.warn(
                            "Entry \"{}\" (size {}) is larger than the "
                                    + "HDFS block size of {}. This may result in remote block reads",
                            new Object[] { entry.getName(), size, blockSize });
                }

                byte[] buf = new byte[(int) size];
                try {
                    IOUtils.readFully(tis, buf, 0, (int) size);
                } catch (IOException e) {
                    console.error("Did not read entry {} successfully (entry size {})", entry.getName(), size);
                    success = 1;
                    throw e;
                }
                writer.write(TarFileEntry.newBuilder().setFilename(entry.getName())
                        .setFilecontent(ByteBuffer.wrap(buf)).build());
                count++;
            }
        }
        console.info("Added {} records to \"{}\"", count, targetDataset.getDataset().getName());
    } finally {
        IOUtils.closeStream(writer);
        IOUtils.closeStream(tis);
    }

    return success;
}

From source file:org.seqdoop.hadoop_bam.cli.plugins.Cat.java

License:Open Source License

@Override
protected int run(final CmdLineParser parser) {
    final List<String> args = parser.getRemainingArgs();
    if (args.isEmpty()) {
        System.err.println("cat :: OUTPATH not given.");
        return 3;
    }
    if (args.size() == 1) {
        System.err.println("cat :: no INPATHs given.");
        return 3;
    }

    final Path outPath = new Path(args.get(0));

    final List<String> ins = args.subList(1, args.size());

    final boolean verbose = parser.getBoolean(verboseOpt);

    final ValidationStringency stringency = Utils.toStringency(
            parser.getOptionValue(stringencyOpt, ValidationStringency.DEFAULT_STRINGENCY.toString()), "cat");
    if (stringency == null)
        return 3;

    final Configuration conf = getConf();

    // Expand the glob patterns.

    final List<Path> inputs = new ArrayList<Path>(ins.size());
    for (final String in : ins) {
        try {
            final Path p = new Path(in);
            for (final FileStatus fstat : p.getFileSystem(conf).globStatus(p))
                inputs.add(fstat.getPath());
        } catch (IOException e) {
            System.err.printf("cat :: Could not expand glob pattern '%s': %s\n", in, e.getMessage());
        }
    }

    final Path input0 = inputs.get(0);

    // Infer the format from the first input path or contents.

    SAMFormat format = SAMFormat.inferFromFilePath(input0);
    if (format == null) {
        try {
            format = SAMFormat.inferFromData(input0.getFileSystem(conf).open(input0));
        } catch (IOException e) {
            System.err.printf("cat :: Could not read input '%s': %s\n", input0, e.getMessage());
            return 4;
        }
        if (format == null) {
            System.err.printf("cat :: Unknown SAM format in input '%s'\n", inputs.get(0));
            return 4;
        }
    }

    // Choose the header.

    final SAMFileHeader header;
    try {
        final SAMFileReader r = new SAMFileReader(input0.getFileSystem(conf).open(input0));

        header = r.getFileHeader();
        r.close();
    } catch (IOException e) {
        System.err.printf("cat :: Could not read input '%s': %s\n", input0, e.getMessage());
        return 5;
    }

    // Open the output.

    final OutputStream out;

    try {
        out = outPath.getFileSystem(conf).create(outPath);
    } catch (IOException e) {
        System.err.printf("cat :: Could not create output file: %s\n", e.getMessage());
        return 6;
    }

    // Output the header.

    try {
        // Don't use the returned stream, because we're concatenating directly
        // and don't want to apply another layer of compression to BAM.
        new SAMOutputPreparer().prepareForRecords(out, format, header);

    } catch (IOException e) {
        System.err.printf("cat :: Outputting header failed: %s\n", e.getMessage());
        return 7;
    }

    // Output the records from each file in the order given, converting if
    // necessary.

    int inIdx = 1;
    try {
        for (final Path inPath : inputs) {
            if (verbose) {
                System.out.printf("cat :: Concatenating path %d of %d...\n", inIdx++, inputs.size());
            }
            switch (format) {
            case SAM: {
                final InputStream in = inPath.getFileSystem(conf).open(inPath);

                // Use SAMFileReader to grab the header, but ignore it, thus
                // ensuring that the header has been skipped.
                new SAMFileReader(in).getFileHeader();

                IOUtils.copyBytes(in, out, conf, false);
                in.close();
                break;
            }
            case BAM: {
                final FSDataInputStream in = inPath.getFileSystem(conf).open(inPath);

                // Find the block length, thankfully given to us by the BGZF
                // format. We need it in order to know how much gzipped data to
                // read after skipping the BAM header, so that we can only read
                // that much and then simply copy the remaining gzip blocks
                // directly.

                final ByteBuffer block = ByteBuffer.wrap(new byte[0xffff]).order(ByteOrder.LITTLE_ENDIAN);

                // Don't use readFully here, since EOF is fine.
                for (int read = 0, prev; (prev = in.read(block.array(), read, block.capacity() - read)) < block
                        .capacity();) {
                    // EOF is fine.
                    if (prev == -1)
                        break;
                    read += prev;
                }

                // Find the BGZF subfield and extract the length from it.
                int blockLength = 0;
                for (int xlen = (int) block.getShort(10) & 0xffff, i = 12, end = i + xlen; i < end;) {
                    final int slen = (int) block.getShort(i + 2) & 0xffff;
                    if (block.getShort(i) == 0x4342 && slen == 2) {
                        blockLength = ((int) block.getShort(i + 4) & 0xffff) + 1;
                        break;
                    }
                    i += 4 + slen;
                }
                if (blockLength == 0)
                    throw new IOException("BGZF extra field not found in " + inPath);

                if (verbose) {
                    System.err.printf("cat ::   first block length %d\n", blockLength);
                }

                // Skip the BAM header. Can't use SAMFileReader because it'll
                // use its own BlockCompressedInputStream.

                final ByteArrayInputStream blockIn = new ByteArrayInputStream(block.array(), 0, blockLength);

                final BlockCompressedInputStream bin = new BlockCompressedInputStream(blockIn);

                // Theoretically we could write into the ByteBuffer we already
                // had, since BlockCompressedInputStream needs to read the
                // header before it can decompress any data and thereafter we
                // can freely overwrite the first 8 bytes of the header... but
                // that's a bit too nasty, so let's not.
                final ByteBuffer buf = ByteBuffer.wrap(new byte[8]).order(ByteOrder.LITTLE_ENDIAN);

                // Read the BAM magic number and the SAM header length, verify
                // the magic, and skip the SAM header.

                IOUtils.readFully(bin, buf.array(), 0, 8);

                final int magic = buf.getInt(0), headerLen = buf.getInt(4);

                if (magic != 0x014d4142)
                    throw new IOException("bad BAM magic number in " + inPath);

                IOUtils.skipFully(bin, headerLen);

                // Skip the reference sequences.

                IOUtils.readFully(bin, buf.array(), 0, 4);

                for (int i = buf.getInt(0); i-- > 0;) {
                    // Read the reference name length and skip it along with the
                    // reference length.
                    IOUtils.readFully(bin, buf.array(), 0, 4);
                    IOUtils.skipFully(bin, buf.getInt(0) + 4);
                }

                // Recompress the rest of this gzip block.

                final int remaining = bin.available();

                if (verbose)
                    System.err.printf("cat ::   %d bytes to bgzip\n", remaining);

                if (remaining > 0) {
                    // The overload of IOUtils.copyBytes that takes "long length"
                    // was added only in Hadoop 0.20.205.0, which we don't want
                    // to depend on, so copy manually.
                    final byte[] remBuf = new byte[remaining];
                    IOUtils.readFully(bin, remBuf, 0, remBuf.length);

                    final BlockCompressedOutputStream bout = new BlockCompressedOutputStream(out, null);

                    bout.write(remBuf);
                    bout.flush();
                }

                // Just copy the raw bytes comprising the remaining blocks.

                in.seek(blockLength);
                IOUtils.copyBytes(in, out, conf, false);
                in.close();
                break;
            }
            }
        }
    } catch (IOException e) {
        System.err.printf("cat :: Outputting records failed: %s\n", e.getMessage());
        return 8;
    }

    // For BAM, output the BGZF terminator.

    try {
        if (format == SAMFormat.BAM)
            out.write(BlockCompressedStreamConstants.EMPTY_GZIP_BLOCK);

        out.close();
    } catch (IOException e) {
        System.err.printf("cat :: Finishing output failed: %s\n", e.getMessage());
        return 9;
    }
    return 0;
}

From source file:org.shaf.core.io.hadoop.WholeFileRecordReader.java

License:Apache License

@Override
public boolean nextKeyValue() throws IOException, InterruptedException {
    if (fileProcessed) {
        return false;
    }

    int fileLength = (int) split.getLength();
    byte[] result = new byte[fileLength];

    FileSystem fs = FileSystem.get(config);
    FSDataInputStream in = null;
    try {
        this.key.set(split.getPath().toString());
        in = fs.open(split.getPath());
        IOUtils.readFully(in, result, 0, fileLength);
        value.set(new String(result, 0, fileLength));

    } finally {
        IOUtils.closeStream(in);
    }
    this.fileProcessed = true;
    return true;
}