Example usage for org.apache.hadoop.io IOUtils copyBytes

Introduction

This page collects usage examples for the org.apache.hadoop.io.IOUtils.copyBytes method.

Prototype

public static void copyBytes(InputStream in, OutputStream out, long count, boolean close) throws IOException 

Document

Copies count bytes from one stream to another.
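
Note that the examples below also exercise other overloads of copyBytes: copyBytes(in, out, int buffSize, boolean close), which copies until EOF using a fixed buffer size, and copyBytes(in, out, Configuration conf, boolean close), which reads the buffer size from the io.file.buffer.size setting. As a minimal, self-contained sketch of the prototype above (the class name CopyBytesDemo is illustrative, not part of Hadoop):

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;

import org.apache.hadoop.io.IOUtils;

public class CopyBytesDemo {
    public static void main(String[] args) throws IOException {
        byte[] data = "hello, hadoop".getBytes("UTF-8");
        InputStream in = new ByteArrayInputStream(data);
        ByteArrayOutputStream out = new ByteArrayOutputStream();

        // Copy exactly 5 bytes ("hello"); both streams are closed
        // afterwards because the close argument is true.
        IOUtils.copyBytes(in, out, 5L, true);

        System.out.println(out.toString("UTF-8")); // prints "hello"
    }
}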

Usage

From source file:eml.studio.server.util.HDFSIO.java

License:Open Source License

/**
 * The same method as hdfs dfs -cat.
 * @param uri target file uri
 * @return  content string in file
 * @throws IOException
 */
public static String cat(String uri) throws IOException {
    InputStream in = getInputStream(uri);
    ByteArrayOutputStream out = new ByteArrayOutputStream();
    IOUtils.copyBytes(in, out, 4096, true);
    return out.toString();
}

From source file:fi.tkk.ics.hadoop.bam.cli.plugins.Cat.java

License:Open Source License

@Override
protected int run(final CmdLineParser parser) {
    final List<String> args = parser.getRemainingArgs();
    if (args.isEmpty()) {
        System.err.println("cat :: OUTPATH not given.");
        return 3;
    }
    if (args.size() == 1) {
        System.err.println("cat :: no INPATHs given.");
        return 3;
    }

    final Path outPath = new Path(args.get(0));

    final List<String> ins = args.subList(1, args.size());

    final boolean verbose = parser.getBoolean(verboseOpt);

    final SAMFileReader.ValidationStringency stringency = Utils.toStringency(parser.getOptionValue(
            stringencyOpt, SAMFileReader.ValidationStringency.DEFAULT_STRINGENCY.toString()), "cat");
    if (stringency == null)
        return 3;

    final Configuration conf = getConf();

    // Expand the glob patterns.

    final List<Path> inputs = new ArrayList<Path>(ins.size());
    for (final String in : ins) {
        try {
            final Path p = new Path(in);
            for (final FileStatus fstat : p.getFileSystem(conf).globStatus(p))
                inputs.add(fstat.getPath());
        } catch (IOException e) {
            System.err.printf("cat :: Could not expand glob pattern '%s': %s\n", in, e.getMessage());
        }
    }

    final Path input0 = inputs.get(0);

    // Infer the format from the first input path or contents.

    SAMFormat format = SAMFormat.inferFromFilePath(input0);
    if (format == null) {
        try {
            format = SAMFormat.inferFromData(input0.getFileSystem(conf).open(input0));
        } catch (IOException e) {
            System.err.printf("cat :: Could not read input '%s': %s\n", input0, e.getMessage());
            return 4;
        }
        if (format == null) {
            System.err.printf("cat :: Unknown SAM format in input '%s'\n", inputs.get(0));
            return 4;
        }
    }

    // Choose the header.

    final SAMFileHeader header;
    try {
        final SAMFileReader r = new SAMFileReader(input0.getFileSystem(conf).open(input0));

        header = r.getFileHeader();
        r.close();
    } catch (IOException e) {
        System.err.printf("cat :: Could not read input '%s': %s\n", input0, e.getMessage());
        return 5;
    }

    // Open the output.

    final OutputStream out;

    try {
        out = outPath.getFileSystem(conf).create(outPath);
    } catch (IOException e) {
        System.err.printf("cat :: Could not create output file: %s\n", e.getMessage());
        return 6;
    }

    // Output the header.

    try {
        // Don't use the returned stream, because we're concatenating directly
        // and don't want to apply another layer of compression to BAM.
        new SAMOutputPreparer().prepareForRecords(out, format, header);

    } catch (IOException e) {
        System.err.printf("cat :: Outputting header failed: %s\n", e.getMessage());
        return 7;
    }

    // Output the records from each file in the order given, converting if
    // necessary.

    int inIdx = 1;
    try {
        for (final Path inPath : inputs) {
            if (verbose) {
                System.out.printf("cat :: Concatenating path %d of %d...\n", inIdx++, inputs.size());
            }
            switch (format) {
            case SAM: {
                final InputStream in = inPath.getFileSystem(conf).open(inPath);

                // Use SAMFileReader to grab the header, but ignore it, thus
                // ensuring that the header has been skipped.
                new SAMFileReader(in).getFileHeader();

                IOUtils.copyBytes(in, out, conf, false);
                in.close();
                break;
            }
            case BAM: {
                final FSDataInputStream in = inPath.getFileSystem(conf).open(inPath);

                // Find the block length, thankfully given to us by the BGZF
                // format. We need it in order to know how much gzipped data to
                // read after skipping the BAM header, so that we can only read
                // that much and then simply copy the remaining gzip blocks
                // directly.

                final ByteBuffer block = ByteBuffer.wrap(new byte[0xffff]).order(ByteOrder.LITTLE_ENDIAN);

                // Don't use readFully here, since EOF is fine.
                for (int read = 0, prev; (prev = in.read(block.array(), read, block.capacity() - read)) < block
                        .capacity();) {
                    // EOF is fine.
                    if (prev == -1)
                        break;
                    read += prev;
                }

                // Find the BGZF subfield and extract the length from it.
                int blockLength = 0;
                for (int xlen = (int) block.getShort(10) & 0xffff, i = 12, end = i + xlen; i < end;) {
                    final int slen = (int) block.getShort(i + 2) & 0xffff;
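                    // 0x4342 is the BGZF extra subfield ID "BC" ('B' = 0x42,
                    // 'C' = 0x43) read as a little-endian short.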
                    if (block.getShort(i) == 0x4342 && slen == 2) {
                        blockLength = ((int) block.getShort(i + 4) & 0xffff) + 1;
                        break;
                    }
                    i += 4 + slen;
                }
                if (blockLength == 0)
                    throw new IOException("BGZF extra field not found in " + inPath);

                if (verbose) {
                    System.err.printf("cat ::   first block length %d\n", blockLength);
                }

                // Skip the BAM header. Can't use SAMFileReader because it'll
                // use its own BlockCompressedInputStream.

                final ByteArrayInputStream blockIn = new ByteArrayInputStream(block.array(), 0, blockLength);

                final BlockCompressedInputStream bin = new BlockCompressedInputStream(blockIn);

                // Theoretically we could write into the ByteBuffer we already
                // had, since BlockCompressedInputStream needs to read the
                // header before it can decompress any data and thereafter we
                // can freely overwrite the first 8 bytes of the header... but
                // that's a bit too nasty, so let's not.
                final ByteBuffer buf = ByteBuffer.wrap(new byte[8]).order(ByteOrder.LITTLE_ENDIAN);

                // Read the BAM magic number and the SAM header length, verify
                // the magic, and skip the SAM header.

                IOUtils.readFully(bin, buf.array(), 0, 8);

                final int magic = buf.getInt(0), headerLen = buf.getInt(4);

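                // 0x014d4142 is the BAM magic "BAM\1" (bytes 0x42 0x41 0x4d 0x01)
                // read as a little-endian int.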
                if (magic != 0x014d4142)
                    throw new IOException("bad BAM magic number in " + inPath);

                IOUtils.skipFully(bin, headerLen);

                // Skip the reference sequences.

                IOUtils.readFully(bin, buf.array(), 0, 4);

                for (int i = buf.getInt(0); i-- > 0;) {
                    // Read the reference name length and skip it along with the
                    // reference length.
                    IOUtils.readFully(bin, buf.array(), 0, 4);
                    IOUtils.skipFully(bin, buf.getInt(0) + 4);
                }

                // Recompress the rest of this gzip block.

                final int remaining = bin.available();

                if (verbose)
                    System.err.printf("cat ::   %d bytes to bgzip\n", remaining);

                if (remaining > 0) {
                    // The overload of IOUtils.copyBytes that takes "long length"
                    // was added only in Hadoop 0.20.205.0, which we don't want
                    // to depend on, so copy manually.
                    final byte[] remBuf = new byte[remaining];
                    IOUtils.readFully(bin, remBuf, 0, remBuf.length);

                    final BlockCompressedOutputStream bout = new BlockCompressedOutputStream(out, null);

                    bout.write(remBuf);
                    bout.flush();
                }

                // Just copy the raw bytes comprising the remaining blocks.

                in.seek(blockLength);
                IOUtils.copyBytes(in, out, conf, false);
                in.close();
                break;
            }
            }
        }
    } catch (IOException e) {
        System.err.printf("cat :: Outputting records failed: %s\n", e.getMessage());
        return 8;
    }

    // For BAM, output the BGZF terminator.

    try {
        if (format == SAMFormat.BAM)
            out.write(BlockCompressedStreamConstants.EMPTY_GZIP_BLOCK);

        out.close();
    } catch (IOException e) {
        System.err.printf("cat :: Finishing output failed: %s\n", e.getMessage());
        return 9;
    }
    return 0;
}

From source file:fi.tkk.ics.hadoop.bam.cli.Utils.java

License:Open Source License

/** Merges the files in the given directory that have names given by
 * getMergeableWorkFile() into out.
 *
 * Outputs progress reports if commandName is non-null.
 */
public static void mergeInto(OutputStream out, Path directory, String basePrefix, String basePostfix,
        Configuration conf, String commandName) throws IOException {
    final FileSystem fs = directory.getFileSystem(conf);

    final FileStatus[] parts = fs.globStatus(new Path(directory,
            basePrefix + conf.get(WORK_FILENAME_PROPERTY) + basePostfix + "-[0-9][0-9][0-9][0-9][0-9][0-9]*"));

    int i = 0;
    Timer t = new Timer();
    for (final FileStatus part : parts) {
        if (commandName != null) {
            System.out.printf("%s :: Merging part %d (size %d)...", commandName, ++i, part.getLen());
            System.out.flush();

            t.start();
        }

        final InputStream in = fs.open(part.getPath());
        IOUtils.copyBytes(in, out, conf, false);
        in.close();

        if (commandName != null)
            System.out.printf(" done in %d.%03d s.\n", t.stopS(), t.fms());
    }
    for (final FileStatus part : parts)
        fs.delete(part.getPath(), false);
}

From source file:fr.ens.biologie.genomique.eoulsan.util.hadoop.PathUtils.java

License:LGPL

/**
 * Copy all files in a directory to one output file (merge).
 * @param paths list of path files to concat
 * @param dstPath destination path
 * @param deleteSource true if the original files must be deleted
 * @param overwrite true if an existing destination file must be deleted
 * @param conf Configuration
 * @param addString string to add
 */
public static boolean concat(final List<Path> paths, final Path dstPath, final boolean deleteSource,
        final boolean overwrite, final Configuration conf, final String addString) throws IOException {

    if (paths == null) {
        throw new NullPointerException("The list of path to concat is null");
    }

    if (paths.size() == 0) {
        return false;
    }

    if (dstPath == null) {
        throw new NullPointerException("The destination path is null");
    }

    if (conf == null) {
        throw new NullPointerException("The configuration is null.");
    }

    final FileSystem srcFs = paths.get(0).getFileSystem(conf);
    final FileSystem dstFs = dstPath.getFileSystem(conf);

    if (!overwrite && dstFs.exists(dstPath)) {
        throw new IOException("The output file already exists: " + dstPath);
    }

    try (OutputStream out = dstFs.create(dstPath)) {
        // FileStatus contents[] = srcFS.listStatus(srcDir);
        // for (int i = 0; i < contents.length; i++) {
        for (Path p : paths) {
            if (!srcFs.getFileStatus(p).isDirectory()) {
                try (InputStream in = srcFs.open(p)) {
                    IOUtils.copyBytes(in, out, conf, false);
                    if (addString != null) {
                        out.write(addString.getBytes(FileCharsets.UTF8_CHARSET));
                    }

                }
            }
        }

    }

    if (deleteSource) {
        for (Path p : paths) {
            if (!srcFs.delete(p, false)) {
                return false;
            }
        }
    }

    return true;
}

From source file:gobblin.util.HadoopUtils.java

License:Apache License

/**
 * Copy a file from a srcFs {@link FileSystem} to a dstFs {@link FileSystem}. The src {@link Path} must be a file,
 * that is {@link FileSystem#isFile(Path)} must return true for src.
 *
 * <p>
 *   If overwrite is specified to true, this method may delete the dst directory even if the copy from src to dst fails.
 * </p>
 *
 * @param srcFs the src {@link FileSystem} to copy the file from
 * @param src the src {@link Path} to copy
 * @param dstFs the destination {@link FileSystem} to write to
 * @param dst the destination {@link Path} to write to
 * @param overwrite true if the dst {@link Path} should be overwritten, false otherwise
 */
public static void copyFile(FileSystem srcFs, Path src, FileSystem dstFs, Path dst, boolean overwrite,
        Configuration conf) throws IOException {

    Preconditions.checkArgument(srcFs.isFile(src),
            String.format("Cannot copy from %s to %s because src is not a file", src, dst));
    Preconditions.checkArgument(overwrite || !dstFs.exists(dst),
            String.format("Cannot copy from %s to %s because dst exists", src, dst));

    try (InputStream in = srcFs.open(src); OutputStream out = dstFs.create(dst, overwrite)) {
        IOUtils.copyBytes(in, out, conf, false);
    } catch (Throwable t1) {
        try {
            deleteIfExists(dstFs, dst, true);
        } catch (Throwable t2) {
            // Do nothing
        }
        throw t1;
    }
}

From source file:hadoop.examples.hdfs.FileCopyWithProgress.java

License:Open Source License

public static void main(String[] args) throws Exception {
    String uri = "hdfs://exoplatform:9000/user/haint/input-1/hadoop-0.20.203.1-SNAPSHOT.tar.gz";
    InputStream in = new BufferedInputStream(
            new FileInputStream("/home/haint/java/research/hadoop-0.20.203.1-SNAPSHOT.tar.gz"));
    FileSystem fs = FileSystem.get(URI.create(uri), new Configuration());
    OutputStream out = fs.create(new Path(uri), new Progressable() {
        public void progress() {
            System.out.print(".");
        }
    });

    IOUtils.copyBytes(in, out, 4096, true);
}

From source file:hadoop.examples.hdfs.FileSystemCat.java

License:Open Source License

public static void main(String[] args) throws Exception {
    String uri = "hdfs://exoplatform:9000/user/haint/input-0/test.txt";
    FileSystem fs = FileSystem.get(URI.create(uri), new Configuration());
    InputStream in = null;
    try {
        in = fs.open(new Path(uri));
        IOUtils.copyBytes(in, System.out, 4096, false);
    } finally {
        IOUtils.closeStream(in);
    }
    System.out.println("---------------------------------------------------");
    FSDataInputStream fsDataInputStream = null;
    try {
        fsDataInputStream = fs.open(new Path(uri));
        IOUtils.copyBytes(fsDataInputStream, System.out, 256, false);
        System.out.println("---------------------------------------------------");
        fsDataInputStream.seek(0);
        IOUtils.copyBytes(fsDataInputStream, System.out, 256, false);
    } finally {
        IOUtils.closeStream(fsDataInputStream);
    }
}

From source file:Hama_MMMAS.HAMA_TEST.java

static void printOutput(HamaConfiguration conf) throws IOException {
    FileSystem fs = FileSystem.get(conf);
    FileStatus[] files = fs.listStatus(TMP_OUTPUT);
    for (FileStatus file : files) {
        if (file.getLen() > 0) {
            try (FSDataInputStream in = fs.open(file.getPath())) {
                IOUtils.copyBytes(in, System.out, conf, false);
            }
            break;
        }
    }

    fs.delete(TMP_OUTPUT, true);
}

From source file:hdfs.FileUtil.java

License:Apache License

/** Copy files between FileSystems. */
public static boolean copy(FileSystem srcFS, Path src, FileSystem dstFS, Path dst, boolean deleteSource,
        boolean overwrite, Configuration conf) throws IOException {
    dst = checkDest(src.getName(), dstFS, dst, overwrite);

    if (srcFS.getFileStatus(src).isDir()) {
        checkDependencies(srcFS, src, dstFS, dst);
        if (!dstFS.mkdirs(dst)) {
            return false;
        }
        FileStatus contents[] = srcFS.listStatus(src);
        for (int i = 0; i < contents.length; i++) {
            copy(srcFS, contents[i].getPath(), dstFS, new Path(dst, contents[i].getPath().getName()),
                    deleteSource, overwrite, conf);
        }
    } else if (srcFS.isFile(src)) {
        InputStream in = null;
        OutputStream out = null;
        try {
            in = srcFS.open(src);
            out = dstFS.create(dst, overwrite);
            IOUtils.copyBytes(in, out, conf, true);
        } catch (IOException e) {
            IOUtils.closeStream(out);
            IOUtils.closeStream(in);
            throw e;
        }
    } else {
        throw new IOException(src.toString() + ": No such file or directory");
    }
    if (deleteSource) {
        return srcFS.delete(src, true);
    } else {
        return true;
    }

}

From source file:hdfs.FileUtil.java

License:Apache License

/** Copy all files in a directory to one output file (merge). */
public static boolean copyMerge(FileSystem srcFS, Path srcDir, FileSystem dstFS, Path dstFile,
        boolean deleteSource, Configuration conf, String addString) throws IOException {
    dstFile = checkDest(srcDir.getName(), dstFS, dstFile, false);

    if (!srcFS.getFileStatus(srcDir).isDir())
        return false;

    OutputStream out = dstFS.create(dstFile);

    try {
        FileStatus contents[] = srcFS.listStatus(srcDir);
        for (int i = 0; i < contents.length; i++) {
            if (!contents[i].isDir()) {
                InputStream in = srcFS.open(contents[i].getPath());
                try {
                    IOUtils.copyBytes(in, out, conf, false);
                    if (addString != null)
                        out.write(addString.getBytes("UTF-8"));

                } finally {
                    in.close();
                }
            }
        }
    } finally {
        out.close();
    }

    if (deleteSource) {
        return srcFS.delete(srcDir, true);
    } else {
        return true;
    }
}