List of usage examples for org.apache.hadoop.io IOUtils copyBytes
public static void copyBytes(InputStream in, OutputStream out, long count, boolean close) throws IOException
From source file:eml.studio.server.util.HDFSIO.java
License:Open Source License
/**
 * Reads the whole content of a file into a string, like {@code hdfs dfs -cat}.
 *
 * <p>The bytes are decoded as UTF-8 so that the result does not depend on the
 * default charset of the machine running this code.
 *
 * @param uri target file uri
 * @return content string in file
 * @throws IOException if the file cannot be opened or read
 */
public static String cat(String uri) throws IOException {
    InputStream in = getInputStream(uri);
    ByteArrayOutputStream out = new ByteArrayOutputStream();
    // close == true: copyBytes closes both streams once the copy is finished.
    IOUtils.copyBytes(in, out, 4096, true);
    // Explicit charset: the no-arg toString() would use the platform default.
    return out.toString("UTF-8");
}
From source file:fi.tkk.ics.hadoop.bam.cli.plugins.Cat.java
License:Open Source License
@Override protected int run(final CmdLineParser parser) { final List<String> args = parser.getRemainingArgs(); if (args.isEmpty()) { System.err.println("cat :: OUTPATH not given."); return 3; }/*from ww w . j av a 2 s. co m*/ if (args.size() == 1) { System.err.println("cat :: no INPATHs given."); return 3; } final Path outPath = new Path(args.get(0)); final List<String> ins = args.subList(1, args.size()); final boolean verbose = parser.getBoolean(verboseOpt); final SAMFileReader.ValidationStringency stringency = Utils.toStringency(parser.getOptionValue( stringencyOpt, SAMFileReader.ValidationStringency.DEFAULT_STRINGENCY.toString()), "cat"); if (stringency == null) return 3; final Configuration conf = getConf(); // Expand the glob patterns. final List<Path> inputs = new ArrayList<Path>(ins.size()); for (final String in : ins) { try { final Path p = new Path(in); for (final FileStatus fstat : p.getFileSystem(conf).globStatus(p)) inputs.add(fstat.getPath()); } catch (IOException e) { System.err.printf("cat :: Could not expand glob pattern '%s': %s\n", in, e.getMessage()); } } final Path input0 = inputs.get(0); // Infer the format from the first input path or contents. // the first input path or contents. SAMFormat format = SAMFormat.inferFromFilePath(input0); if (format == null) { try { format = SAMFormat.inferFromData(input0.getFileSystem(conf).open(input0)); } catch (IOException e) { System.err.printf("cat :: Could not read input '%s': %s\n", input0, e.getMessage()); return 4; } if (format == null) { System.err.printf("cat :: Unknown SAM format in input '%s'\n", inputs.get(0)); return 4; } } // Choose the header. final SAMFileHeader header; try { final SAMFileReader r = new SAMFileReader(input0.getFileSystem(conf).open(input0)); header = r.getFileHeader(); r.close(); } catch (IOException e) { System.err.printf("cat :: Could not read input '%s': %s\n", input0, e.getMessage()); return 5; } // Open the output. 
final OutputStream out; try { out = outPath.getFileSystem(conf).create(outPath); } catch (IOException e) { System.err.printf("cat :: Could not create output file: %s\n", e.getMessage()); return 6; } // Output the header. try { // Don't use the returned stream, because we're concatenating directly // and don't want to apply another layer of compression to BAM. new SAMOutputPreparer().prepareForRecords(out, format, header); } catch (IOException e) { System.err.printf("cat :: Outputting header failed: %s\n", e.getMessage()); return 7; } // Output the records from each file in the order given, converting if // necessary. int inIdx = 1; try { for (final Path inPath : inputs) { if (verbose) { System.out.printf("cat :: Concatenating path %d of %d...\n", inIdx++, inputs.size()); } switch (format) { case SAM: { final InputStream in = inPath.getFileSystem(conf).open(inPath); // Use SAMFileReader to grab the header, but ignore it, thus // ensuring that the header has been skipped. new SAMFileReader(in).getFileHeader(); IOUtils.copyBytes(in, out, conf, false); in.close(); break; } case BAM: { final FSDataInputStream in = inPath.getFileSystem(conf).open(inPath); // Find the block length, thankfully given to us by the BGZF // format. We need it in order to know how much gzipped data to // read after skipping the BAM header, so that we can only read // that much and then simply copy the remaining gzip blocks // directly. final ByteBuffer block = ByteBuffer.wrap(new byte[0xffff]).order(ByteOrder.LITTLE_ENDIAN); // Don't use readFully here, since EOF is fine. for (int read = 0, prev; (prev = in.read(block.array(), read, block.capacity() - read)) < block .capacity();) { // EOF is fine. if (prev == -1) break; read += prev; } // Find the BGZF subfield and extract the length from it. 
int blockLength = 0; for (int xlen = (int) block.getShort(10) & 0xffff, i = 12, end = i + xlen; i < end;) { final int slen = (int) block.getShort(i + 2) & 0xffff; if (block.getShort(i) == 0x4342 && slen == 2) { blockLength = ((int) block.getShort(i + 4) & 0xffff) + 1; break; } i += 4 + slen; } if (blockLength == 0) throw new IOException("BGZF extra field not found in " + inPath); if (verbose) { System.err.printf("cat :: first block length %d\n", blockLength); } // Skip the BAM header. Can't use SAMFileReader because it'll // use its own BlockCompressedInputStream. final ByteArrayInputStream blockIn = new ByteArrayInputStream(block.array(), 0, blockLength); final BlockCompressedInputStream bin = new BlockCompressedInputStream(blockIn); // Theoretically we could write into the ByteBuffer we already // had, since BlockCompressedInputStream needs to read the // header before it can decompress any data and thereafter we // can freely overwrite the first 8 bytes of the header... but // that's a bit too nasty, so let's not. final ByteBuffer buf = ByteBuffer.wrap(new byte[8]).order(ByteOrder.LITTLE_ENDIAN); // Read the BAM magic number and the SAM header length, verify // the magic, and skip the SAM header. IOUtils.readFully(bin, buf.array(), 0, 8); final int magic = buf.getInt(0), headerLen = buf.getInt(4); if (magic != 0x014d4142) throw new IOException("bad BAM magic number in " + inPath); IOUtils.skipFully(bin, headerLen); // Skip the reference sequences. IOUtils.readFully(bin, buf.array(), 0, 4); for (int i = buf.getInt(0); i-- > 0;) { // Read the reference name length and skip it along with the // reference length. IOUtils.readFully(bin, buf.array(), 0, 4); IOUtils.skipFully(bin, buf.getInt(0) + 4); } // Recompress the rest of this gzip block. 
final int remaining = bin.available(); if (verbose) System.err.printf("cat :: %d bytes to bgzip\n", remaining); if (remaining > 0) { // The overload of IOUtils.copyBytes that takes "long length" // was added only in Hadoop 0.20.205.0, which we don't want // to depend on, so copy manually. final byte[] remBuf = new byte[remaining]; IOUtils.readFully(bin, remBuf, 0, remBuf.length); final BlockCompressedOutputStream bout = new BlockCompressedOutputStream(out, null); bout.write(remBuf); bout.flush(); } // Just copy the raw bytes comprising the remaining blocks. in.seek(blockLength); IOUtils.copyBytes(in, out, conf, false); in.close(); break; } } } } catch (IOException e) { System.err.printf("cat :: Outputting records failed: %s\n", e.getMessage()); return 8; } // For BAM, output the BGZF terminator. try { if (format == SAMFormat.BAM) out.write(BlockCompressedStreamConstants.EMPTY_GZIP_BLOCK); out.close(); } catch (IOException e) { System.err.printf("cat :: Finishing output failed: %s\n", e.getMessage()); return 9; } return 0; }
From source file:fi.tkk.ics.hadoop.bam.cli.Utils.java
License:Open Source License
/** Merges the files in the given directory that have names given by * getMergeableWorkFile() into out./*from w w w . j a va 2 s. com*/ * * Outputs progress reports if commandName is non-null. */ public static void mergeInto(OutputStream out, Path directory, String basePrefix, String basePostfix, Configuration conf, String commandName) throws IOException { final FileSystem fs = directory.getFileSystem(conf); final FileStatus[] parts = fs.globStatus(new Path(directory, basePrefix + conf.get(WORK_FILENAME_PROPERTY) + basePostfix + "-[0-9][0-9][0-9][0-9][0-9][0-9]*")); int i = 0; Timer t = new Timer(); for (final FileStatus part : parts) { if (commandName != null) { System.out.printf("%s :: Merging part %d (size %d)...", commandName, ++i, part.getLen()); System.out.flush(); t.start(); } final InputStream in = fs.open(part.getPath()); IOUtils.copyBytes(in, out, conf, false); in.close(); if (commandName != null) System.out.printf(" done in %d.%03d s.\n", t.stopS(), t.fms()); } for (final FileStatus part : parts) fs.delete(part.getPath(), false); }
From source file:fr.ens.biologie.genomique.eoulsan.util.hadoop.PathUtils.java
License:LGPL
/** * Copy all files in a directory to one output file (merge). * @param paths list of path files to concat * @param dstPath destination path//www. j a v a 2 s .c o m * @param deleteSource true if the original files must be deleted * @param overwrite true if an existing destination file must be deleted * @param conf Configuration * @param addString string to add */ public static boolean concat(final List<Path> paths, final Path dstPath, final boolean deleteSource, final boolean overwrite, final Configuration conf, final String addString) throws IOException { if (paths == null) { throw new NullPointerException("The list of path to concat is null"); } if (paths.size() == 0) { return false; } if (dstPath == null) { throw new NullPointerException("The destination path is null"); } if (conf == null) { throw new NullPointerException("The configuration is null."); } final FileSystem srcFs = paths.get(0).getFileSystem(conf); final FileSystem dstFs = dstPath.getFileSystem(conf); if (!overwrite && dstFs.exists(dstPath)) { throw new IOException("The output file already exists: " + dstPath); } try (OutputStream out = dstFs.create(dstPath)) { // FileStatus contents[] = srcFS.listStatus(srcDir); // for (int i = 0; i < contents.length; i++) { for (Path p : paths) { if (!srcFs.getFileStatus(p).isDirectory()) { try (InputStream in = srcFs.open(p)) { IOUtils.copyBytes(in, out, conf, false); if (addString != null) { out.write(addString.getBytes(FileCharsets.UTF8_CHARSET)); } } } } } if (deleteSource) { for (Path p : paths) { if (!srcFs.delete(p, false)) { return false; } } } return true; }
From source file:gobblin.util.HadoopUtils.java
License:Apache License
/** * Copy a file from a srcFs {@link FileSystem} to a dstFs {@link FileSystem}. The src {@link Path} must be a file, * that is {@link FileSystem#isFile(Path)} must return true for src. * * <p>// w w w . j av a 2 s .co m * If overwrite is specified to true, this method may delete the dst directory even if the copy from src to dst fails. * </p> * * @param srcFs the src {@link FileSystem} to copy the file from * @param src the src {@link Path} to copy * @param dstFs the destination {@link FileSystem} to write to * @param dst the destination {@link Path} to write to * @param overwrite true if the dst {@link Path} should be overwritten, false otherwise */ public static void copyFile(FileSystem srcFs, Path src, FileSystem dstFs, Path dst, boolean overwrite, Configuration conf) throws IOException { Preconditions.checkArgument(srcFs.isFile(src), String.format("Cannot copy from %s to %s because src is not a file", src, dst)); Preconditions.checkArgument(overwrite || !dstFs.exists(dst), String.format("Cannot copy from %s to %s because dst exists", src, dst)); try (InputStream in = srcFs.open(src); OutputStream out = dstFs.create(dst, overwrite)) { IOUtils.copyBytes(in, out, conf, false); } catch (Throwable t1) { try { deleteIfExists(dstFs, dst, true); } catch (Throwable t2) { // Do nothing } throw t1; } }
From source file:hadoop.examples.hdfs.FileCopyWithProgress.java
License:Open Source License
/**
 * Copies a fixed local archive to a fixed HDFS location and prints a dot to
 * standard output each time the file system reports progress.
 *
 * @param args ignored
 * @throws Exception if the local file cannot be read or the copy fails
 */
public static void main(String[] args) throws Exception {
    String uri = "hdfs://exoplatform:9000/user/haint/input-1/hadoop-0.20.203.1-SNAPSHOT.tar.gz";
    InputStream in = new BufferedInputStream(
            new FileInputStream("/home/haint/java/research/hadoop-0.20.203.1-SNAPSHOT.tar.gz"));
    try {
        FileSystem fs = FileSystem.get(URI.create(uri), new Configuration());
        OutputStream out = fs.create(new Path(uri), new Progressable() {
            @Override
            public void progress() {
                System.out.print(".");
            }
        });
        // close == true: copyBytes closes both streams when the copy ends.
        IOUtils.copyBytes(in, out, 4096, true);
    } finally {
        // Covers the case where the output could not even be created, in
        // which case copyBytes never got the chance to close the local file.
        IOUtils.closeStream(in);
    }
}
From source file:hadoop.examples.hdfs.FileSystemCat.java
License:Open Source License
/**
 * Prints a fixed HDFS text file to standard output three times: once through
 * a plain input stream, then twice through a seekable stream that is rewound
 * to the start between the two passes. A dashed line separates the passes.
 *
 * @param args ignored
 * @throws Exception if the file cannot be opened or read
 */
public static void main(String[] args) throws Exception {
    final String uri = "hdfs://exoplatform:9000/user/haint/input-0/test.txt";
    final String separator = "---------------------------------------------------";

    final FileSystem fs = FileSystem.get(URI.create(uri), new Configuration());
    final Path file = new Path(uri);

    // Pass 1: plain stream, 4096 byte buffer.
    InputStream plain = null;
    try {
        plain = fs.open(file);
        IOUtils.copyBytes(plain, System.out, 4096, false);
    } finally {
        IOUtils.closeStream(plain);
    }

    System.out.println(separator);

    // Passes 2 and 3: one seekable stream, rewound in between.
    FSDataInputStream seekable = null;
    try {
        seekable = fs.open(file);
        IOUtils.copyBytes(seekable, System.out, 256, false);

        System.out.println(separator);

        seekable.seek(0);
        IOUtils.copyBytes(seekable, System.out, 256, false);
    } finally {
        IOUtils.closeStream(seekable);
    }
}
From source file:Hama_MMMAS.HAMA_TEST.java
static void printOutput(HamaConfiguration conf) throws IOException { FileSystem fs = FileSystem.get(conf); FileStatus[] files = fs.listStatus(TMP_OUTPUT); for (FileStatus file : files) { if (file.getLen() > 0) { try (FSDataInputStream in = fs.open(file.getPath())) { IOUtils.copyBytes(in, System.out, conf, false); }//from www .j a v a2s .co m break; } } fs.delete(TMP_OUTPUT, true); }
From source file:hdfs.FileUtil.java
License:Apache License
/**
 * Copy files between FileSystems.
 *
 * <p>A directory is copied recursively. When a nested copy reports failure,
 * the remaining entries are still copied, but the source is NOT deleted and
 * false is returned, so that data that was not copied is never removed.
 *
 * @param srcFS        file system of the source
 * @param src          file or directory to copy
 * @param dstFS        file system of the destination
 * @param dst          destination path; the path actually written is the one returned by checkDest
 * @param deleteSource true to delete the source once it has been copied
 * @param overwrite    true to overwrite an existing destination file
 * @param conf         configuration handed to the byte copy
 * @return false if a destination directory could not be created, if a nested
 *         copy failed or if the source could not be deleted; true otherwise
 * @throws IOException if src is neither a file nor a directory, or on I/O failure
 */
public static boolean copy(FileSystem srcFS, Path src, FileSystem dstFS, Path dst, boolean deleteSource,
        boolean overwrite, Configuration conf) throws IOException {
    dst = checkDest(src.getName(), dstFS, dst, overwrite);

    if (srcFS.getFileStatus(src).isDir()) {
        checkDependencies(srcFS, src, dstFS, dst);
        if (!dstFS.mkdirs(dst)) {
            return false;
        }
        FileStatus contents[] = srcFS.listStatus(src);
        boolean allCopied = true;
        for (int i = 0; i < contents.length; i++) {
            if (!copy(srcFS, contents[i].getPath(), dstFS, new Path(dst, contents[i].getPath().getName()),
                    deleteSource, overwrite, conf)) {
                allCopied = false;
            }
        }
        if (!allCopied) {
            // Deleting src now could remove entries that never reached dst.
            return false;
        }
    } else if (srcFS.isFile(src)) {
        InputStream in = null;
        OutputStream out = null;
        try {
            in = srcFS.open(src);
            out = dstFS.create(dst, overwrite);
            // close == true: copyBytes closes both streams after the copy.
            IOUtils.copyBytes(in, out, conf, true);
        } catch (IOException e) {
            IOUtils.closeStream(out);
            IOUtils.closeStream(in);
            throw e;
        }
    } else {
        throw new IOException(src.toString() + ": No such file or directory");
    }

    if (deleteSource) {
        return srcFS.delete(src, true);
    } else {
        return true;
    }
}
From source file:hdfs.FileUtil.java
License:Apache License
/** Copy all files in a directory to one output file (merge). */ public static boolean copyMerge(FileSystem srcFS, Path srcDir, FileSystem dstFS, Path dstFile, boolean deleteSource, Configuration conf, String addString) throws IOException { dstFile = checkDest(srcDir.getName(), dstFS, dstFile, false); if (!srcFS.getFileStatus(srcDir).isDir()) return false; OutputStream out = dstFS.create(dstFile); try {//from ww w. j a v a2 s .c o m FileStatus contents[] = srcFS.listStatus(srcDir); for (int i = 0; i < contents.length; i++) { if (!contents[i].isDir()) { InputStream in = srcFS.open(contents[i].getPath()); try { IOUtils.copyBytes(in, out, conf, false); if (addString != null) out.write(addString.getBytes("UTF-8")); } finally { in.close(); } } } } finally { out.close(); } if (deleteSource) { return srcFS.delete(srcDir, true); } else { return true; } }