List of usage examples for org.apache.hadoop.io IOUtils copyBytes
public static void copyBytes(InputStream in, OutputStream out, long count, boolean close) throws IOException
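A minimal sketch of this overload (stream contents and count are illustrative): with a long count, copyBytes copies exactly count bytes and, when close is true, closes both streams afterwards. Note that several examples below instead use the sibling overloads copyBytes(in, out, int buffSize, boolean close) and copyBytes(in, out, Configuration conf, boolean close), which copy the entire stream.

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import org.apache.hadoop.io.IOUtils;

public class CopyBytesCountSketch {
    public static void main(String[] args) throws Exception {
        ByteArrayInputStream in = new ByteArrayInputStream("hello, world".getBytes());
        ByteArrayOutputStream out = new ByteArrayOutputStream();
        // copy exactly the first 5 bytes; close=true closes both streams
        IOUtils.copyBytes(in, out, 5L, true);
        System.out.println(out.toString()); // prints "hello"
    }
}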
From source file:com.endgame.binarypig.util.StreamUtilsTest.java
License:Apache License
public void testWriteToFile() throws Exception {
    File binaryFile = new File("/tmp/" + UUID.randomUUID().toString());
    binaryFile.deleteOnExit();
    assertFalse(binaryFile.exists());

    BytesWritable value = new BytesWritable("This is a test".getBytes());
    try {
        StreamUtils.writeToFile(value, binaryFile);
    } catch (IOException e) {
        e.printStackTrace();
        fail("This should not throw an Exception");
    }
    assertTrue(binaryFile.exists());

    ByteArrayOutputStream bytes = new ByteArrayOutputStream();
    IOUtils.copyBytes(new FileInputStream(binaryFile), bytes, 100, true);
    assertEquals("This is a test", new String(bytes.toByteArray()));
}
From source file:com.gruter.hadoop.customShell.CustomShell.java
License:Apache License
/**
 * Print from src to stdout.
 */
private void printToStdout(InputStream in) throws IOException {
    try {
        IOUtils.copyBytes(in, System.out, getConf(), false);
    } finally {
        in.close();
    }
}
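This example uses the Configuration-based overload, which reads its copy buffer size from the io.file.buffer.size property (4096 by default). A minimal sketch of the same call against an in-memory stream (the property value here is illustrative):

import java.io.ByteArrayInputStream;
import java.io.InputStream;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.IOUtils;

public class ConfOverloadSketch {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        conf.setInt("io.file.buffer.size", 8192); // buffer size picked up by copyBytes
        InputStream in = new ByteArrayInputStream("example\n".getBytes());
        try {
            IOUtils.copyBytes(in, System.out, conf, false); // close=false: caller closes
        } finally {
            in.close();
        }
    }
}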
From source file:com.hadoop.compression.lzo.LzoIndex.java
License:Open Source License
/**
 * Read the index of the lzo file.
 *
 * @param fs The index file is on this file system.
 * @param lzoFile the file whose index we are reading -- NOT the index file itself. That is,
 *        pass in filename.lzo, not filename.lzo.index, for this parameter.
 * @throws IOException
 */
public static LzoIndex readIndex(FileSystem fs, Path lzoFile) throws IOException {
    FSDataInputStream indexIn = null;
    Path indexFile = lzoFile.suffix(LZO_INDEX_SUFFIX);

    try {
        indexIn = fs.open(indexFile);
    } catch (IOException fileNotFound) {
        // return empty index, fall back to the unsplittable mode
        return new LzoIndex();
    }

    int capacity = 16 * 1024 * 8; // size for a 4GB file (with 256KB lzo blocks)
    DataOutputBuffer bytes = new DataOutputBuffer(capacity);

    // copy indexIn and close it
    IOUtils.copyBytes(indexIn, bytes, 4 * 1024, true);

    ByteBuffer bytesIn = ByteBuffer.wrap(bytes.getData(), 0, bytes.getLength());
    int blocks = bytesIn.remaining() / 8;
    LzoIndex index = new LzoIndex(blocks);
    for (int i = 0; i < blocks; i++) {
        index.set(i, bytesIn.getLong());
    }
    return index;
}
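A hypothetical call site for readIndex; the .lzo path is illustrative. As the method's fallback shows, a missing .index file is not an error and simply yields an empty index:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import com.hadoop.compression.lzo.LzoIndex;

public class ReadLzoIndexSketch {
    public static void main(String[] args) throws Exception {
        FileSystem fs = FileSystem.get(new Configuration());
        // hypothetical path; pass the .lzo file itself, not the .lzo.index file
        LzoIndex index = LzoIndex.readIndex(fs, new Path("/data/logs.lzo"));
        // an empty index means the file must be processed as unsplittable
    }
}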
From source file:com.hdfs.concat.crush.integration.CrushMapReduceTest.java
License:Apache License
/**
 * Copies data from the given input stream to an HDFS file at the given path.
 * This method will close the input stream.
 */
protected final void copyStreamToHdfs(InputStream resource, String hdfsDestFileName) throws IOException {
    FileSystem fs = getFileSystem();
    FSDataOutputStream os = fs.create(new Path(hdfsDestFileName), false);
    IOUtils.copyBytes(resource, os, fs.getConf(), true);
}
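A hypothetical call site for this helper, with the classpath resource and HDFS destination both illustrative. Because copyBytes is invoked with close=true, both the resource stream and the HDFS output stream are closed for the caller:

copyStreamToHdfs(getClass().getResourceAsStream("/fixtures/input.txt"), "/tmp/crush/input.txt");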
From source file:com.ibm.bi.dml.runtime.io.WriterMatrixMarket.java
License:Open Source License
/**
 * Merges a text-cell matrix on HDFS (a single file or a directory of part files) into one
 * MatrixMarket file, writing the MatrixMarket header first.
 *
 * @param srcFileName
 * @param fileName
 * @param rlen
 * @param clen
 * @param nnz
 * @throws IOException
 */
public void mergeTextcellToMatrixMarket(String srcFileName, String fileName, long rlen, long clen, long nnz)
        throws IOException {
    Configuration conf = new Configuration(ConfigurationManager.getCachedJobConf());

    Path src = new Path(srcFileName);
    Path merge = new Path(fileName);
    FileSystem hdfs = FileSystem.get(conf);

    if (hdfs.exists(merge)) {
        hdfs.delete(merge, true);
    }
    OutputStream out = hdfs.create(merge, true);

    // write out the header first
    StringBuilder sb = new StringBuilder();
    sb.append("%%MatrixMarket matrix coordinate real general\n");
    // output number of rows, number of columns and number of nnz
    sb.append(rlen + " " + clen + " " + nnz + "\n");
    out.write(sb.toString().getBytes());

    // if the source is a directory
    if (hdfs.getFileStatus(src).isDirectory()) {
        try {
            FileStatus[] contents = hdfs.listStatus(src);
            for (int i = 0; i < contents.length; i++) {
                if (!contents[i].isDirectory()) {
                    InputStream in = hdfs.open(contents[i].getPath());
                    try {
                        IOUtils.copyBytes(in, out, conf, false);
                    } finally {
                        IOUtilFunctions.closeSilently(in);
                    }
                }
            }
        } finally {
            IOUtilFunctions.closeSilently(out);
        }
    } else if (hdfs.isFile(src)) {
        InputStream in = null;
        try {
            in = hdfs.open(src);
            IOUtils.copyBytes(in, out, conf, true);
        } finally {
            IOUtilFunctions.closeSilently(in);
            IOUtilFunctions.closeSilently(out);
        }
    } else {
        throw new IOException(src.toString() + ": No such file or directory");
    }
}
From source file:com.ibm.bi.dml.runtime.io.WriterTextCSV.java
License:Open Source License
/**
 * Method to merge multiple CSV part files on HDFS into a single CSV file on HDFS.
 * The part files are created by the CSV_WRITE MR job.
 *
 * This method is invoked from the CP-write instruction.
 *
 * @param srcFileName
 * @param destFileName
 * @param csvprop
 * @param rlen
 * @param clen
 * @throws IOException
 */
public void mergeCSVPartFiles(String srcFileName, String destFileName, CSVFileFormatProperties csvprop,
        long rlen, long clen) throws IOException {
    Configuration conf = new Configuration(ConfigurationManager.getCachedJobConf());

    Path srcFilePath = new Path(srcFileName);
    Path mergedFilePath = new Path(destFileName);
    FileSystem hdfs = FileSystem.get(conf);

    if (hdfs.exists(mergedFilePath)) {
        hdfs.delete(mergedFilePath, true);
    }
    OutputStream out = hdfs.create(mergedFilePath, true);

    // write out the header, if needed
    if (csvprop.hasHeader()) {
        StringBuilder sb = new StringBuilder();
        for (int i = 0; i < clen; i++) {
            sb.append("C" + (i + 1));
            if (i < clen - 1)
                sb.append(csvprop.getDelim());
        }
        sb.append('\n');
        out.write(sb.toString().getBytes());
        sb.setLength(0);
    }

    // if the source is a directory
    if (hdfs.isDirectory(srcFilePath)) {
        try {
            FileStatus[] contents = hdfs.listStatus(srcFilePath);
            Path[] partPaths = new Path[contents.length];
            int numPartFiles = 0;
            for (int i = 0; i < contents.length; i++) {
                if (!contents[i].isDirectory()) {
                    // compact entries into the front of the array so that sorting
                    // does not trip over null holes left by subdirectories
                    partPaths[numPartFiles++] = contents[i].getPath();
                }
            }
            Arrays.sort(partPaths, 0, numPartFiles);

            for (int i = 0; i < numPartFiles; i++) {
                InputStream in = hdfs.open(partPaths[i]);
                try {
                    IOUtils.copyBytes(in, out, conf, false);
                    if (i < numPartFiles - 1)
                        out.write('\n');
                } finally {
                    IOUtilFunctions.closeSilently(in);
                }
            }
        } finally {
            IOUtilFunctions.closeSilently(out);
        }
    } else if (hdfs.isFile(srcFilePath)) {
        InputStream in = null;
        try {
            in = hdfs.open(srcFilePath);
            IOUtils.copyBytes(in, out, conf, true);
        } finally {
            IOUtilFunctions.closeSilently(in);
            IOUtilFunctions.closeSilently(out);
        }
    } else {
        throw new IOException(srcFilePath.toString() + ": No such file or directory");
    }
}
From source file:com.ibm.bi.dml.runtime.io.WriterTextCSV.java
License:Open Source License
/**
 * Prepends a generated header line to a CSV file or directory of CSV part files on HDFS.
 *
 * @param srcFileName
 * @param destFileName
 * @param rlen
 * @param clen
 * @throws IOException
 */
@SuppressWarnings("unchecked")
public void addHeaderToCSV(String srcFileName, String destFileName, long rlen, long clen) throws IOException {
    Configuration conf = new Configuration(ConfigurationManager.getCachedJobConf());

    Path srcFilePath = new Path(srcFileName);
    Path destFilePath = new Path(destFileName);
    FileSystem hdfs = FileSystem.get(conf);

    if (!_props.hasHeader()) {
        // simply move srcFile to destFile
        /*
         * TODO: Remove this roundabout way!
         * For example: destFilePath = /user/biadmin/csv/temp/out/file.csv
         * & the only path that exists already on HDFS is /user/biadmin/csv/.
         * In this case: the directory structure /user/biadmin/csv/temp/out must be created.
         * Simple hdfs.rename() does not seem to create this directory structure.
         */

        // delete the destination file, if exists already
        //boolean ret1 = hdfs.delete(destFilePath, true);

        // Create /user/biadmin/csv/temp/out/file.csv so that ..../temp/out/ is created.
        //boolean ret2 = hdfs.createNewFile(destFilePath);

        // delete the file "file.csv" but preserve the directory structure /user/biadmin/csv/temp/out/
        //boolean ret3 = hdfs.delete(destFilePath, true);

        // finally, move the data to destFilePath = /user/biadmin/csv/temp/out/file.csv
        //boolean ret4 = hdfs.rename(srcFilePath, destFilePath);

        //System.out.println("Return values = del:" + ret1 + ", createNew:" + ret2 + ", del:" + ret3 + ", rename:" + ret4);
        return;
    }

    // construct the header line
    StringBuilder sb = new StringBuilder();
    for (int i = 0; i < clen; i++) {
        sb.append("C" + (i + 1));
        if (i < clen - 1)
            sb.append(_props.getDelim());
    }
    sb.append('\n');

    if (hdfs.isDirectory(srcFilePath)) {
        // compute sorted order among part files
        ArrayList<Path> files = new ArrayList<Path>();
        for (FileStatus stat : hdfs.listStatus(srcFilePath, CSVReblockMR.hiddenFileFilter))
            files.add(stat.getPath());
        Collections.sort(files);

        // first part file path
        Path firstpart = files.get(0);

        // create a temp file, and add header and contents of first part
        Path tmp = new Path(firstpart.toString() + ".tmp");
        OutputStream out = hdfs.create(tmp, true);
        out.write(sb.toString().getBytes());
        sb.setLength(0);

        // copy rest of the data from firstpart
        InputStream in = null;
        try {
            in = hdfs.open(firstpart);
            IOUtils.copyBytes(in, out, conf, true);
        } finally {
            IOUtilFunctions.closeSilently(in);
            IOUtilFunctions.closeSilently(out);
        }

        // rename tmp to firstpart
        hdfs.delete(firstpart, true);
        hdfs.rename(tmp, firstpart);

        // rename srcfile to destFile
        hdfs.delete(destFilePath, true);
        hdfs.createNewFile(destFilePath); // force the creation of directory structure
        hdfs.delete(destFilePath, true);  // delete the file, but preserve the directory structure
        hdfs.rename(srcFilePath, destFilePath); // move the data
    } else if (hdfs.isFile(srcFilePath)) {
        // create destination file
        OutputStream out = hdfs.create(destFilePath, true);

        // write header
        out.write(sb.toString().getBytes());
        sb.setLength(0);

        // copy the data from srcFile
        InputStream in = null;
        try {
            in = hdfs.open(srcFilePath);
            IOUtils.copyBytes(in, out, conf, true);
        } finally {
            IOUtilFunctions.closeSilently(in);
            IOUtilFunctions.closeSilently(out);
        }
    } else {
        throw new IOException(srcFilePath.toString() + ": No such file or directory");
    }
}
From source file:com.ramsane.samplehadoop.ReadFile.java
public static void main(String[] args) {
    Configuration conf = new Configuration();
    conf.set("fs.defaultFS", "hdfs://localhost:9000");
    FSDataInputStream is = null;
    try {
        // get file system object....
        FileSystem fs = FileSystem.get(conf);
        is = fs.open(new Path("/big"));
        IOUtils.copyBytes(is, System.out, 4096, false);
    } catch (IOException ex) {
        System.out.println(ex.getMessage());
    } finally {
        IOUtils.closeStream(is);
    }
}
From source file:com.ramsane.samplehadoop.ReadTwice.java
public static void main(String[] args) {
    Configuration cfg = new Configuration();
    cfg.set("fs.defaultFS", "hdfs://localhost:9000");
    FSDataInputStream in = null;
    try {
        FileSystem fs = FileSystem.get(cfg);
        in = fs.open(new Path("/big"));
        System.out.println("First time...");
        IOUtils.copyBytes(in, System.out, 4096, false);
        System.out.println("Second time...");
        in.seek(0); // rewind to the start before re-reading
        IOUtils.copyBytes(in, System.out, 4096, false);
    } catch (IOException ex) {
        System.out.println(ex.getMessage());
    } finally {
        IOUtils.closeStream(in);
    }
}
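The second read works because fs.open() returns an FSDataInputStream, which implements Seekable; a plain InputStream cannot be rewound. FSDataInputStream also implements PositionedReadable, so bytes at an arbitrary offset can be fetched without moving the stream position. A small sketch reusing the in handle opened above, before it is closed (offset and length illustrative):

byte[] buf = new byte[16];
// positioned read: does not change the current stream position
int n = in.read(0L, buf, 0, buf.length);
System.out.println(new String(buf, 0, n));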
From source file:com.sec.webs.classification.hdfs.HdfsFileOperation.java
License:Open Source License
/**
 * Read file from HDFS
 *
 * @param fileName
 */
public void ReadFile(String fileName) {
    FSDataInputStream dis = null;
    try {
        dis = fs.open(new Path(fileName));
        IOUtils.copyBytes(dis, System.out, 4096, false);
    } catch (IOException e) {
        e.printStackTrace();
    } finally {
        IOUtils.closeStream(dis); // ensure the stream is closed even if the copy fails
    }
}