Example usage for org.apache.hadoop.io IOUtils copyBytes

Introduction

On this page you can find example usages of org.apache.hadoop.io.IOUtils#copyBytes, collected from open-source projects. The prototype below shows the count-based overload; several of the examples also use the related overloads that take an int buffer size or a Configuration.

Prototype

public static void copyBytes(InputStream in, OutputStream out, long count, boolean close) throws IOException 

Document

Copies count bytes from one stream to another; when close is true, both streams are closed once the copy completes.
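
A minimal, self-contained sketch of this overload (the file names are illustrative, not taken from the projects below). The long literal matters: an int in the third position would select the buffer-size overload instead.

import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;

import org.apache.hadoop.io.IOUtils;

public class CopyBytesSketch {
    public static void main(String[] args) throws IOException {
        // copy up to 1024 bytes from in.dat to out.dat; close=true closes both streams
        IOUtils.copyBytes(new FileInputStream("in.dat"), new FileOutputStream("out.dat"), 1024L, true);
    }
}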

Usage

From source file:com.endgame.binarypig.util.StreamUtilsTest.java

License:Apache License

public void testWriteToFile() throws Exception {

    File binaryFile = new File("/tmp/" + UUID.randomUUID().toString());
    binaryFile.deleteOnExit();

    assertFalse(binaryFile.exists());
    BytesWritable value = new BytesWritable("This is a test".getBytes());

    try {
        StreamUtils.writeToFile(value, binaryFile);
    } catch (IOException e) {
        e.printStackTrace();
        fail("This should not throw an Exception");
    }

    assertTrue(binaryFile.exists());

    ByteArrayOutputStream bytes = new ByteArrayOutputStream();
    IOUtils.copyBytes(new FileInputStream(binaryFile), bytes, 100, true);
    assertEquals("This is a test", new String(bytes.toByteArray()));
}
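
Note that 100 here is an int literal, so this call resolves to the copyBytes(InputStream, OutputStream, int buffSize, boolean close) overload: 100 is a buffer size, not a byte count, and the copy runs until end of stream.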

From source file:com.gruter.hadoop.customShell.CustomShell.java

License:Apache License

/** 
 * Print from src to stdout.
*/
private void printToStdout(InputStream in) throws IOException {
    try {
        IOUtils.copyBytes(in, System.out, getConf(), false);
    } finally {
        in.close();
    }
}
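
The Configuration overload used here takes the copy buffer size from io.file.buffer.size (falling back to 4096 bytes); with close=false the streams are left open, which is why the input is closed explicitly in the finally block.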

From source file:com.hadoop.compression.lzo.LzoIndex.java

License:Open Source License

/**
 * Read the index of the lzo file.
 *
 * @param fs The index file is on this file system.
 * @param lzoFile the file whose index we are reading -- NOT the index file itself.  That is,
 * pass in filename.lzo, not filename.lzo.index, for this parameter.
 * @throws IOException
 */
public static LzoIndex readIndex(FileSystem fs, Path lzoFile) throws IOException {
    FSDataInputStream indexIn = null;
    Path indexFile = lzoFile.suffix(LZO_INDEX_SUFFIX);

    try {
        indexIn = fs.open(indexFile);
    } catch (IOException fileNotFound) {
        // return empty index, fall back to the unsplittable mode
        return new LzoIndex();
    }

    int capacity = 16 * 1024 * 8; // 16K block offsets of 8 bytes each, enough for a 4GB file with 256KB lzo blocks
    DataOutputBuffer bytes = new DataOutputBuffer(capacity);

    // copy indexIn and close it
    IOUtils.copyBytes(indexIn, bytes, 4 * 1024, true);

    ByteBuffer bytesIn = ByteBuffer.wrap(bytes.getData(), 0, bytes.getLength());
    int blocks = bytesIn.remaining() / 8;
    LzoIndex index = new LzoIndex(blocks);

    for (int i = 0; i < blocks; i++) {
        index.set(i, bytesIn.getLong());
    }

    return index;
}
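
Here 4 * 1024 is again an int, so the buffer-size overload is used: the whole index file is copied until EOF, and close=true then closes the streams without an explicit finally block.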

From source file:com.hdfs.concat.crush.integration.CrushMapReduceTest.java

License:Apache License

/**
 * Copies data from the given input stream to an HDFS file at the given path. This method will close the input stream.
 */
protected final void copyStreamToHdfs(InputStream resource, String hdfsDestFileName) throws IOException {
    FileSystem fs = getFileSystem();

    FSDataOutputStream os = fs.create(new Path(hdfsDestFileName), false);

    IOUtils.copyBytes(resource, os, fs.getConf(), true);
}
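
Because close is true, copyBytes closes both the resource input stream and the HDFS output stream when it finishes, which is what lets this method get away without a finally block.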

From source file:com.ibm.bi.dml.runtime.io.WriterMatrixMarket.java

License:Open Source License

/**
 * Merges a text cell file, or a directory of text cell part files, into a single MatrixMarket file.
 * @param srcFileName
 * @param fileName
 * @param rlen
 * @param clen
 * @param nnz
 * @throws IOException
 */
public void mergeTextcellToMatrixMarket(String srcFileName, String fileName, long rlen, long clen, long nnz)
        throws IOException {
    Configuration conf = new Configuration(ConfigurationManager.getCachedJobConf());

    Path src = new Path(srcFileName);
    Path merge = new Path(fileName);
    FileSystem hdfs = FileSystem.get(conf);

    if (hdfs.exists(merge)) {
        hdfs.delete(merge, true);
    }

    OutputStream out = hdfs.create(merge, true);

    // write out the header first 
    StringBuilder sb = new StringBuilder();
    sb.append("%%MatrixMarket matrix coordinate real general\n");

    // output number of rows, number of columns and number of nnz
    sb.append(rlen + " " + clen + " " + nnz + "\n");
    out.write(sb.toString().getBytes());

    // if the source is a directory
    if (hdfs.getFileStatus(src).isDirectory()) {
        try {
            FileStatus[] contents = hdfs.listStatus(src);
            for (int i = 0; i < contents.length; i++) {
                if (!contents[i].isDirectory()) {
                    InputStream in = hdfs.open(contents[i].getPath());
                    try {
                        IOUtils.copyBytes(in, out, conf, false);
                    } finally {
                        IOUtilFunctions.closeSilently(in);
                    }
                }
            }
        } finally {
            IOUtilFunctions.closeSilently(out);
        }
    } else if (hdfs.isFile(src)) {
        InputStream in = null;
        try {
            in = hdfs.open(src);
            IOUtils.copyBytes(in, out, conf, true);
        } finally {
            IOUtilFunctions.closeSilently(in);
            IOUtilFunctions.closeSilently(out);
        }
    } else {
        throw new IOException(src.toString() + ": No such file or directory");
    }
}
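
Note the pattern in the directory branch: each part file is copied with close=false so the shared output stream stays open across iterations, and the output is closed exactly once in the outer finally. In the single-file branch, close=true already closes both streams, so the closeSilently calls there are effectively redundant.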

From source file:com.ibm.bi.dml.runtime.io.WriterTextCSV.java

License:Open Source License

/**
 * Method to merge multiple CSV part files on HDFS into a single CSV file on HDFS. 
 * The part files are created by CSV_WRITE MR job. 
 *
 * This method is invoked from CP-write instruction.
 * 
 * @param srcFileName
 * @param destFileName
 * @param csvprop
 * @param rlen
 * @param clen
 * @throws IOException
 */
public void mergeCSVPartFiles(String srcFileName, String destFileName, CSVFileFormatProperties csvprop,
        long rlen, long clen) throws IOException {
    Configuration conf = new Configuration(ConfigurationManager.getCachedJobConf());

    Path srcFilePath = new Path(srcFileName);
    Path mergedFilePath = new Path(destFileName);
    FileSystem hdfs = FileSystem.get(conf);

    if (hdfs.exists(mergedFilePath)) {
        hdfs.delete(mergedFilePath, true);
    }
    OutputStream out = hdfs.create(mergedFilePath, true);

    // write out the header, if needed
    if (csvprop.hasHeader()) {
        StringBuilder sb = new StringBuilder();
        for (int i = 0; i < clen; i++) {
            sb.append("C" + (i + 1));
            if (i < clen - 1)
                sb.append(csvprop.getDelim());
        }
        sb.append('\n');
        out.write(sb.toString().getBytes());
        sb.setLength(0);
    }

    // if the source is a directory
    if (hdfs.isDirectory(srcFilePath)) {
        try {
            FileStatus[] contents = hdfs.listStatus(srcFilePath);
            Path[] partPaths = new Path[contents.length];
            int numPartFiles = 0;
            // compact the part paths to the front of the array so the sort sees no null gaps
            for (int i = 0; i < contents.length; i++) {
                if (!contents[i].isDirectory()) {
                    partPaths[numPartFiles] = contents[i].getPath();
                    numPartFiles++;
                }
            }
            Arrays.sort(partPaths, 0, numPartFiles);

            for (int i = 0; i < numPartFiles; i++) {
                InputStream in = hdfs.open(partPaths[i]);
                try {
                    IOUtils.copyBytes(in, out, conf, false);
                    if (i < numPartFiles - 1)
                        out.write('\n');
                } finally {
                    IOUtilFunctions.closeSilently(in);
                }
            }
        } finally {
            IOUtilFunctions.closeSilently(out);
        }
    } else if (hdfs.isFile(srcFilePath)) {
        InputStream in = null;
        try {
            in = hdfs.open(srcFilePath);
            IOUtils.copyBytes(in, out, conf, true);
        } finally {
            IOUtilFunctions.closeSilently(in);
            IOUtilFunctions.closeSilently(out);
        }
    } else {
        throw new IOException(srcFilePath.toString() + ": No such file or directory");
    }
}

From source file:com.ibm.bi.dml.runtime.io.WriterTextCSV.java

License:Open Source License

/**
 * Prepends a header row to a CSV file, or to the first part file of a CSV directory.
 * 
 * @param srcFileName
 * @param destFileName
 * @param rlen
 * @param clen
 * @throws IOException
 */
@SuppressWarnings("unchecked")
public void addHeaderToCSV(String srcFileName, String destFileName, long rlen, long clen) throws IOException {
    Configuration conf = new Configuration(ConfigurationManager.getCachedJobConf());

    Path srcFilePath = new Path(srcFileName);
    Path destFilePath = new Path(destFileName);
    FileSystem hdfs = FileSystem.get(conf);

    if (!_props.hasHeader()) {
        // simply move srcFile to destFile

        /*
         * TODO: Remove this roundabout way! 
         * For example: destFilePath = /user/biadmin/csv/temp/out/file.csv 
         *              & the only path that exists already on HDFS is /user/biadmin/csv/.
         * In this case: the directory structure /user/biadmin/csv/temp/out must be created. 
         * Simple hdfs.rename() does not seem to create this directory structure.
         */

        // delete the destination file, if it exists already
        hdfs.delete(destFilePath, true);

        // create /user/biadmin/csv/temp/out/file.csv so that .../temp/out/ is created
        hdfs.createNewFile(destFilePath);

        // delete the file "file.csv" but preserve the directory structure /user/biadmin/csv/temp/out/
        hdfs.delete(destFilePath, true);

        // finally, move the data to destFilePath = /user/biadmin/csv/temp/out/file.csv
        hdfs.rename(srcFilePath, destFilePath);
        return;
    }

    // construct the header line
    StringBuilder sb = new StringBuilder();
    for (int i = 0; i < clen; i++) {
        sb.append("C" + (i + 1));
        if (i < clen - 1)
            sb.append(_props.getDelim());
    }
    sb.append('\n');

    if (hdfs.isDirectory(srcFilePath)) {

        // compute sorted order among part files
        ArrayList<Path> files = new ArrayList<Path>();
        for (FileStatus stat : hdfs.listStatus(srcFilePath, CSVReblockMR.hiddenFileFilter))
            files.add(stat.getPath());
        Collections.sort(files);

        // first part file path
        Path firstpart = files.get(0);

        // create a temp file, and add header and contents of first part
        Path tmp = new Path(firstpart.toString() + ".tmp");
        OutputStream out = hdfs.create(tmp, true);
        out.write(sb.toString().getBytes());
        sb.setLength(0);

        // copy rest of the data from firstpart
        InputStream in = null;
        try {
            in = hdfs.open(firstpart);
            IOUtils.copyBytes(in, out, conf, true);
        } finally {
            IOUtilFunctions.closeSilently(in);
            IOUtilFunctions.closeSilently(out);
        }

        // rename tmp to firstpart
        hdfs.delete(firstpart, true);
        hdfs.rename(tmp, firstpart);

        // rename srcfile to destFile
        hdfs.delete(destFilePath, true);
        hdfs.createNewFile(destFilePath); // force the creation of directory structure
        hdfs.delete(destFilePath, true); // delete the file, but preserve the directory structure
        hdfs.rename(srcFilePath, destFilePath); // move the data 

    } else if (hdfs.isFile(srcFilePath)) {
        // create destination file
        OutputStream out = hdfs.create(destFilePath, true);

        // write header
        out.write(sb.toString().getBytes());
        sb.setLength(0);

        // copy the data from srcFile
        InputStream in = null;
        try {
            in = hdfs.open(srcFilePath);
            IOUtils.copyBytes(in, out, conf, true);
        } finally {
            IOUtilFunctions.closeSilently(in);
            IOUtilFunctions.closeSilently(out);
        }
    } else {
        throw new IOException(srcFilePath.toString() + ": No such file or directory");
    }
}

From source file:com.ramsane.samplehadoop.ReadFile.java

public static void main(String[] args) {
    Configuration conf = new Configuration();
    conf.set("fs.defaultFS", "hdfs://localhost:9000");
    FSDataInputStream is = null;
    try {
        // get file system object....
        FileSystem fs = FileSystem.get(conf);
        is = fs.open(new Path("/big"));
        IOUtils.copyBytes(is, System.out, 4096, false);
    } catch (IOException ex) {
        System.out.println(ex.getMessage());
    } finally {
        IOUtils.closeStream(is);
    }
}
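
IOUtils.closeStream is the quiet variant of close: it swallows any exception thrown while closing and tolerates a null argument, which makes it safe to call from a finally block even when fs.open failed.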

From source file:com.ramsane.samplehadoop.ReadTwice.java

public static void main(String[] args) {
    Configuration cfg = new Configuration();
    cfg.set("fs.defaultFS", "hdfs://localhost:9000");
    FSDataInputStream in = null;
    try {
        FileSystem fs = FileSystem.get(cfg);
        in = fs.open(new Path("/big"));
        System.out.println("First TIme...");
        IOUtils.copyBytes(in, System.out, 4096, false);
        System.out.println("Second time..");
        in.seek(0);
        IOUtils.copyBytes(in, System.out, 4096, false);
    } catch (IOException ex) {
        System.out.println(ex.getMessage());
    } finally {
        IOUtils.closeStream(in);
    }
}
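
Reading the file twice works because fs.open returns an FSDataInputStream, which implements Seekable: in.seek(0) repositions the stream at the start before the second copy. A plain java.io.InputStream would not support this.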

From source file:com.sec.webs.classification.hdfs.HdfsFileOperation.java

License:Open Source License

/**
 * Read file from HDFS.
 *
 * @param fileName
 */
public void ReadFile(String fileName) {
    FSDataInputStream dis = null;
    try {
        dis = fs.open(new Path(fileName));
        IOUtils.copyBytes(dis, System.out, 4096, false);
    } catch (IOException e) {
        e.printStackTrace();
    } finally {
        // close the stream even if the copy fails
        IOUtils.closeStream(dis);
    }
}