Example usage for org.apache.hadoop.io IOUtils copyBytes

List of usage examples for org.apache.hadoop.io IOUtils copyBytes

Introduction

On this page you can find usage examples for org.apache.hadoop.io IOUtils copyBytes.

Prototype

public static void copyBytes(InputStream in, OutputStream out, long count, boolean close) throws IOException 

Document

Copies count bytes from one stream to another.
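The examples below use either the Configuration-based overload or the int buffer-size overload; none of them use the long-count overload shown in the prototype above. The following is a minimal sketch of that overload, with hypothetical paths and class name, assuming the default FileSystem configured for the Configuration is reachable:

import java.io.InputStream;
import java.io.OutputStream;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;

public class CopyBytesCountExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        // Hypothetical paths; point these at an existing source file and a writable destination.
        InputStream in = fs.open(new Path("/tmp/source.txt"));
        OutputStream out = fs.create(new Path("/tmp/dest.txt"), true);
        // Copy up to 1024 bytes from in to out; close=true closes both streams afterwards.
        IOUtils.copyBytes(in, out, 1024L, true);
    }
}

Because close is true, copyBytes closes both streams when the copy finishes, so no explicit finally block is needed in this sketch.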

Usage

From source file:org.apache.lens.server.stats.store.log.StatisticsLogPartitionHandler.java

License:Apache License

/**
 * Copy to hdfs.
 *
 * @param localPath the local path
 * @param finalPath the final path
 * @throws IOException Signals that an I/O exception has occurred.
 */
private void copyToHdfs(String localPath, Path finalPath) throws IOException {
    Configuration conf = new Configuration();
    FileSystem fs = finalPath.getFileSystem(conf);
    if (fs.exists(finalPath)) {
        fs.delete(finalPath, true);
    }
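    // Configuration overload: buffer size is read from io.file.buffer.size (default 4096); close=true closes both streams.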
    IOUtils.copyBytes(new FileInputStream(localPath), fs.create(finalPath), conf, true);
}

From source file:org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.JobControlCompiler.java

License:Apache License

private static Path getFromCache(PigContext pigContext, Configuration conf, URL url) throws IOException {
    InputStream is1 = null;
    InputStream is2 = null;
    OutputStream os = null;

    try {
        Path stagingDir = getCacheStagingDir(conf);
        String filename = FilenameUtils.getName(url.getPath());

        is1 = url.openStream();
        String checksum = DigestUtils.shaHex(is1);
        FileSystem fs = FileSystem.get(conf);
        Path cacheDir = new Path(stagingDir, checksum);
        Path cacheFile = new Path(cacheDir, filename);
        if (fs.exists(cacheFile)) {
            log.debug("Found " + url + " in jar cache at " + cacheDir);
            long curTime = System.currentTimeMillis();
            fs.setTimes(cacheFile, -1, curTime);
            return cacheFile;
        }
        log.info("Url " + url + " was not found in jarcache at " + cacheDir);
        // attempt to copy to cache else return null
        fs.mkdirs(cacheDir, FileLocalizer.OWNER_ONLY_PERMS);
        is2 = url.openStream();
        os = FileSystem.create(fs, cacheFile, FileLocalizer.OWNER_ONLY_PERMS);
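        // Copy with an explicit 4 KB buffer; close=true closes is2 and os when the copy completes.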
        IOUtils.copyBytes(is2, os, 4096, true);

        return cacheFile;

    } catch (IOException ioe) {
        log.info("Unable to retrieve jar from jar cache ", ioe);
        return null;
    } finally {
        org.apache.commons.io.IOUtils.closeQuietly(is1);
        org.apache.commons.io.IOUtils.closeQuietly(is2);
        // IOUtils should not close stream to HDFS quietly
        if (os != null) {
            os.close();
        }
    }
}

From source file:org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.JobControlCompiler.java

License:Apache License

/**
 * copy the file to hdfs in a temporary path
 * @param pigContext the pig context
 * @param conf the job conf
 * @param url the url to ship to hdfs
 * @return the location where it was shipped
 * @throws IOException
 */
private static Path shipToHDFS(PigContext pigContext, Configuration conf, URL url) throws IOException {

    boolean cacheEnabled = conf.getBoolean(PigConfiguration.PIG_USER_CACHE_ENABLED, false);
    if (cacheEnabled) {
        Path pathOnDfs = getFromCache(pigContext, conf, url);
        if (pathOnDfs != null) {
            return pathOnDfs;
        }
    }
    String suffix = FilenameUtils.getName(url.getPath());

    Path dst = new Path(FileLocalizer.getTemporaryPath(pigContext).toUri().getPath(), suffix);
    FileSystem fs = dst.getFileSystem(conf);
    OutputStream os = null;
    InputStream is = null;
    try {
        is = url.openStream();
        os = fs.create(dst);
        IOUtils.copyBytes(is, os, 4096, true);
    } finally {
        org.apache.commons.io.IOUtils.closeQuietly(is);
        // IOUtils should not close stream to HDFS quietly
        if (os != null) {
            os.close();
        }
    }
    return dst;
}

From source file:org.apache.reef.runtime.yarn.driver.restart.DFSEvaluatorLogOverwriteReaderWriter.java

License:Apache License

/**
 * Writes a formatted entry (addition or removal) for an Evaluator ID into the DFS evaluator log.
 * The log is appended to by reading first, adding on the information, and then overwriting the entire log.
 * Since the {@link FileSystem} does not support appends, this {@link DFSEvaluatorLogReaderWriter}
 * uses a two-file approach, where when we write, we always overwrite the older file.
 * @param formattedEntry The formatted entry (entry with evaluator ID and addition/removal information).
 * @throws IOException when file cannot be written.
 */
@Override
public synchronized void writeToEvaluatorLog(final String formattedEntry) throws IOException {
    final Path writePath = getWritePath();

    // readPath is always not the writePath.
    final Path readPath = getAlternativePath(writePath);

    try (final FSDataOutputStream outputStream = this.fileSystem.create(writePath, true)) {
        InputStream inputStream = null;
        try {
            final InputStream newEntryInputStream = new ByteArrayInputStream(
                    formattedEntry.getBytes(StandardCharsets.UTF_8));

            if (fileSystem.exists(readPath)) {
                inputStream = new SequenceInputStream(this.fileSystem.open(readPath), newEntryInputStream);
            } else {
                inputStream = newEntryInputStream;
            }

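            // close=false: keep both streams open so the output can be hsync'd and the input closed in the finally block below.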
            IOUtils.copyBytes(inputStream, outputStream, 4096, false);
        } finally {
            outputStream.hsync();
            if (inputStream != null) {
                inputStream.close();
            }
        }
    }
}

From source file:org.apache.reef.runtime.yarn.driver.restart.DFSEvaluatorLogOverwriteWriter.java

License:Apache License

/**
 * For certain HDFS implementation, the append operation may not be supported (e.g., Azure blob - wasb)
 * in this case, we will emulate the append operation by reading the content, appending entry at the end,
 * then recreating the file with appended content.
 *
 * @throws java.io.IOException when the file can't be written.
 */
private void appendByDeleteAndCreate(final String appendEntry) throws IOException {
    final ByteArrayOutputStream outputStream = new ByteArrayOutputStream();

    try (final InputStream inputStream = this.fileSystem.open(this.changelogPath)) {
        IOUtils.copyBytes(inputStream, outputStream, 4096, true);
    }

    final String newContent = outputStream.toString("UTF-8") + appendEntry;
    this.fileSystem.delete(this.changelogPath, true);

    try (final FSDataOutputStream newOutput = this.fileSystem.create(this.changelogPath);
            final InputStream newInput = new ByteArrayInputStream(
                    newContent.getBytes(StandardCharsets.UTF_8))) {
        IOUtils.copyBytes(newInput, newOutput, 4096, true);
    }
}

From source file:org.apache.reef.runtime.yarn.driver.YarnContainerManager.java

License:Apache License

/**
 * For certain HDFS implementation, the append operation may not be supported (e.g., Azure blob - wasb)
 * in this case, we will emulate the append operation by reading the content, appending entry at the end,
 * then recreating the file with appended content.
 *
 * @throws java.io.IOException when the file can't be written.
 */

private void appendByDeleteAndCreate(final FileSystem fs, final Path path, final String appendEntry)
        throws IOException {
    final ByteArrayOutputStream outputStream = new ByteArrayOutputStream();

    try (final InputStream inputStream = fs.open(path)) {
        IOUtils.copyBytes(inputStream, outputStream, 4096, true);
    }

    final String newContent = outputStream.toString() + appendEntry;
    fs.delete(path, true);

    try (final FSDataOutputStream newOutput = fs.create(path);
            final InputStream newInput = new ByteArrayInputStream(newContent.getBytes())) {
        IOUtils.copyBytes(newInput, newOutput, 4096, true);
    }

}

From source file:org.apache.sysml.runtime.io.WriterMatrixMarket.java

License:Apache License

public static void mergeTextcellToMatrixMarket(String srcFileName, String fileName, long rlen, long clen,
        long nnz) throws IOException {
    Configuration conf = new Configuration(ConfigurationManager.getCachedJobConf());

    Path src = new Path(srcFileName);
    Path merge = new Path(fileName);
    FileSystem fs = IOUtilFunctions.getFileSystem(src, conf);

    if (fs.exists(merge)) {
        fs.delete(merge, true);
    }

    OutputStream out = fs.create(merge, true);

    // write out the header first 
    StringBuilder sb = new StringBuilder();
    sb.append("%%MatrixMarket matrix coordinate real general\n");

    // output number of rows, number of columns and number of nnz
    sb.append(rlen + " " + clen + " " + nnz + "\n");
    out.write(sb.toString().getBytes());

    // if the source is a directory
    if (fs.getFileStatus(src).isDirectory()) {
        try {
            FileStatus[] contents = fs.listStatus(src);
            for (int i = 0; i < contents.length; i++) {
                if (!contents[i].isDirectory()) {
                    InputStream in = fs.open(contents[i].getPath());
                    try {
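                        // close=false keeps 'out' open so the remaining part files can be appended to the merged output.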
                        IOUtils.copyBytes(in, out, conf, false);
                    } finally {
                        IOUtilFunctions.closeSilently(in);
                    }
                }
            }
        } finally {
            IOUtilFunctions.closeSilently(out);
        }
    } else if (fs.isFile(src)) {
        InputStream in = null;
        try {
            in = fs.open(src);
            IOUtils.copyBytes(in, out, conf, true);
        } finally {
            IOUtilFunctions.closeSilently(in);
            IOUtilFunctions.closeSilently(out);
        }
    } else {
        throw new IOException(src.toString() + ": No such file or directory");
    }
}

From source file:org.apache.sysml.runtime.io.WriterTextCSV.java

License:Apache License

@SuppressWarnings("unchecked")
public final void addHeaderToCSV(String srcFileName, String destFileName, long rlen, long clen)
        throws IOException {
    Configuration conf = new Configuration(ConfigurationManager.getCachedJobConf());

    Path srcFilePath = new Path(srcFileName);
    Path destFilePath = new Path(destFileName);
    FileSystem fs = IOUtilFunctions.getFileSystem(srcFilePath, conf);

    if (!_props.hasHeader()) {
        // simply move srcFile to destFile

        /*
         * TODO: Remove this roundabout way! 
         * For example: destFilePath = /user/biadmin/csv/temp/out/file.csv 
         *              & the only path that exists already on HDFS is /user/biadmin/csv/.
         * In this case: the directory structure /user/biadmin/csv/temp/out must be created. 
         * Simple hdfs.rename() does not seem to create this directory structure.
         */

        // delete the destination file, if exists already
        fs.delete(destFilePath, true);

        // Create /user/biadmin/csv/temp/out/file.csv so that ..../temp/out/ is created.
        fs.createNewFile(destFilePath);

        // delete the file "file.csv" but preserve the directory structure /user/biadmin/csv/temp/out/
        fs.delete(destFilePath, true);

        // finally, move the data to destFilePath = /user/biadmin/csv/temp/out/file.csv
        fs.rename(srcFilePath, destFilePath);

        return;
    }

    // construct the header line
    StringBuilder sb = new StringBuilder();
    for (int i = 0; i < clen; i++) {
        sb.append("C" + (i + 1));
        if (i < clen - 1)
            sb.append(_props.getDelim());
    }
    sb.append('\n');

    if (fs.isDirectory(srcFilePath)) {

        // compute sorted order among part files
        ArrayList<Path> files = new ArrayList<>();
        for (FileStatus stat : fs.listStatus(srcFilePath, CSVReblockMR.hiddenFileFilter))
            files.add(stat.getPath());
        Collections.sort(files);

        // first part file path
        Path firstpart = files.get(0);

        // create a temp file, and add header and contents of first part
        Path tmp = new Path(firstpart.toString() + ".tmp");
        OutputStream out = fs.create(tmp, true);
        out.write(sb.toString().getBytes());
        sb.setLength(0);

        // copy rest of the data from firstpart
        InputStream in = null;
        try {
            in = fs.open(firstpart);
            IOUtils.copyBytes(in, out, conf, true);
        } finally {
            IOUtilFunctions.closeSilently(in);
            IOUtilFunctions.closeSilently(out);
        }

        // rename tmp to firstpart
        fs.delete(firstpart, true);
        fs.rename(tmp, firstpart);

        // rename srcfile to destFile
        fs.delete(destFilePath, true);
        fs.createNewFile(destFilePath); // force the creation of directory structure
        fs.delete(destFilePath, true); // delete the file, but preserve the directory structure
        fs.rename(srcFilePath, destFilePath); // move the data 

    } else if (fs.isFile(srcFilePath)) {
        // create destination file
        OutputStream out = fs.create(destFilePath, true);

        // write header
        out.write(sb.toString().getBytes());
        sb.setLength(0);

        // copy the data from srcFile
        InputStream in = null;
        try {
            in = fs.open(srcFilePath);
            IOUtils.copyBytes(in, out, conf, true);
        } finally {
            IOUtilFunctions.closeSilently(in);
            IOUtilFunctions.closeSilently(out);
        }
    } else {
        throw new IOException(srcFilePath.toString() + ": No such file or directory");
    }
}

From source file:org.berlin.mine.test.TestCat.java

License:Open Source License

public static void main(final String[] args) throws Exception {

    String localSrc = "README.txt";
    String dst = "hdfs://localhost/user/b/r.txt";
    InputStream in = new BufferedInputStream(new FileInputStream(localSrc));
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(URI.create(dst), conf);
    OutputStream out = fs.create(new Path(dst), new Progressable() {
        public void progress() {
            System.out.print(".");
        }
    });
    System.out.println();
    IOUtils.copyBytes(in, out, 4096, true);
}

From source file:org.bgi.flexlab.gaea.util.SortUilts.java

License:Open Source License

public static void merge(MultipleVCFHeader mVcfHeader, VCFSortOptions options, Configuration conf) {
    try {
        System.out.println("vcf-MultiSampleSort :: Merging output...");

        // First, place the VCF or BCF header.
        final Path outpath = new Path(options.getOutputPath());
        final Path wrkPath = new Path(options.getWorkPath());
        final FileSystem srcFS = wrkPath.getFileSystem(conf);
        final FileSystem dstFS = outpath.getFileSystem(conf);

        Map<String, OutputStream> outs = new HashMap<String, OutputStream>();
        Map<Integer, String> multiOutputs = options.getMultiOutputs();
        for (String result : multiOutputs.values()) {
            Path sPath = new Path(options.getOutputPath() + "/" + result + ".vcf");
            OutputStream os = dstFS.create(sPath);
            outs.put(result, os);
        }

        final VariantContextWriterBuilder builder = new VariantContextWriterBuilder();
        VariantContextWriter writer;
        Map<Integer, SingleVCFHeader> id2VcfHeader = mVcfHeader.getID2SingleVcfHeader();
        for (int id : multiOutputs.keySet()) {
            VCFHeader newHeader = id2VcfHeader.get(id).getHeader();
            writer = builder.setOutputStream(new FilterOutputStream(outs.get(multiOutputs.get(id))) {
                @Override
                public void close() throws IOException {
                    this.out.flush();
                }
            }).setOptions(VariantContextWriterBuilder.NO_OPTIONS).build();

            writer.writeHeader(newHeader);
            writer.close();

            final FileStatus[] parts = srcFS.globStatus(
                    new Path(options.getWorkPath(), multiOutputs.get(id) + "-*-[0-9][0-9][0-9][0-9][0-9]*"));

            int i = 0;

            for (final FileStatus part : parts) {
                System.out.printf("sort:: Merging part %d ( size %d)...\n", i++, part.getLen());
                System.out.flush();

                final FSDataInputStream ins = srcFS.open(part.getPath());
                IOUtils.copyBytes(ins, outs.get(multiOutputs.get(id)), conf, false);
                ins.close();
            }
            for (final FileStatus part : parts)
                srcFS.delete(part.getPath(), false);

            outs.get(multiOutputs.get(id)).close();

        }
    } catch (IOException e) {
        System.err.printf("vcf-MultiSampleSort :: Output merging failed: %s\n", e);
    }
}