List of usage examples for org.apache.hadoop.fs.FileSystem.create
public FSDataOutputStream create(Path f, boolean overwrite) throws IOException
public FSDataOutputStream create(Path f, Progressable progress) throws IOException
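All of the examples below share the same basic pattern: obtain a FileSystem from a Configuration, call create to open (and overwrite) an output stream, and wrap the returned FSDataOutputStream in a writer. A minimal, self-contained sketch of that pattern follows; the path and payload are hypothetical, and the default file system is taken from the active Hadoop configuration.

import java.io.BufferedWriter;
import java.io.OutputStreamWriter;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class CreateExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // default file system as configured in core-site.xml (HDFS or local)
        FileSystem fs = FileSystem.get(conf);
        Path path = new Path("/tmp/example.txt"); // hypothetical path

        // create(Path, boolean overwrite): the overload most examples below use
        FSDataOutputStream out = fs.create(path, true);
        try (BufferedWriter br = new BufferedWriter(new OutputStreamWriter(out))) {
            br.write("hello hdfs\n");
        }
    }
}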
From source file:com.ibm.bi.dml.runtime.controlprogram.ParForProgramBlock.java
License:Open Source License
/**
 * @param fname
 * @param queue
 * @param maxDigits
 * @return
 * @throws DMLRuntimeException
 * @throws IOException
 */
private String writeTasksToFile(String fname, LocalTaskQueue<Task> queue, int maxDigits)
    throws DMLRuntimeException, IOException
{
    BufferedWriter br = null;
    try {
        Path path = new Path(fname);
        FileSystem fs = FileSystem.get(ConfigurationManager.getCachedJobConf());
        br = new BufferedWriter(new OutputStreamWriter(fs.create(path, true)));

        Task t = null;
        boolean flagFirst = true; //workaround for keeping gen order
        while ((t = queue.dequeueTask()) != LocalTaskQueue.NO_MORE_TASKS) {
            br.write(createTaskFileLine(t, maxDigits, flagFirst));
            if (flagFirst)
                flagFirst = false;
        }
    }
    catch (Exception ex) {
        throw new DMLRuntimeException("Error writing tasks to taskfile " + fname, ex);
    }
    finally {
        if (br != null)
            br.close();
    }

    return fname;
}
From source file:com.ibm.bi.dml.runtime.io.WriterMatrixMarket.java
License:Open Source License
/**
 * @param path
 * @param job
 * @param src
 * @param rlen
 * @param clen
 * @param nnz
 * @throws IOException
 */
protected void writeMatrixMarketMatrixToHDFS(Path path, JobConf job, MatrixBlock src,
        long rlen, long clen, long nnz)
    throws IOException
{
    boolean sparse = src.isInSparseFormat();
    boolean entriesWritten = false;
    FileSystem fs = FileSystem.get(job);
    BufferedWriter br = new BufferedWriter(new OutputStreamWriter(fs.create(path, true)));

    int rows = src.getNumRows();
    int cols = src.getNumColumns();

    //bound check per block
    if (rows > rlen || cols > clen) {
        throw new IOException("Matrix block [1:" + rows + ",1:" + cols + "] "
            + "out of overall matrix range [1:" + rlen + ",1:" + clen + "].");
    }

    try {
        //for obj reuse and preventing repeated buffer re-allocations
        StringBuilder sb = new StringBuilder();

        // First output MM header
        sb.append("%%MatrixMarket matrix coordinate real general\n");

        // output number of rows, number of columns and number of nnz
        sb.append(rlen + " " + clen + " " + nnz + "\n");
        br.write(sb.toString());
        sb.setLength(0);

        // output matrix cell
        if (sparse) //SPARSE
        {
            SparseRowsIterator iter = src.getSparseRowsIterator();
            while (iter.hasNext()) {
                IJV cell = iter.next();
                sb.append(cell.i + 1);
                sb.append(' ');
                sb.append(cell.j + 1);
                sb.append(' ');
                sb.append(cell.v);
                sb.append('\n');
                br.write(sb.toString()); //same as append
                sb.setLength(0);
                entriesWritten = true;
            }
        }
        else //DENSE
        {
            for (int i = 0; i < rows; i++) {
                String rowIndex = Integer.toString(i + 1);
                for (int j = 0; j < cols; j++) {
                    double lvalue = src.getValueDenseUnsafe(i, j);
                    if (lvalue != 0) //for nnz
                    {
                        sb.append(rowIndex);
                        sb.append(' ');
                        sb.append(j + 1);
                        sb.append(' ');
                        sb.append(lvalue);
                        sb.append('\n');
                        br.write(sb.toString()); //same as append
                        sb.setLength(0);
                        entriesWritten = true;
                    }
                }
            }
        }

        //handle empty result
        if (!entriesWritten) {
            br.write("1 1 0\n");
        }
    }
    finally {
        IOUtilFunctions.closeSilently(br);
    }
}
From source file:com.ibm.bi.dml.runtime.io.WriterMatrixMarket.java
License:Open Source License
/**
 * @param srcFileName
 * @param fileName
 * @param rlen
 * @param clen
 * @param nnz
 * @throws IOException
 */
public void mergeTextcellToMatrixMarket(String srcFileName, String fileName,
        long rlen, long clen, long nnz)
    throws IOException
{
    Configuration conf = new Configuration(ConfigurationManager.getCachedJobConf());

    Path src = new Path(srcFileName);
    Path merge = new Path(fileName);
    FileSystem hdfs = FileSystem.get(conf);

    if (hdfs.exists(merge)) {
        hdfs.delete(merge, true);
    }
    OutputStream out = hdfs.create(merge, true);

    // write out the header first
    StringBuilder sb = new StringBuilder();
    sb.append("%%MatrixMarket matrix coordinate real general\n");

    // output number of rows, number of columns and number of nnz
    sb.append(rlen + " " + clen + " " + nnz + "\n");
    out.write(sb.toString().getBytes());

    // if the source is a directory
    if (hdfs.getFileStatus(src).isDirectory()) {
        try {
            FileStatus[] contents = hdfs.listStatus(src);
            for (int i = 0; i < contents.length; i++) {
                if (!contents[i].isDirectory()) {
                    InputStream in = hdfs.open(contents[i].getPath());
                    try {
                        IOUtils.copyBytes(in, out, conf, false);
                    }
                    finally {
                        IOUtilFunctions.closeSilently(in);
                    }
                }
            }
        }
        finally {
            IOUtilFunctions.closeSilently(out);
        }
    }
    else if (hdfs.isFile(src)) {
        InputStream in = null;
        try {
            in = hdfs.open(src);
            IOUtils.copyBytes(in, out, conf, true);
        }
        finally {
            IOUtilFunctions.closeSilently(in);
            IOUtilFunctions.closeSilently(out);
        }
    }
    else {
        throw new IOException(src.toString() + ": No such file or directory");
    }
}
From source file:com.ibm.bi.dml.runtime.io.WriterTextCell.java
License:Open Source License
/**
 * @param path
 * @param job
 * @param src
 * @param rlen
 * @param clen
 * @throws IOException
 */
protected void writeTextCellMatrixToHDFS(Path path, JobConf job, MatrixBlock src,
        long rlen, long clen)
    throws IOException
{
    boolean sparse = src.isInSparseFormat();
    boolean entriesWritten = false;
    FileSystem fs = FileSystem.get(job);
    BufferedWriter br = new BufferedWriter(new OutputStreamWriter(fs.create(path, true)));

    int rows = src.getNumRows();
    int cols = src.getNumColumns();

    //bound check per block
    if (rows > rlen || cols > clen) {
        throw new IOException("Matrix block [1:" + rows + ",1:" + cols + "] "
            + "out of overall matrix range [1:" + rlen + ",1:" + clen + "].");
    }

    try {
        //for obj reuse and preventing repeated buffer re-allocations
        StringBuilder sb = new StringBuilder();

        if (sparse) //SPARSE
        {
            SparseRowsIterator iter = src.getSparseRowsIterator();
            while (iter.hasNext()) {
                IJV cell = iter.next();
                sb.append(cell.i + 1);
                sb.append(' ');
                sb.append(cell.j + 1);
                sb.append(' ');
                sb.append(cell.v);
                sb.append('\n');
                br.write(sb.toString()); //same as append
                sb.setLength(0);
                entriesWritten = true;
            }
        }
        else //DENSE
        {
            for (int i = 0; i < rows; i++) {
                String rowIndex = Integer.toString(i + 1);
                for (int j = 0; j < cols; j++) {
                    double lvalue = src.getValueDenseUnsafe(i, j);
                    if (lvalue != 0) //for nnz
                    {
                        sb.append(rowIndex);
                        sb.append(' ');
                        sb.append(j + 1);
                        sb.append(' ');
                        sb.append(lvalue);
                        sb.append('\n');
                        br.write(sb.toString()); //same as append
                        sb.setLength(0);
                        entriesWritten = true;
                    }
                }
            }
        }

        //handle empty result
        if (!entriesWritten) {
            br.write("1 1 0\n");
        }
    }
    finally {
        IOUtilFunctions.closeSilently(br);
    }
}
From source file:com.ibm.bi.dml.runtime.io.WriterTextCSV.java
License:Open Source License
/**
 * @param path
 * @param job
 * @param src
 * @param rlen
 * @param clen
 * @param nnz
 * @param props
 * @throws IOException
 */
protected void writeCSVMatrixToHDFS(Path path, JobConf job, MatrixBlock src,
        long rlen, long clen, long nnz, CSVFileFormatProperties props)
    throws IOException
{
    boolean sparse = src.isInSparseFormat();
    FileSystem fs = FileSystem.get(job);
    BufferedWriter br = new BufferedWriter(new OutputStreamWriter(fs.create(path, true)));

    try {
        //for obj reuse and preventing repeated buffer re-allocations
        StringBuilder sb = new StringBuilder();

        props = (props == null) ? new CSVFileFormatProperties() : props;
        String delim = props.getDelim();
        boolean csvsparse = props.isSparse();

        // Write header line, if needed
        if (props.hasHeader()) {
            //write row chunk-wise to prevent OOM on large number of columns
            for (int bj = 0; bj < clen; bj += BLOCKSIZE_J) {
                for (int j = bj; j < Math.min(clen, bj + BLOCKSIZE_J); j++) {
                    sb.append("C" + (j + 1));
                    if (j < clen - 1)
                        sb.append(delim);
                }
                br.write(sb.toString());
                sb.setLength(0);
            }
            sb.append('\n');
            br.write(sb.toString());
            sb.setLength(0);
        }

        // Write data lines
        if (sparse) //SPARSE
        {
            SparseRow[] sparseRows = src.getSparseRows();
            for (int i = 0; i < rlen; i++) {
                //write row chunk-wise to prevent OOM on large number of columns
                int prev_jix = -1;

                if (sparseRows != null && i < sparseRows.length
                    && sparseRows[i] != null && !sparseRows[i].isEmpty())
                {
                    SparseRow arow = sparseRows[i];
                    int alen = arow.size();
                    int[] aix = arow.getIndexContainer();
                    double[] avals = arow.getValueContainer();

                    for (int j = 0; j < alen; j++) {
                        int jix = aix[j];

                        // output empty fields, if needed
                        for (int j2 = prev_jix; j2 < jix - 1; j2++) {
                            if (!csvsparse)
                                sb.append('0');
                            sb.append(delim);

                            //flush buffered string
                            if (j2 % BLOCKSIZE_J == 0) {
                                br.write(sb.toString());
                                sb.setLength(0);
                            }
                        }

                        // output the value (non-zero)
                        sb.append(avals[j]);
                        if (jix < clen - 1)
                            sb.append(delim);
                        br.write(sb.toString());
                        sb.setLength(0);

                        //flush buffered string
                        if (jix % BLOCKSIZE_J == 0) {
                            br.write(sb.toString());
                            sb.setLength(0);
                        }

                        prev_jix = jix;
                    }
                }

                // Output empty fields at the end of the row.
                // In case of an empty row, output (clen-1) empty fields
                for (int bj = prev_jix + 1; bj < clen; bj += BLOCKSIZE_J) {
                    for (int j = bj; j < Math.min(clen, bj + BLOCKSIZE_J); j++) {
                        if (!csvsparse)
                            sb.append('0');
                        if (j < clen - 1)
                            sb.append(delim);
                    }
                    br.write(sb.toString());
                    sb.setLength(0);
                }
                sb.append('\n');
                br.write(sb.toString());
                sb.setLength(0);
            }
        }
        else //DENSE
        {
            for (int i = 0; i < rlen; i++) {
                //write row chunk-wise to prevent OOM on large number of columns
                for (int bj = 0; bj < clen; bj += BLOCKSIZE_J) {
                    for (int j = bj; j < Math.min(clen, bj + BLOCKSIZE_J); j++) {
                        double lvalue = src.getValueDenseUnsafe(i, j);
                        if (lvalue != 0) //for nnz
                            sb.append(lvalue);
                        else if (!csvsparse)
                            sb.append('0');
                        if (j != clen - 1)
                            sb.append(delim);
                    }
                    br.write(sb.toString());
                    sb.setLength(0);
                }
                sb.append('\n');
                br.write(sb.toString()); //same as append
                sb.setLength(0);
            }
        }
    }
    finally {
        IOUtilFunctions.closeSilently(br);
    }
}
From source file:com.ibm.bi.dml.runtime.io.WriterTextCSV.java
License:Open Source License
/**
 * Method to merge multiple CSV part files on HDFS into a single CSV file on HDFS.
 * The part files are created by the CSV_WRITE MR job.
 *
 * This method is invoked from the CP-write instruction.
 *
 * @param srcFileName
 * @param destFileName
 * @param csvprop
 * @param rlen
 * @param clen
 * @throws IOException
 */
public void mergeCSVPartFiles(String srcFileName, String destFileName,
        CSVFileFormatProperties csvprop, long rlen, long clen)
    throws IOException
{
    Configuration conf = new Configuration(ConfigurationManager.getCachedJobConf());

    Path srcFilePath = new Path(srcFileName);
    Path mergedFilePath = new Path(destFileName);
    FileSystem hdfs = FileSystem.get(conf);

    if (hdfs.exists(mergedFilePath)) {
        hdfs.delete(mergedFilePath, true);
    }
    OutputStream out = hdfs.create(mergedFilePath, true);

    // write out the header, if needed
    if (csvprop.hasHeader()) {
        StringBuilder sb = new StringBuilder();
        for (int i = 0; i < clen; i++) {
            sb.append("C" + (i + 1));
            if (i < clen - 1)
                sb.append(csvprop.getDelim());
        }
        sb.append('\n');
        out.write(sb.toString().getBytes());
        sb.setLength(0);
    }

    // if the source is a directory
    if (hdfs.isDirectory(srcFilePath)) {
        try {
            FileStatus[] contents = hdfs.listStatus(srcFilePath);
            Path[] partPaths = new Path[contents.length];
            int numPartFiles = 0;
            for (int i = 0; i < contents.length; i++) {
                if (!contents[i].isDirectory()) {
                    partPaths[i] = contents[i].getPath();
                    numPartFiles++;
                }
            }
            Arrays.sort(partPaths);

            for (int i = 0; i < numPartFiles; i++) {
                InputStream in = hdfs.open(partPaths[i]);
                try {
                    IOUtils.copyBytes(in, out, conf, false);
                    if (i < numPartFiles - 1)
                        out.write('\n');
                }
                finally {
                    IOUtilFunctions.closeSilently(in);
                }
            }
        }
        finally {
            IOUtilFunctions.closeSilently(out);
        }
    }
    else if (hdfs.isFile(srcFilePath)) {
        InputStream in = null;
        try {
            in = hdfs.open(srcFilePath);
            IOUtils.copyBytes(in, out, conf, true);
        }
        finally {
            IOUtilFunctions.closeSilently(in);
            IOUtilFunctions.closeSilently(out);
        }
    }
    else {
        throw new IOException(srcFilePath.toString() + ": No such file or directory");
    }
}
From source file:com.ibm.bi.dml.runtime.io.WriterTextCSV.java
License:Open Source License
/**
 * @param srcFileName
 * @param destFileName
 * @param rlen
 * @param clen
 * @throws IOException
 */
@SuppressWarnings("unchecked")
public void addHeaderToCSV(String srcFileName, String destFileName, long rlen, long clen)
    throws IOException
{
    Configuration conf = new Configuration(ConfigurationManager.getCachedJobConf());

    Path srcFilePath = new Path(srcFileName);
    Path destFilePath = new Path(destFileName);
    FileSystem hdfs = FileSystem.get(conf);

    if (!_props.hasHeader()) {
        // simply move srcFile to destFile
        /*
         * TODO: Remove this roundabout way!
         * For example: destFilePath = /user/biadmin/csv/temp/out/file.csv
         * & the only path that exists already on HDFS is /user/biadmin/csv/.
         * In this case: the directory structure /user/biadmin/csv/temp/out must be created.
         * Simple hdfs.rename() does not seem to create this directory structure.
         */

        // delete the destination file, if exists already
        //boolean ret1 = hdfs.delete(destFilePath, true);

        // Create /user/biadmin/csv/temp/out/file.csv so that ..../temp/out/ is created.
        //boolean ret2 = hdfs.createNewFile(destFilePath);

        // delete the file "file.csv" but preserve the directory structure /user/biadmin/csv/temp/out/
        //boolean ret3 = hdfs.delete(destFilePath, true);

        // finally, move the data to destFilePath = /user/biadmin/csv/temp/out/file.csv
        //boolean ret4 = hdfs.rename(srcFilePath, destFilePath);

        //System.out.println("Return values = del:" + ret1 + ", createNew:" + ret2 + ", del:" + ret3 + ", rename:" + ret4);

        return;
    }

    // construct the header line
    StringBuilder sb = new StringBuilder();
    for (int i = 0; i < clen; i++) {
        sb.append("C" + (i + 1));
        if (i < clen - 1)
            sb.append(_props.getDelim());
    }
    sb.append('\n');

    if (hdfs.isDirectory(srcFilePath)) {
        // compute sorted order among part files
        ArrayList<Path> files = new ArrayList<Path>();
        for (FileStatus stat : hdfs.listStatus(srcFilePath, CSVReblockMR.hiddenFileFilter))
            files.add(stat.getPath());
        Collections.sort(files);

        // first part file path
        Path firstpart = files.get(0);

        // create a temp file, and add header and contents of first part
        Path tmp = new Path(firstpart.toString() + ".tmp");
        OutputStream out = hdfs.create(tmp, true);
        out.write(sb.toString().getBytes());
        sb.setLength(0);

        // copy rest of the data from firstpart
        InputStream in = null;
        try {
            in = hdfs.open(firstpart);
            IOUtils.copyBytes(in, out, conf, true);
        }
        finally {
            IOUtilFunctions.closeSilently(in);
            IOUtilFunctions.closeSilently(out);
        }

        // rename tmp to firstpart
        hdfs.delete(firstpart, true);
        hdfs.rename(tmp, firstpart);

        // rename srcfile to destFile
        hdfs.delete(destFilePath, true);
        hdfs.createNewFile(destFilePath); // force the creation of directory structure
        hdfs.delete(destFilePath, true);  // delete the file, but preserve the directory structure
        hdfs.rename(srcFilePath, destFilePath); // move the data
    }
    else if (hdfs.isFile(srcFilePath)) {
        // create destination file
        OutputStream out = hdfs.create(destFilePath, true);

        // write header
        out.write(sb.toString().getBytes());
        sb.setLength(0);

        // copy the data from srcFile
        InputStream in = null;
        try {
            in = hdfs.open(srcFilePath);
            IOUtils.copyBytes(in, out, conf, true);
        }
        finally {
            IOUtilFunctions.closeSilently(in);
            IOUtilFunctions.closeSilently(out);
        }
    }
    else {
        throw new IOException(srcFilePath.toString() + ": No such file or directory");
    }
}
From source file:com.ibm.bi.dml.runtime.matrix.CleanupMR.java
License:Open Source License
/**
 * @param path
 * @param numTasks
 * @throws DMLRuntimeException
 * @throws IOException
 */
private static void writeCleanupTasksToFile(Path path, int numTasks)
    throws DMLRuntimeException, IOException
{
    BufferedWriter br = null;
    try {
        FileSystem fs = FileSystem.get(ConfigurationManager.getCachedJobConf());
        br = new BufferedWriter(new OutputStreamWriter(fs.create(path, true)));

        for (int i = 1; i <= numTasks; i++)
            br.write("CLEANUP TASK " + i + "\n");
    }
    catch (Exception ex) {
        throw new DMLRuntimeException("Error writing cleanup tasks to taskfile " + path.toString(), ex);
    }
    finally {
        if (br != null)
            br.close();
    }
}
From source file:com.ibm.bi.dml.runtime.matrix.sort.CompactOutputFormat.java
License:Open Source License
public RecordWriter<K, V> getRecordWriter(FileSystem ignored, JobConf job, String name,
        Progressable progress)
    throws IOException
{
    Path file = FileOutputFormat.getTaskOutputPath(job, name);
    FileSystem fs = file.getFileSystem(job);
    FSDataOutputStream fileOut = fs.create(file, progress);
    return new FixedLengthRecordWriter<K, V>(fileOut, job);
}
From source file:com.ibm.bi.dml.runtime.transform.BinAgent.java
License:Open Source License
private void writeTfMtd(int colID, String min, String max, String binwidth, String nbins,
        String tfMtdDir, FileSystem fs, TfUtils agents)
    throws IOException
{
    Path pt = new Path(tfMtdDir + "/Bin/" + agents.getName(colID) + BIN_FILE_SUFFIX);
    BufferedWriter br = new BufferedWriter(new OutputStreamWriter(fs.create(pt, true)));
    br.write(colID + TXMTD_SEP + min + TXMTD_SEP + max + TXMTD_SEP + binwidth + TXMTD_SEP + nbins + "\n");
    br.close();
}