Usage examples for the org.apache.hadoop.mapred.JobConf constructor
public JobConf(Configuration conf)
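Every example below follows the same construction pattern: an already-initialized Configuration is copied into a fresh JobConf, so per-operation settings do not mutate the shared, cached instance. A minimal standalone sketch of that pattern, assuming a plain Configuration as a stand-in for SystemML's ConfigurationManager.getCachedJobConf() and a placeholder path:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.JobConf;

public class JobConfCopyExample {
    public static void main(String[] args) throws Exception {
        //stand-in for a long-lived, cached configuration
        Configuration cached = new Configuration();

        //copy-construct a private JobConf for this read/write operation
        JobConf job = new JobConf(cached);

        //the file system and all paths are then resolved against that conf
        FileSystem fs = FileSystem.get(job);
        Path path = new Path("/tmp/example.csv"); //placeholder path
        System.out.println(fs.exists(path));
    }
}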
From source file:com.ibm.bi.dml.runtime.io.ReaderTextCellParallel.java
License:Open Source License
@Override
public MatrixBlock readMatrixFromHDFS(String fname, long rlen, long clen, int brlen, int bclen, long estnnz)
        throws IOException, DMLRuntimeException {
    //prepare file access
    JobConf job = new JobConf(ConfigurationManager.getCachedJobConf());
    FileSystem fs = FileSystem.get(job);
    Path path = new Path(fname);

    //check existence and non-empty file
    checkValidInputFile(fs, path);

    //allocate output matrix block
    MatrixBlock ret = createOutputMatrixBlock(rlen, clen, estnnz, true, false);

    //core read
    readTextCellMatrixFromHDFS(path, job, ret, rlen, clen, brlen, bclen, _isMMFile);

    //post-processing (representation-specific, change of sparse/dense block representation)
    if (ret.isInSparseFormat())
        ret.sortSparseRows();
    else
        ret.recomputeNonZeros();
    ret.examSparsity();

    return ret;
}
From source file:com.ibm.bi.dml.runtime.io.ReaderTextCSV.java
License:Open Source License
@Override
public MatrixBlock readMatrixFromHDFS(String fname, long rlen, long clen, int brlen, int bclen, long estnnz)
        throws IOException, DMLRuntimeException {
    //allocate output matrix block
    MatrixBlock ret = null;
    if (rlen > 0 && clen > 0) //otherwise CSV reblock based on file size for matrix w/ unknown dimensions
        ret = createOutputMatrixBlock(rlen, clen, estnnz, true, false);

    //prepare file access
    JobConf job = new JobConf(ConfigurationManager.getCachedJobConf());
    FileSystem fs = FileSystem.get(job);
    Path path = new Path(fname);

    //check existence and non-empty file
    checkValidInputFile(fs, path);

    //core read
    ret = readCSVMatrixFromHDFS(path, job, fs, ret, rlen, clen, brlen, bclen, _props.hasHeader(),
            _props.getDelim(), _props.isFill(), _props.getFillValue());

    //finally check if change of sparse/dense block representation required
    //(nnz explicitly maintained during read)
    ret.examSparsity();

    return ret;
}
From source file:com.ibm.bi.dml.runtime.io.ReaderTextCSVParallel.java
License:Open Source License
@Override
public MatrixBlock readMatrixFromHDFS(String fname, long rlen, long clen, int brlen, int bclen, long estnnz)
        throws IOException, DMLRuntimeException {
    // prepare file access
    JobConf job = new JobConf(ConfigurationManager.getCachedJobConf());
    FileSystem fs = FileSystem.get(job);
    Path path = new Path(fname);
    FileInputFormat.addInputPath(job, path);

    TextInputFormat informat = new TextInputFormat();
    informat.configure(job);

    InputSplit[] splits = informat.getSplits(job, _numThreads);
    if (splits[0] instanceof FileSplit) {
        // The splits do not always arrive in order by file name.
        // Sort the splits lexicographically by path so that the header will
        // be in the first split.
        // Note that we're assuming that the splits come in order by offset
        Arrays.sort(splits, new Comparator<InputSplit>() {
            @Override
            public int compare(InputSplit o1, InputSplit o2) {
                Path p1 = ((FileSplit) o1).getPath();
                Path p2 = ((FileSplit) o2).getPath();
                return p1.toString().compareTo(p2.toString());
            }
        });
    }

    // check existence and non-empty file
    checkValidInputFile(fs, path);

    // allocate output matrix block
    // First Read Pass (count rows/cols, determine offsets, allocate matrix block)
    MatrixBlock ret = computeCSVSizeAndCreateOutputMatrixBlock(splits, path, job, _props.hasHeader(),
            _props.getDelim(), estnnz);
    rlen = ret.getNumRows();
    clen = ret.getNumColumns();

    // Second Read Pass (read, parse strings, append to matrix block)
    readCSVMatrixFromHDFS(splits, path, job, ret, rlen, clen, brlen, bclen, _props.hasHeader(),
            _props.getDelim(), _props.isFill(), _props.getFillValue());

    //post-processing (representation-specific, change of sparse/dense block representation)
    // - no sorting required for CSV because it is read in sorted order per row
    // - nnz explicitly maintained in parallel for the individual splits
    ret.examSparsity();

    // sanity check for parallel row count (since determined internally)
    if (rlen > 0 && rlen != ret.getNumRows())
        throw new DMLRuntimeException("Read matrix inconsistent with given meta data: " + "expected nrow="
                + rlen + ", real nrow=" + ret.getNumRows());

    return ret;
}
From source file:com.ibm.bi.dml.runtime.io.WriterBinaryBlock.java
License:Open Source License
@Override
public void writeMatrixToHDFS(MatrixBlock src, String fname, long rlen, long clen, int brlen, int bclen, long nnz)
        throws IOException, DMLRuntimeException, DMLUnsupportedOperationException {
    //prepare file access
    JobConf job = new JobConf(ConfigurationManager.getCachedJobConf());
    Path path = new Path(fname);

    //if the file already exists on HDFS, remove it.
    MapReduceTool.deleteFileIfExistOnHDFS(fname);

    //core write
    if (src.isDiag())
        writeDiagBinaryBlockMatrixToHDFS(path, job, src, rlen, clen, brlen, bclen, _replication);
    else
        writeBinaryBlockMatrixToHDFS(path, job, src, rlen, clen, brlen, bclen, _replication);
}
From source file:com.ibm.bi.dml.runtime.io.WriterBinaryBlock.java
License:Open Source License
@Override
@SuppressWarnings("deprecation")
public void writeEmptyMatrixToHDFS(String fname, long rlen, long clen, int brlen, int bclen)
        throws IOException, DMLRuntimeException {
    JobConf job = new JobConf(ConfigurationManager.getCachedJobConf());
    Path path = new Path(fname);
    FileSystem fs = FileSystem.get(job);

    SequenceFile.Writer writer = new SequenceFile.Writer(fs, job, path, MatrixIndexes.class, MatrixBlock.class);
    MatrixIndexes index = new MatrixIndexes(1, 1);
    MatrixBlock block = new MatrixBlock((int) Math.min(rlen, brlen), (int) Math.min(clen, bclen), true);
    writer.append(index, block);
    writer.close();
}
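The SequenceFile.Writer constructor used above is deprecated, which is why the method carries @SuppressWarnings("deprecation"). On Hadoop 2.x and later an equivalent writer can be obtained from the option-based factory; a minimal sketch of a drop-in replacement for the constructor call, reusing the job and path variables from the example above:

//non-deprecated equivalent of new SequenceFile.Writer(fs, job, path, MatrixIndexes.class, MatrixBlock.class)
//JobConf extends Configuration, so it can be passed directly
SequenceFile.Writer writer = SequenceFile.createWriter(job,
        SequenceFile.Writer.file(path),
        SequenceFile.Writer.keyClass(MatrixIndexes.class),
        SequenceFile.Writer.valueClass(MatrixBlock.class));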
From source file:com.ibm.bi.dml.runtime.io.WriterBinaryCell.java
License:Open Source License
@Override
public void writeMatrixToHDFS(MatrixBlock src, String fname, long rlen, long clen, int brlen, int bclen, long nnz)
        throws IOException, DMLRuntimeException, DMLUnsupportedOperationException {
    //prepare file access
    JobConf job = new JobConf(ConfigurationManager.getCachedJobConf());
    Path path = new Path(fname);

    //if the file already exists on HDFS, remove it.
    MapReduceTool.deleteFileIfExistOnHDFS(fname);

    //core write
    writeBinaryCellMatrixToHDFS(path, job, src, rlen, clen, brlen, bclen);
}
From source file:com.ibm.bi.dml.runtime.io.WriterBinaryCell.java
License:Open Source License
@Override
@SuppressWarnings("deprecation")
public void writeEmptyMatrixToHDFS(String fname, long rlen, long clen, int brlen, int bclen)
        throws IOException, DMLRuntimeException {
    JobConf job = new JobConf(ConfigurationManager.getCachedJobConf());
    Path path = new Path(fname);
    FileSystem fs = FileSystem.get(job);

    SequenceFile.Writer writer = new SequenceFile.Writer(fs, job, path, MatrixIndexes.class, MatrixCell.class);
    MatrixIndexes index = new MatrixIndexes(1, 1);
    MatrixCell cell = new MatrixCell(0);
    writer.append(index, cell);
    writer.close();
}
From source file:com.ibm.bi.dml.runtime.io.WriterMatrixMarket.java
License:Open Source License
@Override
public void writeMatrixToHDFS(MatrixBlock src, String fname, long rlen, long clen, int brlen, int bclen, long nnz)
        throws IOException, DMLRuntimeException, DMLUnsupportedOperationException {
    //validity check matrix dimensions
    if (src.getNumRows() != rlen || src.getNumColumns() != clen) {
        throw new IOException("Matrix dimensions mismatch with metadata: " + src.getNumRows() + "x"
                + src.getNumColumns() + " vs " + rlen + "x" + clen + ".");
    }

    //prepare file access
    JobConf job = new JobConf(ConfigurationManager.getCachedJobConf());
    Path path = new Path(fname);

    //if the file already exists on HDFS, remove it.
    MapReduceTool.deleteFileIfExistOnHDFS(fname);

    //core write
    writeMatrixMarketMatrixToHDFS(path, job, src, rlen, clen, nnz);
}
From source file:com.ibm.bi.dml.runtime.io.WriterTextCell.java
License:Open Source License
@Override
public void writeMatrixToHDFS(MatrixBlock src, String fname, long rlen, long clen, int brlen, int bclen, long nnz)
        throws IOException, DMLRuntimeException, DMLUnsupportedOperationException {
    //validity check matrix dimensions
    if (src.getNumRows() != rlen || src.getNumColumns() != clen) {
        throw new IOException("Matrix dimensions mismatch with metadata: " + src.getNumRows() + "x"
                + src.getNumColumns() + " vs " + rlen + "x" + clen + ".");
    }

    //prepare file access
    JobConf job = new JobConf(ConfigurationManager.getCachedJobConf());
    Path path = new Path(fname);

    //if the file already exists on HDFS, remove it.
    MapReduceTool.deleteFileIfExistOnHDFS(fname);

    //core write
    writeTextCellMatrixToHDFS(path, job, src, rlen, clen);
}
From source file:com.ibm.bi.dml.runtime.io.WriterTextCSV.java
License:Open Source License
@Override
public void writeMatrixToHDFS(MatrixBlock src, String fname, long rlen, long clen, int brlen, int bclen, long nnz)
        throws IOException, DMLRuntimeException, DMLUnsupportedOperationException {
    //validity check matrix dimensions
    if (src.getNumRows() != rlen || src.getNumColumns() != clen) {
        throw new IOException("Matrix dimensions mismatch with metadata: " + src.getNumRows() + "x"
                + src.getNumColumns() + " vs " + rlen + "x" + clen + ".");
    }

    //prepare file access
    JobConf job = new JobConf(ConfigurationManager.getCachedJobConf());
    Path path = new Path(fname);

    //if the file already exists on HDFS, remove it.
    MapReduceTool.deleteFileIfExistOnHDFS(fname);

    //core write
    writeCSVMatrixToHDFS(path, job, src, rlen, clen, nnz, _props);
}