Example usage for org.apache.hadoop.mapred JobConf JobConf

Introduction

On this page you can find usage examples for the org.apache.hadoop.mapred.JobConf constructor JobConf(boolean loadDefaults).

Prototype

public JobConf(boolean loadDefaults) 

Document

A new map/reduce configuration where the behavior of reading from the default resources can be turned off.
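For contrast with the project snippets below, here is a minimal, self-contained sketch of the boolean constructor itself; the key example.key is purely illustrative and not part of any Hadoop default resource:

import org.apache.hadoop.mapred.JobConf;

public class JobConfLoadDefaultsSketch {
    public static void main(String[] args) {
        //loadDefaults=false: start from an empty configuration,
        //skipping core-default.xml, core-site.xml, and related resources
        JobConf conf = new JobConf(false);
        conf.set("example.key", "example-value");
        System.out.println("without defaults: " + conf.size() + " entries");

        //loadDefaults=true behaves like the no-arg constructor:
        //the default resources are read as usual
        JobConf confWithDefaults = new JobConf(true);
        System.out.println("with defaults: " + confWithDefaults.size() + " entries");
    }
}

Note that the examples below do not call this constructor directly; they copy an existing configuration via JobConf(Configuration), reusing a cached JobConf, which presumably avoids re-reading the default resources on every read or write.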

Usage

From source file: com.ibm.bi.dml.runtime.io.ReaderTextCellParallel.java

License: Open Source License

@Override
public MatrixBlock readMatrixFromHDFS(String fname, long rlen, long clen, int brlen, int bclen, long estnnz)
        throws IOException, DMLRuntimeException {
    //prepare file access
    JobConf job = new JobConf(ConfigurationManager.getCachedJobConf());
    FileSystem fs = FileSystem.get(job);
    Path path = new Path(fname);

    //check existence and non-empty file
    checkValidInputFile(fs, path);

    //allocate output matrix block
    MatrixBlock ret = createOutputMatrixBlock(rlen, clen, estnnz, true, false);

    //core read 
    readTextCellMatrixFromHDFS(path, job, ret, rlen, clen, brlen, bclen, _isMMFile);

    //post-processing (representation-specific, change of sparse/dense block representation)
    if (ret.isInSparseFormat())
        ret.sortSparseRows();
    else
        ret.recomputeNonZeros();
    ret.examSparsity();

    return ret;
}

From source file: com.ibm.bi.dml.runtime.io.ReaderTextCSV.java

License: Open Source License

@Override
public MatrixBlock readMatrixFromHDFS(String fname, long rlen, long clen, int brlen, int bclen, long estnnz)
        throws IOException, DMLRuntimeException {
    //allocate output matrix block
    MatrixBlock ret = null;
    if (rlen > 0 && clen > 0) //otherwise CSV reblock based on file size for matrix w/ unknown dimensions
        ret = createOutputMatrixBlock(rlen, clen, estnnz, true, false);

    //prepare file access
    JobConf job = new JobConf(ConfigurationManager.getCachedJobConf());
    FileSystem fs = FileSystem.get(job);
    Path path = new Path(fname);

    //check existence and non-empty file
    checkValidInputFile(fs, path);

    //core read 
    ret = readCSVMatrixFromHDFS(path, job, fs, ret, rlen, clen, brlen, bclen, _props.hasHeader(),
            _props.getDelim(), _props.isFill(), _props.getFillValue());

    //finally check if change of sparse/dense block representation required
    //(nnz explicitly maintained during read)
    ret.examSparsity();

    return ret;
}

From source file: com.ibm.bi.dml.runtime.io.ReaderTextCSVParallel.java

License: Open Source License

@Override
public MatrixBlock readMatrixFromHDFS(String fname, long rlen, long clen, int brlen, int bclen, long estnnz)
        throws IOException, DMLRuntimeException {
    // prepare file access
    JobConf job = new JobConf(ConfigurationManager.getCachedJobConf());
    FileSystem fs = FileSystem.get(job);
    Path path = new Path(fname);

    FileInputFormat.addInputPath(job, path);
    TextInputFormat informat = new TextInputFormat();
    informat.configure(job);

    InputSplit[] splits = informat.getSplits(job, _numThreads);

    if (splits[0] instanceof FileSplit) {
        // The splits do not always arrive in order by file name.
        // Sort the splits lexicographically by path so that the header will
        // be in the first split.
        // Note that we're assuming that the splits come in order by offset
        Arrays.sort(splits, new Comparator<InputSplit>() {
            @Override
            public int compare(InputSplit o1, InputSplit o2) {
                Path p1 = ((FileSplit) o1).getPath();
                Path p2 = ((FileSplit) o2).getPath();
                return p1.toString().compareTo(p2.toString());
            }
        });
    }

    // check existence and non-empty file
    checkValidInputFile(fs, path);

    // allocate output matrix block
    // First Read Pass (count rows/cols, determine offsets, allocate matrix block)
    MatrixBlock ret = computeCSVSizeAndCreateOutputMatrixBlock(splits, path, job, _props.hasHeader(),
            _props.getDelim(), estnnz);
    rlen = ret.getNumRows();
    clen = ret.getNumColumns();

    // Second Read Pass (read, parse strings, append to matrix block)
    readCSVMatrixFromHDFS(splits, path, job, ret, rlen, clen, brlen, bclen, _props.hasHeader(),
            _props.getDelim(), _props.isFill(), _props.getFillValue());

    //post-processing (representation-specific, change of sparse/dense block representation)
    // - no sorting required for CSV because it is read in sorted order per row
    // - nnz explicitly maintained in parallel for the individual splits
    ret.examSparsity();

    // sanity check for parallel row count (since determined internally)
    if (rlen > 0 && rlen != ret.getNumRows())
        throw new DMLRuntimeException("Read matrix inconsistent with given meta data: " + "expected nrow="
                + rlen + ", real nrow=" + ret.getNumRows());

    return ret;
}

From source file: com.ibm.bi.dml.runtime.io.WriterBinaryBlock.java

License: Open Source License

@Override
public void writeMatrixToHDFS(MatrixBlock src, String fname, long rlen, long clen, int brlen, int bclen,
        long nnz) throws IOException, DMLRuntimeException, DMLUnsupportedOperationException {
    //prepare file access
    JobConf job = new JobConf(ConfigurationManager.getCachedJobConf());
    Path path = new Path(fname);

    //if the file already exists on HDFS, remove it.
    MapReduceTool.deleteFileIfExistOnHDFS(fname);

    //core write
    if (src.isDiag())
        writeDiagBinaryBlockMatrixToHDFS(path, job, src, rlen, clen, brlen, bclen, _replication);
    else
        writeBinaryBlockMatrixToHDFS(path, job, src, rlen, clen, brlen, bclen, _replication);
}

From source file: com.ibm.bi.dml.runtime.io.WriterBinaryBlock.java

License: Open Source License

@Override
@SuppressWarnings("deprecation")
public void writeEmptyMatrixToHDFS(String fname, long rlen, long clen, int brlen, int bclen)
        throws IOException, DMLRuntimeException {
    JobConf job = new JobConf(ConfigurationManager.getCachedJobConf());
    Path path = new Path(fname);
    FileSystem fs = FileSystem.get(job);

    SequenceFile.Writer writer = new SequenceFile.Writer(fs, job, path, MatrixIndexes.class, MatrixBlock.class);

    MatrixIndexes index = new MatrixIndexes(1, 1);
    MatrixBlock block = new MatrixBlock((int) Math.min(rlen, brlen), (int) Math.min(clen, bclen), true);
    writer.append(index, block);
    writer.close();
}

From source file: com.ibm.bi.dml.runtime.io.WriterBinaryCell.java

License: Open Source License

@Override
public void writeMatrixToHDFS(MatrixBlock src, String fname, long rlen, long clen, int brlen, int bclen,
        long nnz) throws IOException, DMLRuntimeException, DMLUnsupportedOperationException {
    //prepare file access
    JobConf job = new JobConf(ConfigurationManager.getCachedJobConf());
    Path path = new Path(fname);

    //if the file already exists on HDFS, remove it.
    MapReduceTool.deleteFileIfExistOnHDFS(fname);

    //core write
    writeBinaryCellMatrixToHDFS(path, job, src, rlen, clen, brlen, bclen);
}

From source file: com.ibm.bi.dml.runtime.io.WriterBinaryCell.java

License: Open Source License

@Override
@SuppressWarnings("deprecation")
public void writeEmptyMatrixToHDFS(String fname, long rlen, long clen, int brlen, int bclen)
        throws IOException, DMLRuntimeException {
    JobConf job = new JobConf(ConfigurationManager.getCachedJobConf());
    Path path = new Path(fname);
    FileSystem fs = FileSystem.get(job);

    SequenceFile.Writer writer = new SequenceFile.Writer(fs, job, path, MatrixIndexes.class, MatrixCell.class);

    MatrixIndexes index = new MatrixIndexes(1, 1);
    MatrixCell cell = new MatrixCell(0);
    writer.append(index, cell);
    writer.close();
}

From source file: com.ibm.bi.dml.runtime.io.WriterMatrixMarket.java

License: Open Source License

@Override
public void writeMatrixToHDFS(MatrixBlock src, String fname, long rlen, long clen, int brlen, int bclen,
        long nnz) throws IOException, DMLRuntimeException, DMLUnsupportedOperationException {
    //validity check matrix dimensions
    if (src.getNumRows() != rlen || src.getNumColumns() != clen) {
        throw new IOException("Matrix dimensions mismatch with metadata: " + src.getNumRows() + "x"
                + src.getNumColumns() + " vs " + rlen + "x" + clen + ".");
    }

    //prepare file access
    JobConf job = new JobConf(ConfigurationManager.getCachedJobConf());
    Path path = new Path(fname);

    //if the file already exists on HDFS, remove it.
    MapReduceTool.deleteFileIfExistOnHDFS(fname);

    //core write
    writeMatrixMarketMatrixToHDFS(path, job, src, rlen, clen, nnz);
}

From source file: com.ibm.bi.dml.runtime.io.WriterTextCell.java

License: Open Source License

@Override
public void writeMatrixToHDFS(MatrixBlock src, String fname, long rlen, long clen, int brlen, int bclen,
        long nnz) throws IOException, DMLRuntimeException, DMLUnsupportedOperationException {
    //validity check matrix dimensions
    if (src.getNumRows() != rlen || src.getNumColumns() != clen) {
        throw new IOException("Matrix dimensions mismatch with metadata: " + src.getNumRows() + "x"
                + src.getNumColumns() + " vs " + rlen + "x" + clen + ".");
    }

    //prepare file access
    JobConf job = new JobConf(ConfigurationManager.getCachedJobConf());
    Path path = new Path(fname);

    //if the file already exists on HDFS, remove it.
    MapReduceTool.deleteFileIfExistOnHDFS(fname);

    //core write
    writeTextCellMatrixToHDFS(path, job, src, rlen, clen);
}

From source file: com.ibm.bi.dml.runtime.io.WriterTextCSV.java

License: Open Source License

@Override
public void writeMatrixToHDFS(MatrixBlock src, String fname, long rlen, long clen, int brlen, int bclen,
        long nnz) throws IOException, DMLRuntimeException, DMLUnsupportedOperationException {
    //validity check matrix dimensions
    if (src.getNumRows() != rlen || src.getNumColumns() != clen) {
        throw new IOException("Matrix dimensions mismatch with metadata: " + src.getNumRows() + "x"
                + src.getNumColumns() + " vs " + rlen + "x" + clen + ".");
    }

    //prepare file access
    JobConf job = new JobConf(ConfigurationManager.getCachedJobConf());
    Path path = new Path(fname);

    //if the file already exists on HDFS, remove it.
    MapReduceTool.deleteFileIfExistOnHDFS(fname);

    //core write
    writeCSVMatrixToHDFS(path, job, src, rlen, clen, nnz, _props);
}