Usage examples for the org.apache.hadoop.mapred.JobConf constructor
public JobConf(Configuration conf)
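Every example below follows the same construction pattern: an already-initialized Configuration is copied into a fresh JobConf, so per-operation settings do not mutate the shared, cached instance. A minimal standalone sketch of that pattern, assuming a plain Configuration as a stand-in for SystemML's ConfigurationManager.getCachedJobConf() and a placeholder path:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.JobConf;

public class JobConfCopyExample {
    public static void main(String[] args) throws Exception {
        //stand-in for a long-lived, cached configuration
        Configuration cached = new Configuration();

        //copy-construct a private JobConf for this read/write operation
        JobConf job = new JobConf(cached);

        //the file system and all paths are then resolved against that conf
        FileSystem fs = FileSystem.get(job);
        Path path = new Path("/tmp/example.csv"); //placeholder path
        System.out.println(fs.exists(path));
    }
}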
From source file:com.ibm.bi.dml.runtime.io.ReaderTextCellParallel.java
License:Open Source License
@Override
public MatrixBlock readMatrixFromHDFS(String fname, long rlen, long clen, int brlen, int bclen, long estnnz)
        throws IOException, DMLRuntimeException {
    //prepare file access
    JobConf job = new JobConf(ConfigurationManager.getCachedJobConf());
    FileSystem fs = FileSystem.get(job);
    Path path = new Path(fname);

    //check existence and non-empty file
    checkValidInputFile(fs, path);

    //allocate output matrix block
    MatrixBlock ret = createOutputMatrixBlock(rlen, clen, estnnz, true, false);

    //core read
    readTextCellMatrixFromHDFS(path, job, ret, rlen, clen, brlen, bclen, _isMMFile);

    //post-processing (representation-specific, change of sparse/dense block representation)
    if (ret.isInSparseFormat())
        ret.sortSparseRows();
    else
        ret.recomputeNonZeros();
    ret.examSparsity();

    return ret;
}
From source file:com.ibm.bi.dml.runtime.io.ReaderTextCSV.java
License:Open Source License
@Override
public MatrixBlock readMatrixFromHDFS(String fname, long rlen, long clen, int brlen, int bclen, long estnnz)
        throws IOException, DMLRuntimeException {
    //allocate output matrix block
    MatrixBlock ret = null;
    if (rlen > 0 && clen > 0) //otherwise CSV reblock based on file size for matrix w/ unknown dimensions
        ret = createOutputMatrixBlock(rlen, clen, estnnz, true, false);

    //prepare file access
    JobConf job = new JobConf(ConfigurationManager.getCachedJobConf());
    FileSystem fs = FileSystem.get(job);
    Path path = new Path(fname);

    //check existence and non-empty file
    checkValidInputFile(fs, path);

    //core read
    ret = readCSVMatrixFromHDFS(path, job, fs, ret, rlen, clen, brlen, bclen, _props.hasHeader(),
            _props.getDelim(), _props.isFill(), _props.getFillValue());

    //finally check if change of sparse/dense block representation required
    //(nnz explicitly maintained during read)
    ret.examSparsity();

    return ret;
}
From source file:com.ibm.bi.dml.runtime.io.ReaderTextCSVParallel.java
License:Open Source License
@Override
public MatrixBlock readMatrixFromHDFS(String fname, long rlen, long clen, int brlen, int bclen, long estnnz)
        throws IOException, DMLRuntimeException {
    // prepare file access
    JobConf job = new JobConf(ConfigurationManager.getCachedJobConf());
    FileSystem fs = FileSystem.get(job);
    Path path = new Path(fname);
    FileInputFormat.addInputPath(job, path);

    TextInputFormat informat = new TextInputFormat();
    informat.configure(job);

    InputSplit[] splits = informat.getSplits(job, _numThreads);
    if (splits[0] instanceof FileSplit) {
        // The splits do not always arrive in order by file name.
        // Sort the splits lexicographically by path so that the header will
        // be in the first split.
        // Note that we're assuming that the splits come in order by offset
        Arrays.sort(splits, new Comparator<InputSplit>() {
            @Override
            public int compare(InputSplit o1, InputSplit o2) {
                Path p1 = ((FileSplit) o1).getPath();
                Path p2 = ((FileSplit) o2).getPath();
                return p1.toString().compareTo(p2.toString());
            }
        });
    }

    // check existence and non-empty file
    checkValidInputFile(fs, path);

    // allocate output matrix block
    // First Read Pass (count rows/cols, determine offsets, allocate matrix block)
    MatrixBlock ret = computeCSVSizeAndCreateOutputMatrixBlock(splits, path, job, _props.hasHeader(),
            _props.getDelim(), estnnz);
    rlen = ret.getNumRows();
    clen = ret.getNumColumns();

    // Second Read Pass (read, parse strings, append to matrix block)
    readCSVMatrixFromHDFS(splits, path, job, ret, rlen, clen, brlen, bclen, _props.hasHeader(),
            _props.getDelim(), _props.isFill(), _props.getFillValue());

    //post-processing (representation-specific, change of sparse/dense block representation)
    // - no sorting required for CSV because it is read in sorted order per row
    // - nnz explicitly maintained in parallel for the individual splits
    ret.examSparsity();

    // sanity check for parallel row count (since determined internally)
    if (rlen > 0 && rlen != ret.getNumRows())
        throw new DMLRuntimeException("Read matrix inconsistent with given meta data: " + "expected nrow="
                + rlen + ", real nrow=" + ret.getNumRows());

    return ret;
}
From source file:com.ibm.bi.dml.runtime.io.WriterBinaryBlock.java
License:Open Source License
@Override
public void writeMatrixToHDFS(MatrixBlock src, String fname, long rlen, long clen, int brlen, int bclen, long nnz)
        throws IOException, DMLRuntimeException, DMLUnsupportedOperationException {
    //prepare file access
    JobConf job = new JobConf(ConfigurationManager.getCachedJobConf());
    Path path = new Path(fname);

    //if the file already exists on HDFS, remove it.
    MapReduceTool.deleteFileIfExistOnHDFS(fname);

    //core write
    if (src.isDiag())
        writeDiagBinaryBlockMatrixToHDFS(path, job, src, rlen, clen, brlen, bclen, _replication);
    else
        writeBinaryBlockMatrixToHDFS(path, job, src, rlen, clen, brlen, bclen, _replication);
}
From source file:com.ibm.bi.dml.runtime.io.WriterBinaryBlock.java
License:Open Source License
@Override
@SuppressWarnings("deprecation")
public void writeEmptyMatrixToHDFS(String fname, long rlen, long clen, int brlen, int bclen)
        throws IOException, DMLRuntimeException {
    JobConf job = new JobConf(ConfigurationManager.getCachedJobConf());
    Path path = new Path(fname);
    FileSystem fs = FileSystem.get(job);

    SequenceFile.Writer writer = new SequenceFile.Writer(fs, job, path, MatrixIndexes.class, MatrixBlock.class);
    MatrixIndexes index = new MatrixIndexes(1, 1);
    MatrixBlock block = new MatrixBlock((int) Math.min(rlen, brlen), (int) Math.min(clen, bclen), true);
    writer.append(index, block);
    writer.close();
}
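The SequenceFile.Writer constructor used above is deprecated, which is why the method carries @SuppressWarnings("deprecation"). On Hadoop 2.x and later an equivalent writer can be obtained from the option-based factory; a minimal sketch of a drop-in replacement for the constructor call, reusing the job and path variables from the example above:

//non-deprecated equivalent of new SequenceFile.Writer(fs, job, path, MatrixIndexes.class, MatrixBlock.class)
//JobConf extends Configuration, so it can be passed directly
SequenceFile.Writer writer = SequenceFile.createWriter(job,
        SequenceFile.Writer.file(path),
        SequenceFile.Writer.keyClass(MatrixIndexes.class),
        SequenceFile.Writer.valueClass(MatrixBlock.class));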
From source file:com.ibm.bi.dml.runtime.io.WriterBinaryCell.java
License:Open Source License
@Override
public void writeMatrixToHDFS(MatrixBlock src, String fname, long rlen, long clen, int brlen, int bclen, long nnz)
        throws IOException, DMLRuntimeException, DMLUnsupportedOperationException {
    //prepare file access
    JobConf job = new JobConf(ConfigurationManager.getCachedJobConf());
    Path path = new Path(fname);

    //if the file already exists on HDFS, remove it.
    MapReduceTool.deleteFileIfExistOnHDFS(fname);

    //core write
    writeBinaryCellMatrixToHDFS(path, job, src, rlen, clen, brlen, bclen);
}
From source file:com.ibm.bi.dml.runtime.io.WriterBinaryCell.java
License:Open Source License
@Override
@SuppressWarnings("deprecation")
public void writeEmptyMatrixToHDFS(String fname, long rlen, long clen, int brlen, int bclen)
        throws IOException, DMLRuntimeException {
    JobConf job = new JobConf(ConfigurationManager.getCachedJobConf());
    Path path = new Path(fname);
    FileSystem fs = FileSystem.get(job);

    SequenceFile.Writer writer = new SequenceFile.Writer(fs, job, path, MatrixIndexes.class, MatrixCell.class);
    MatrixIndexes index = new MatrixIndexes(1, 1);
    MatrixCell cell = new MatrixCell(0);
    writer.append(index, cell);
    writer.close();
}
From source file:com.ibm.bi.dml.runtime.io.WriterMatrixMarket.java
License:Open Source License
@Override
public void writeMatrixToHDFS(MatrixBlock src, String fname, long rlen, long clen, int brlen, int bclen, long nnz)
        throws IOException, DMLRuntimeException, DMLUnsupportedOperationException {
    //validity check matrix dimensions
    if (src.getNumRows() != rlen || src.getNumColumns() != clen) {
        throw new IOException("Matrix dimensions mismatch with metadata: " + src.getNumRows() + "x"
                + src.getNumColumns() + " vs " + rlen + "x" + clen + ".");
    }

    //prepare file access
    JobConf job = new JobConf(ConfigurationManager.getCachedJobConf());
    Path path = new Path(fname);

    //if the file already exists on HDFS, remove it.
    MapReduceTool.deleteFileIfExistOnHDFS(fname);

    //core write
    writeMatrixMarketMatrixToHDFS(path, job, src, rlen, clen, nnz);
}
From source file:com.ibm.bi.dml.runtime.io.WriterTextCell.java
License:Open Source License
@Override
public void writeMatrixToHDFS(MatrixBlock src, String fname, long rlen, long clen, int brlen, int bclen, long nnz)
        throws IOException, DMLRuntimeException, DMLUnsupportedOperationException {
    //validity check matrix dimensions
    if (src.getNumRows() != rlen || src.getNumColumns() != clen) {
        throw new IOException("Matrix dimensions mismatch with metadata: " + src.getNumRows() + "x"
                + src.getNumColumns() + " vs " + rlen + "x" + clen + ".");
    }

    //prepare file access
    JobConf job = new JobConf(ConfigurationManager.getCachedJobConf());
    Path path = new Path(fname);

    //if the file already exists on HDFS, remove it.
    MapReduceTool.deleteFileIfExistOnHDFS(fname);

    //core write
    writeTextCellMatrixToHDFS(path, job, src, rlen, clen);
}
From source file:com.ibm.bi.dml.runtime.io.WriterTextCSV.java
License:Open Source License
@Override
public void writeMatrixToHDFS(MatrixBlock src, String fname, long rlen, long clen, int brlen, int bclen, long nnz)
        throws IOException, DMLRuntimeException, DMLUnsupportedOperationException {
    //validity check matrix dimensions
    if (src.getNumRows() != rlen || src.getNumColumns() != clen) {
        throw new IOException("Matrix dimensions mismatch with metadata: " + src.getNumRows() + "x"
                + src.getNumColumns() + " vs " + rlen + "x" + clen + ".");
    }

    //prepare file access
    JobConf job = new JobConf(ConfigurationManager.getCachedJobConf());
    Path path = new Path(fname);

    //if the file already exists on HDFS, remove it.
    MapReduceTool.deleteFileIfExistOnHDFS(fname);

    //core write
    writeCSVMatrixToHDFS(path, job, src, rlen, clen, nnz, _props);
}