List of usage examples for org.apache.hadoop.mapred.JobConf constructors
public JobConf(Configuration conf)
public JobConf(Class exampleClass)
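All of the examples below construct a JobConf in one of these two ways: copying an existing Hadoop Configuration (SystemML passes ConfigurationManager.getCachedJobConf()), or passing the job's main class so Hadoop can locate and ship the containing jar. A minimal standalone sketch of both patterns; the class name and the "/tmp" path are illustrative, not taken from the examples:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.JobConf;

public class JobConfExamples {
    public static void main(String[] args) throws Exception {
        //pattern 1: copy an existing Configuration into a JobConf
        //(the SystemML examples pass ConfigurationManager.getCachedJobConf() here)
        Configuration conf = new Configuration();
        JobConf job = new JobConf(conf);

        //the resulting JobConf also serves for plain HDFS access
        FileSystem fs = FileSystem.get(job);
        System.out.println(fs.exists(new Path("/tmp"))); //illustrative path

        //pattern 2: seed the JobConf with the job's main class so Hadoop
        //can locate the containing jar and ship it to the cluster
        JobConf mrJob = new JobConf(JobConfExamples.class);
        mrJob.setJobName("example-job");
    }
}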
From source file:com.ibm.bi.dml.runtime.controlprogram.parfor.ResultMergeLocalFile.java
License:Open Source License
/**
 * 
 * @param fnameStaging
 * @param fnameStagingCompare
 * @param fnameNew
 * @param metadata
 * @param withCompare
 * @throws IOException
 * @throws DMLRuntimeException
 */
@SuppressWarnings("deprecation")
private void createBinaryCellResultFile(String fnameStaging, String fnameStagingCompare, String fnameNew,
        MatrixFormatMetaData metadata, boolean withCompare) throws IOException, DMLRuntimeException {
    JobConf job = new JobConf(ConfigurationManager.getCachedJobConf());
    FileSystem fs = FileSystem.get(job);
    Path path = new Path(fnameNew);

    MatrixCharacteristics mc = metadata.getMatrixCharacteristics();
    long rlen = mc.getRows();
    long clen = mc.getCols();
    int brlen = mc.getRowsPerBlock();
    int bclen = mc.getColsPerBlock();

    MatrixIndexes indexes = new MatrixIndexes(1, 1);
    MatrixCell cell = new MatrixCell(0);

    SequenceFile.Writer out = new SequenceFile.Writer(fs, job, path, MatrixIndexes.class, MatrixCell.class); //beware ca 50ms
    try {
        boolean written = false;
        for (long brow = 1; brow <= (long) Math.ceil(rlen / (double) brlen); brow++)
            for (long bcol = 1; bcol <= (long) Math.ceil(clen / (double) bclen); bcol++) {
                File dir = new File(fnameStaging + "/" + brow + "_" + bcol);
                File dir2 = new File(fnameStagingCompare + "/" + brow + "_" + bcol);
                MatrixBlock mb = null;

                long row_offset = (brow - 1) * brlen + 1;
                long col_offset = (bcol - 1) * bclen + 1;

                if (dir.exists()) {
                    if (withCompare && dir2.exists()) //WITH COMPARE BLOCK
                    {
                        //copy only values that are different from the original
                        String[] lnames2 = dir2.list();
                        if (lnames2.length != 1) //there should be exactly 1 compare block
                            throw new DMLRuntimeException(
                                    "Unable to merge results because multiple compare blocks found.");
                        mb = StagingFileUtils.readCellList2BlockFromLocal(dir2 + "/" + lnames2[0], brlen, bclen);
                        boolean appendOnly = mb.isInSparseFormat();
                        double[][] compare = DataConverter.convertToDoubleMatrix(mb);

                        String[] lnames = dir.list();
                        for (String lname : lnames) {
                            MatrixBlock tmp = StagingFileUtils.readCellList2BlockFromLocal(dir + "/" + lname,
                                    brlen, bclen);
                            mergeWithComp(mb, tmp, compare);
                        }

                        //sort sparse due to append-only
                        if (appendOnly)
                            mb.sortSparseRows();

                        //change sparsity if required after
                        mb.examSparsity();
                    } else //WITHOUT COMPARE BLOCK
                    {
                        //copy all non-zeros from all workers
                        String[] lnames = dir.list();
                        boolean appendOnly = false;
                        for (String lname : lnames) {
                            if (mb == null) {
                                mb = StagingFileUtils.readCellList2BlockFromLocal(dir + "/" + lname, brlen, bclen);
                                appendOnly = mb.isInSparseFormat();
                            } else {
                                MatrixBlock tmp = StagingFileUtils
                                        .readCellList2BlockFromLocal(dir + "/" + lname, brlen, bclen);
                                mergeWithoutComp(mb, tmp, appendOnly);
                            }
                        }

                        //sort sparse due to append-only
                        if (appendOnly)
                            mb.sortSparseRows();

                        //change sparsity if required after
                        mb.examSparsity();
                    }
                }

                //write the block to binary cell
                if (mb != null) {
                    if (mb.isInSparseFormat()) {
                        SparseRowsIterator iter = mb.getSparseRowsIterator();
                        while (iter.hasNext()) {
                            IJV lcell = iter.next();
                            indexes.setIndexes(row_offset + lcell.i, col_offset + lcell.j);
                            cell.setValue(lcell.v);
                            out.append(indexes, cell);
                            written = true;
                        }
                    } else {
                        for (int i = 0; i < brlen; i++)
                            for (int j = 0; j < bclen; j++) {
                                double lvalue = mb.getValueDenseUnsafe(i, j);
                                if (lvalue != 0) //for nnz
                                {
                                    indexes.setIndexes(row_offset + i, col_offset + j);
                                    cell.setValue(lvalue);
                                    out.append(indexes, cell);
                                    written = true;
                                }
                            }
                    }
                }
            }

        if (!written)
            out.append(indexes, cell);
    } finally {
        if (out != null)
            out.close();
    }
}
From source file:com.ibm.bi.dml.runtime.controlprogram.parfor.ResultMergeLocalFile.java
License:Open Source License
/**
 * 
 * @param fnameNew
 * @param inMO
 * @throws CacheException
 * @throws IOException
 */
private void copyAllFiles(String fnameNew, ArrayList<MatrixObject> inMO) throws CacheException, IOException {
    JobConf job = new JobConf(ConfigurationManager.getCachedJobConf());
    FileSystem fs = FileSystem.get(job);
    Path path = new Path(fnameNew);

    //create output dir
    fs.mkdirs(path);

    //merge in all input matrix objects
    IDSequence seq = new IDSequence();
    for (MatrixObject in : inMO) {
        LOG.trace("ResultMerge (local, file): Merge input " + in.getVarName() + " (fname=" + in.getFileName()
                + ") via file rename.");

        //copy over files (just rename file or entire dir)
        Path tmpPath = new Path(in.getFileName());
        String lname = tmpPath.getName();
        fs.rename(tmpPath, new Path(fnameNew + "/" + lname + seq.getNextID()));
    }
}
From source file:com.ibm.bi.dml.runtime.controlprogram.parfor.ResultMergeRemoteMR.java
License:Open Source License
/**
 * 
 * @param fname null if no comparison required
 * @param fnameNew
 * @param srcFnames
 * @param ii
 * @param oi
 * @param rlen
 * @param clen
 * @param brlen
 * @param bclen
 * @throws DMLRuntimeException
 */
@SuppressWarnings({ "unused", "deprecation" })
protected void executeMerge(String fname, String fnameNew, String[] srcFnames, InputInfo ii, OutputInfo oi,
        long rlen, long clen, int brlen, int bclen) throws DMLRuntimeException {
    String jobname = "ParFor-RMMR";
    long t0 = DMLScript.STATISTICS ? System.nanoTime() : 0;

    JobConf job;
    job = new JobConf(ResultMergeRemoteMR.class);
    job.setJobName(jobname + _pfid);

    //maintain dml script counters
    Statistics.incrementNoOfCompiledMRJobs();

    //warning for textcell/binarycell without compare
    boolean withCompare = (fname != null);
    if ((oi == OutputInfo.TextCellOutputInfo || oi == OutputInfo.BinaryCellOutputInfo) && !withCompare
            && ResultMergeLocalFile.ALLOW_COPY_CELLFILES)
        LOG.warn("Result merge for " + OutputInfo.outputInfoToString(oi)
                + " without compare can be realized more efficiently with LOCAL_FILE than REMOTE_MR.");

    try {
        Path pathCompare = null;
        Path pathNew = new Path(fnameNew);

        /////
        //configure the MR job
        if (withCompare) {
            pathCompare = new Path(fname).makeQualified(FileSystem.get(job));
            MRJobConfiguration.setResultMergeInfo(job, pathCompare.toString(), ii,
                    LocalFileUtils.getWorkingDir(LocalFileUtils.CATEGORY_RESULTMERGE), rlen, clen, brlen, bclen);
        } else
            MRJobConfiguration.setResultMergeInfo(job, "null", ii,
                    LocalFileUtils.getWorkingDir(LocalFileUtils.CATEGORY_RESULTMERGE), rlen, clen, brlen, bclen);

        //set mappers, reducers, combiners
        job.setMapperClass(ResultMergeRemoteMapper.class);
        job.setReducerClass(ResultMergeRemoteReducer.class);

        if (oi == OutputInfo.TextCellOutputInfo) {
            job.setMapOutputKeyClass(MatrixIndexes.class);
            job.setMapOutputValueClass(TaggedMatrixCell.class);
            job.setOutputKeyClass(NullWritable.class);
            job.setOutputValueClass(Text.class);
        } else if (oi == OutputInfo.BinaryCellOutputInfo) {
            job.setMapOutputKeyClass(MatrixIndexes.class);
            job.setMapOutputValueClass(TaggedMatrixCell.class);
            job.setOutputKeyClass(MatrixIndexes.class);
            job.setOutputValueClass(MatrixCell.class);
        } else if (oi == OutputInfo.BinaryBlockOutputInfo) {
            //setup partitioning, grouping, sorting for composite key (old API)
            job.setPartitionerClass(ResultMergeRemotePartitioning.class); //partitioning
            job.setOutputValueGroupingComparator(ResultMergeRemoteGrouping.class); //grouping
            job.setOutputKeyComparatorClass(ResultMergeRemoteSorting.class); //sorting

            job.setMapOutputKeyClass(ResultMergeTaggedMatrixIndexes.class);
            job.setMapOutputValueClass(TaggedMatrixBlock.class);
            job.setOutputKeyClass(MatrixIndexes.class);
            job.setOutputValueClass(MatrixBlock.class);
        }

        //set input format
        job.setInputFormat(ii.inputFormatClass);

        //set the input path
        Path[] paths = null;
        if (withCompare) {
            paths = new Path[srcFnames.length + 1];
            paths[0] = pathCompare;
            for (int i = 1; i < paths.length; i++)
                paths[i] = new Path(srcFnames[i - 1]);
        } else {
            paths = new Path[srcFnames.length];
            for (int i = 0; i < paths.length; i++)
                paths[i] = new Path(srcFnames[i]);
        }
        FileInputFormat.setInputPaths(job, paths);

        //set output format
        job.setOutputFormat(oi.outputFormatClass);

        //set output path
        MapReduceTool.deleteFileIfExistOnHDFS(fnameNew);
        FileOutputFormat.setOutputPath(job, pathNew);

        //////
        //set optimization parameters

        //set the number of mappers and reducers
        //job.setNumMapTasks( _numMappers ); //use default num mappers
        long reducerGroups = _numReducers;
        if (oi == OutputInfo.BinaryBlockOutputInfo)
            reducerGroups = Math.max(rlen / brlen, 1) * Math.max(clen / bclen, 1);
        else //textcell/binarycell
            reducerGroups = Math.max((rlen * clen) / StagingFileUtils.CELL_BUFFER_SIZE, 1);
        job.setNumReduceTasks((int) Math.min(_numReducers, reducerGroups));

        //use FLEX scheduler configuration properties
        if (ParForProgramBlock.USE_FLEX_SCHEDULER_CONF) {
            job.setInt("flex.map.min", 0);
            job.setInt("flex.map.max", _numMappers);
            job.setInt("flex.reduce.min", 0);
            job.setInt("flex.reduce.max", _numMappers);
        }

        //disable automatic tasks timeouts and speculative task exec
        job.setInt("mapred.task.timeout", 0);
        job.setMapSpeculativeExecution(false);

        //set up preferred custom serialization framework for binary block format
        if (MRJobConfiguration.USE_BINARYBLOCK_SERIALIZATION)
            MRJobConfiguration.addBinaryBlockSerializationFramework(job);

        //enables the reuse of JVMs (multiple tasks per MR task)
        if (_jvmReuse)
            job.setNumTasksToExecutePerJvm(-1); //unlimited

        //enables compression - not conclusive for different codecs (empirically good compression ratio, but significantly slower)
        //job.set("mapred.compress.map.output", "true");
        //job.set("mapred.map.output.compression.codec", "org.apache.hadoop.io.compress.GzipCodec");

        //set the replication factor for the results
        job.setInt("dfs.replication", _replication);

        //set the max number of retries per map task
        //  disabled job-level configuration to respect cluster configuration
        //  note: this refers to hadoop2, hence it never had effect on mr1
        //job.setInt("mapreduce.map.maxattempts", _max_retry);

        //set unique working dir
        MRJobConfiguration.setUniqueWorkingDir(job);

        /////
        // execute the MR job
        JobClient.runJob(job);

        //maintain dml script counters
        Statistics.incrementNoOfExecutedMRJobs();
    } catch (Exception ex) {
        throw new DMLRuntimeException(ex);
    }

    if (DMLScript.STATISTICS) {
        long t1 = System.nanoTime();
        Statistics.maintainCPHeavyHitters("MR-Job_" + jobname, t1 - t0);
    }
}
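The executeMerge example above is the full old-API (org.apache.hadoop.mapred) job-configuration pattern: the JobConf carries the mapper/reducer classes, key/value types, input/output formats and paths, plus tuning knobs, and JobClient.runJob(job) submits it. A stripped-down sketch of the same pattern, using Hadoop's identity mapper/reducer in place of SystemML's merge classes; the class name and the args-based input/output paths are assumptions for illustration:

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.TextInputFormat;
import org.apache.hadoop.mapred.TextOutputFormat;
import org.apache.hadoop.mapred.lib.IdentityMapper;
import org.apache.hadoop.mapred.lib.IdentityReducer;

public class MinimalOldApiJob {
    public static void main(String[] args) throws Exception {
        JobConf job = new JobConf(MinimalOldApiJob.class); //ships the containing jar
        job.setJobName("minimal-old-api-job");

        //identity map/reduce; real jobs substitute their own classes
        job.setMapperClass(IdentityMapper.class);
        job.setReducerClass(IdentityReducer.class);

        //key/value types as produced by TextInputFormat
        job.setOutputKeyClass(LongWritable.class);
        job.setOutputValueClass(Text.class);

        //input/output formats and paths
        job.setInputFormat(TextInputFormat.class);
        job.setOutputFormat(TextOutputFormat.class);
        FileInputFormat.setInputPaths(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        JobClient.runJob(job); //blocks until the job completes
    }
}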
From source file:com.ibm.bi.dml.runtime.instructions.cp.DataPartitionCPInstruction.java
License:Open Source License
@Override
public void processInstruction(ExecutionContext ec) throws DMLUnsupportedOperationException, DMLRuntimeException {
    //get input
    MatrixObject moIn = (MatrixObject) ec.getVariable(input1.getName());
    MatrixBlock mb = moIn.acquireRead();

    //execute operations
    MatrixObject moOut = (MatrixObject) ec.getVariable(output.getName());
    String fname = moOut.getFileName();
    moOut.setPartitioned(_pformat, -1); //modify meta data output
    try {
        //write matrix partitions to hdfs
        WriterBinaryBlock writer = (WriterBinaryBlock) MatrixWriterFactory
                .createMatrixWriter(OutputInfo.BinaryBlockOutputInfo);
        writer.writePartitionedBinaryBlockMatrixToHDFS(new Path(fname),
                new JobConf(ConfigurationManager.getCachedJobConf()), mb, moIn.getNumRows(),
                moIn.getNumColumns(), (int) moIn.getNumRowsPerBlock(), (int) moIn.getNumColumnsPerBlock(),
                _pformat);

        //ensure correctness of output characteristics (required if input unknown during compile and no recompile)
        MatrixCharacteristics mc = new MatrixCharacteristics(moIn.getNumRows(), moIn.getNumColumns(),
                (int) moIn.getNumRowsPerBlock(), (int) moIn.getNumColumnsPerBlock(), moIn.getNnz());
        MatrixFormatMetaData meta = new MatrixFormatMetaData(mc, OutputInfo.BinaryBlockOutputInfo,
                InputInfo.BinaryBlockInputInfo);
        moOut.setMetaData(meta);
    } catch (Exception ex) {
        throw new DMLRuntimeException("Failed to execute data partitioning instruction.", ex);
    }

    //release input
    ec.releaseMatrixInput(input1.getName());
}
From source file:com.ibm.bi.dml.runtime.io.ReaderBinaryBlock.java
License:Open Source License
@Override
public MatrixBlock readMatrixFromHDFS(String fname, long rlen, long clen, int brlen, int bclen, long estnnz)
        throws IOException, DMLRuntimeException {
    //allocate output matrix block
    MatrixBlock ret = createOutputMatrixBlock(rlen, clen, estnnz, false, false);

    //prepare file access
    JobConf job = new JobConf(ConfigurationManager.getCachedJobConf());
    FileSystem fs = _localFS ? FileSystem.getLocal(job) : FileSystem.get(job);
    Path path = new Path((_localFS ? "file:///" : "") + fname);

    //check existence and non-empty file
    checkValidInputFile(fs, path);

    //core read
    readBinaryBlockMatrixFromHDFS(path, job, fs, ret, rlen, clen, brlen, bclen);

    //finally check if change of sparse/dense block representation required
    if (!AGGREGATE_BLOCK_NNZ)
        ret.recomputeNonZeros();
    ret.examSparsity();

    return ret;
}
From source file:com.ibm.bi.dml.runtime.io.ReaderBinaryBlock.java
License:Open Source License
/**
 * 
 * @param fname
 * @param rlen
 * @param clen
 * @param brlen
 * @param bclen
 * @return
 * @throws IOException
 * @throws DMLRuntimeException
 */
public ArrayList<IndexedMatrixValue> readIndexedMatrixBlocksFromHDFS(String fname, long rlen, long clen,
        int brlen, int bclen) throws IOException, DMLRuntimeException {
    //allocate output matrix block collection
    ArrayList<IndexedMatrixValue> ret = new ArrayList<IndexedMatrixValue>();

    //prepare file access
    JobConf job = new JobConf(ConfigurationManager.getCachedJobConf());
    FileSystem fs = _localFS ? FileSystem.getLocal(job) : FileSystem.get(job);
    Path path = new Path((_localFS ? "file:///" : "") + fname);

    //check existence and non-empty file
    checkValidInputFile(fs, path);

    //core read
    readBinaryBlockMatrixBlocksFromHDFS(path, job, fs, ret, rlen, clen, brlen, bclen);

    return ret;
}
From source file:com.ibm.bi.dml.runtime.io.ReaderBinaryBlockParallel.java
License:Open Source License
@Override
public MatrixBlock readMatrixFromHDFS(String fname, long rlen, long clen, int brlen, int bclen, long estnnz)
        throws IOException, DMLRuntimeException {
    //allocate output matrix block (incl block allocation for parallel)
    MatrixBlock ret = createOutputMatrixBlock(rlen, clen, estnnz, true, true);

    //prepare file access
    JobConf job = new JobConf(ConfigurationManager.getCachedJobConf());
    FileSystem fs = _localFS ? FileSystem.getLocal(job) : FileSystem.get(job);
    Path path = new Path((_localFS ? "file:///" : "") + fname);

    //check existence and non-empty file
    checkValidInputFile(fs, path);

    //core read
    readBinaryBlockMatrixFromHDFS(path, job, fs, ret, rlen, clen, brlen, bclen);

    //finally check if change of sparse/dense block representation required
    if (!AGGREGATE_BLOCK_NNZ)
        ret.recomputeNonZeros();
    ret.examSparsity();

    return ret;
}
From source file:com.ibm.bi.dml.runtime.io.ReaderBinaryBlockParFiles.java
License:Open Source License
@Override
public MatrixBlock readMatrixFromHDFS(String fname, long rlen, long clen, int brlen, int bclen, long estnnz)
        throws IOException, DMLRuntimeException {
    //allocate output matrix block
    MatrixBlock ret = createOutputMatrixBlock(rlen, clen, estnnz, false, false);

    //prepare file access
    JobConf job = new JobConf(ConfigurationManager.getCachedJobConf());
    FileSystem fs = _localFS ? FileSystem.getLocal(job) : FileSystem.get(job);
    Path path = new Path((_localFS ? "file:///" : "") + fname);

    //check existence and non-empty file
    checkValidInputFile(fs, path);

    //core read
    readBinaryBlockMatrixFromHDFS(path, job, fs, ret, rlen, clen, brlen, bclen);

    //finally check if change of sparse/dense block representation required
    ret.recomputeNonZeros();
    ret.examSparsity();

    return ret;
}
From source file:com.ibm.bi.dml.runtime.io.ReaderBinaryCell.java
License:Open Source License
@Override
public MatrixBlock readMatrixFromHDFS(String fname, long rlen, long clen, int brlen, int bclen, long estnnz)
        throws IOException, DMLRuntimeException {
    //allocate output matrix block
    MatrixBlock ret = createOutputMatrixBlock(rlen, clen, estnnz, true, false);

    //prepare file access
    JobConf job = new JobConf(ConfigurationManager.getCachedJobConf());
    FileSystem fs = FileSystem.get(job);
    Path path = new Path(fname);

    //check existence and non-empty file
    checkValidInputFile(fs, path);

    //core read
    readBinaryCellMatrixFromHDFS(path, job, fs, ret, rlen, clen, brlen, bclen);

    //finally check if change of sparse/dense block representation required
    //(nnz maintained via append during read for both dense/sparse)
    ret.examSparsity();

    return ret;
}
From source file:com.ibm.bi.dml.runtime.io.ReaderTextCell.java
License:Open Source License
@Override
public MatrixBlock readMatrixFromHDFS(String fname, long rlen, long clen, int brlen, int bclen, long estnnz)
        throws IOException, DMLRuntimeException {
    //allocate output matrix block
    MatrixBlock ret = createOutputMatrixBlock(rlen, clen, estnnz, true, false);

    //prepare file access
    JobConf job = new JobConf(ConfigurationManager.getCachedJobConf());
    FileSystem fs = FileSystem.get(job);
    Path path = new Path(fname);

    //check existence and non-empty file
    checkValidInputFile(fs, path);

    //core read
    if (fs.isDirectory(path))
        readTextCellMatrixFromHDFS(path, job, ret, rlen, clen, brlen, bclen);
    else
        readRawTextCellMatrixFromHDFS(path, job, fs, ret, rlen, clen, brlen, bclen, _isMMFile);

    //finally check if change of sparse/dense block representation required
    if (!ret.isInSparseFormat())
        ret.recomputeNonZeros();
    ret.examSparsity();

    return ret;
}
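The five reader examples share one idiom worth noting: the JobConf built from the cached configuration is handed to FileSystem.get or FileSystem.getLocal to choose between HDFS and the local file system, with the path prefixed by "file:///" in the local case. A minimal sketch of just that selection logic; the FsSelection class name and command-line arguments are illustrative assumptions:

import java.io.IOException;

import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.JobConf;

public class FsSelection {
    public static void main(String[] args) throws Exception {
        boolean localFS = Boolean.parseBoolean(args[0]);
        String fname = args[1];

        //the no-arg constructor loads the default Hadoop resources (core-site.xml etc.)
        JobConf job = new JobConf();

        //select the file system and qualify the path accordingly
        FileSystem fs = localFS ? FileSystem.getLocal(job) : FileSystem.get(job);
        Path path = new Path((localFS ? "file:///" : "") + fname);

        if (!fs.exists(path))
            throw new IOException("File not found on " + (localFS ? "local FS" : "HDFS") + ": " + path);
        System.out.println("length=" + fs.getFileStatus(path).getLen());
    }
}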