List of usage examples for org.apache.hadoop.mapred.JobConf constructors
public JobConf(Configuration conf)
public JobConf(Class exampleClass)
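All of the examples below construct a JobConf in one of these two ways: copying an existing Hadoop Configuration (SystemML passes ConfigurationManager.getCachedJobConf()), or passing the job's main class so Hadoop can locate and ship the containing jar. A minimal standalone sketch of both patterns; the class name and the "/tmp" path are illustrative, not taken from the examples:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.JobConf;

public class JobConfExamples {
    public static void main(String[] args) throws Exception {
        //pattern 1: copy an existing Configuration into a JobConf
        //(the SystemML examples pass ConfigurationManager.getCachedJobConf() here)
        Configuration conf = new Configuration();
        JobConf job = new JobConf(conf);

        //the resulting JobConf also serves for plain HDFS access
        FileSystem fs = FileSystem.get(job);
        System.out.println(fs.exists(new Path("/tmp"))); //illustrative path

        //pattern 2: seed the JobConf with the job's main class so Hadoop
        //can locate the containing jar and ship it to the cluster
        JobConf mrJob = new JobConf(JobConfExamples.class);
        mrJob.setJobName("example-job");
    }
}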
From source file:com.ibm.bi.dml.runtime.controlprogram.parfor.ResultMergeLocalFile.java
License:Open Source License
/**
 * 
 * @param fnameStaging
 * @param fnameStagingCompare
 * @param fnameNew
 * @param metadata
 * @param withCompare
 * @throws IOException
 * @throws DMLRuntimeException
 */
@SuppressWarnings("deprecation")
private void createBinaryCellResultFile(String fnameStaging, String fnameStagingCompare, String fnameNew,
        MatrixFormatMetaData metadata, boolean withCompare) throws IOException, DMLRuntimeException {
    JobConf job = new JobConf(ConfigurationManager.getCachedJobConf());
    FileSystem fs = FileSystem.get(job);
    Path path = new Path(fnameNew);

    MatrixCharacteristics mc = metadata.getMatrixCharacteristics();
    long rlen = mc.getRows();
    long clen = mc.getCols();
    int brlen = mc.getRowsPerBlock();
    int bclen = mc.getColsPerBlock();

    MatrixIndexes indexes = new MatrixIndexes(1, 1);
    MatrixCell cell = new MatrixCell(0);

    SequenceFile.Writer out = new SequenceFile.Writer(fs, job, path, MatrixIndexes.class, MatrixCell.class); //beware ca 50ms
    try {
        boolean written = false;
        for (long brow = 1; brow <= (long) Math.ceil(rlen / (double) brlen); brow++)
            for (long bcol = 1; bcol <= (long) Math.ceil(clen / (double) bclen); bcol++) {
                File dir = new File(fnameStaging + "/" + brow + "_" + bcol);
                File dir2 = new File(fnameStagingCompare + "/" + brow + "_" + bcol);
                MatrixBlock mb = null;

                long row_offset = (brow - 1) * brlen + 1;
                long col_offset = (bcol - 1) * bclen + 1;

                if (dir.exists()) {
                    if (withCompare && dir2.exists()) //WITH COMPARE BLOCK
                    {
                        //copy only values that are different from the original
                        String[] lnames2 = dir2.list();
                        if (lnames2.length != 1) //there should be exactly 1 compare block
                            throw new DMLRuntimeException(
                                    "Unable to merge results because multiple compare blocks found.");
                        mb = StagingFileUtils.readCellList2BlockFromLocal(dir2 + "/" + lnames2[0], brlen, bclen);
                        boolean appendOnly = mb.isInSparseFormat();
                        double[][] compare = DataConverter.convertToDoubleMatrix(mb);

                        String[] lnames = dir.list();
                        for (String lname : lnames) {
                            MatrixBlock tmp = StagingFileUtils.readCellList2BlockFromLocal(dir + "/" + lname,
                                    brlen, bclen);
                            mergeWithComp(mb, tmp, compare);
                        }

                        //sort sparse due to append-only
                        if (appendOnly)
                            mb.sortSparseRows();

                        //change sparsity if required after
                        mb.examSparsity();
                    } else //WITHOUT COMPARE BLOCK
                    {
                        //copy all non-zeros from all workers
                        String[] lnames = dir.list();
                        boolean appendOnly = false;
                        for (String lname : lnames) {
                            if (mb == null) {
                                mb = StagingFileUtils.readCellList2BlockFromLocal(dir + "/" + lname, brlen, bclen);
                                appendOnly = mb.isInSparseFormat();
                            } else {
                                MatrixBlock tmp = StagingFileUtils
                                        .readCellList2BlockFromLocal(dir + "/" + lname, brlen, bclen);
                                mergeWithoutComp(mb, tmp, appendOnly);
                            }
                        }

                        //sort sparse due to append-only
                        if (appendOnly)
                            mb.sortSparseRows();

                        //change sparsity if required after
                        mb.examSparsity();
                    }
                }

                //write the block to binary cell
                if (mb != null) {
                    if (mb.isInSparseFormat()) {
                        SparseRowsIterator iter = mb.getSparseRowsIterator();
                        while (iter.hasNext()) {
                            IJV lcell = iter.next();
                            indexes.setIndexes(row_offset + lcell.i, col_offset + lcell.j);
                            cell.setValue(lcell.v);
                            out.append(indexes, cell);
                            written = true;
                        }
                    } else {
                        for (int i = 0; i < brlen; i++)
                            for (int j = 0; j < bclen; j++) {
                                double lvalue = mb.getValueDenseUnsafe(i, j);
                                if (lvalue != 0) //for nnz
                                {
                                    indexes.setIndexes(row_offset + i, col_offset + j);
                                    cell.setValue(lvalue);
                                    out.append(indexes, cell);
                                    written = true;
                                }
                            }
                    }
                }
            }

        if (!written)
            out.append(indexes, cell);
    } finally {
        if (out != null)
            out.close();
    }
}
From source file:com.ibm.bi.dml.runtime.controlprogram.parfor.ResultMergeLocalFile.java
License:Open Source License
/**
 * 
 * @param fnameNew
 * @param inMO
 * @throws CacheException
 * @throws IOException
 */
private void copyAllFiles(String fnameNew, ArrayList<MatrixObject> inMO) throws CacheException, IOException {
    JobConf job = new JobConf(ConfigurationManager.getCachedJobConf());
    FileSystem fs = FileSystem.get(job);
    Path path = new Path(fnameNew);

    //create output dir
    fs.mkdirs(path);

    //merge in all input matrix objects
    IDSequence seq = new IDSequence();
    for (MatrixObject in : inMO) {
        LOG.trace("ResultMerge (local, file): Merge input " + in.getVarName() + " (fname=" + in.getFileName()
                + ") via file rename.");

        //copy over files (just rename file or entire dir)
        Path tmpPath = new Path(in.getFileName());
        String lname = tmpPath.getName();
        fs.rename(tmpPath, new Path(fnameNew + "/" + lname + seq.getNextID()));
    }
}
From source file:com.ibm.bi.dml.runtime.controlprogram.parfor.ResultMergeRemoteMR.java
License:Open Source License
/**
 * 
 * @param fname null if no comparison required
 * @param fnameNew
 * @param srcFnames
 * @param ii
 * @param oi
 * @param rlen
 * @param clen
 * @param brlen
 * @param bclen
 * @throws DMLRuntimeException
 */
@SuppressWarnings({ "unused", "deprecation" })
protected void executeMerge(String fname, String fnameNew, String[] srcFnames, InputInfo ii, OutputInfo oi,
        long rlen, long clen, int brlen, int bclen) throws DMLRuntimeException {
    String jobname = "ParFor-RMMR";
    long t0 = DMLScript.STATISTICS ? System.nanoTime() : 0;

    JobConf job;
    job = new JobConf(ResultMergeRemoteMR.class);
    job.setJobName(jobname + _pfid);

    //maintain dml script counters
    Statistics.incrementNoOfCompiledMRJobs();

    //warning for textcell/binarycell without compare
    boolean withCompare = (fname != null);
    if ((oi == OutputInfo.TextCellOutputInfo || oi == OutputInfo.BinaryCellOutputInfo) && !withCompare
            && ResultMergeLocalFile.ALLOW_COPY_CELLFILES)
        LOG.warn("Result merge for " + OutputInfo.outputInfoToString(oi)
                + " without compare can be realized more efficiently with LOCAL_FILE than REMOTE_MR.");

    try {
        Path pathCompare = null;
        Path pathNew = new Path(fnameNew);

        /////
        //configure the MR job
        if (withCompare) {
            pathCompare = new Path(fname).makeQualified(FileSystem.get(job));
            MRJobConfiguration.setResultMergeInfo(job, pathCompare.toString(), ii,
                    LocalFileUtils.getWorkingDir(LocalFileUtils.CATEGORY_RESULTMERGE), rlen, clen, brlen, bclen);
        } else
            MRJobConfiguration.setResultMergeInfo(job, "null", ii,
                    LocalFileUtils.getWorkingDir(LocalFileUtils.CATEGORY_RESULTMERGE), rlen, clen, brlen, bclen);

        //set mappers, reducers, combiners
        job.setMapperClass(ResultMergeRemoteMapper.class);
        job.setReducerClass(ResultMergeRemoteReducer.class);

        if (oi == OutputInfo.TextCellOutputInfo) {
            job.setMapOutputKeyClass(MatrixIndexes.class);
            job.setMapOutputValueClass(TaggedMatrixCell.class);
            job.setOutputKeyClass(NullWritable.class);
            job.setOutputValueClass(Text.class);
        } else if (oi == OutputInfo.BinaryCellOutputInfo) {
            job.setMapOutputKeyClass(MatrixIndexes.class);
            job.setMapOutputValueClass(TaggedMatrixCell.class);
            job.setOutputKeyClass(MatrixIndexes.class);
            job.setOutputValueClass(MatrixCell.class);
        } else if (oi == OutputInfo.BinaryBlockOutputInfo) {
            //setup partitioning, grouping, sorting for composite key (old API)
            job.setPartitionerClass(ResultMergeRemotePartitioning.class); //partitioning
            job.setOutputValueGroupingComparator(ResultMergeRemoteGrouping.class); //grouping
            job.setOutputKeyComparatorClass(ResultMergeRemoteSorting.class); //sorting

            job.setMapOutputKeyClass(ResultMergeTaggedMatrixIndexes.class);
            job.setMapOutputValueClass(TaggedMatrixBlock.class);
            job.setOutputKeyClass(MatrixIndexes.class);
            job.setOutputValueClass(MatrixBlock.class);
        }

        //set input format
        job.setInputFormat(ii.inputFormatClass);

        //set the input path
        Path[] paths = null;
        if (withCompare) {
            paths = new Path[srcFnames.length + 1];
            paths[0] = pathCompare;
            for (int i = 1; i < paths.length; i++)
                paths[i] = new Path(srcFnames[i - 1]);
        } else {
            paths = new Path[srcFnames.length];
            for (int i = 0; i < paths.length; i++)
                paths[i] = new Path(srcFnames[i]);
        }
        FileInputFormat.setInputPaths(job, paths);

        //set output format
        job.setOutputFormat(oi.outputFormatClass);

        //set output path
        MapReduceTool.deleteFileIfExistOnHDFS(fnameNew);
        FileOutputFormat.setOutputPath(job, pathNew);

        //////
        //set optimization parameters

        //set the number of mappers and reducers
        //job.setNumMapTasks( _numMappers ); //use default num mappers
        long reducerGroups = _numReducers;
        if (oi == OutputInfo.BinaryBlockOutputInfo)
            reducerGroups = Math.max(rlen / brlen, 1) * Math.max(clen / bclen, 1);
        else //textcell/binarycell
            reducerGroups = Math.max((rlen * clen) / StagingFileUtils.CELL_BUFFER_SIZE, 1);
        job.setNumReduceTasks((int) Math.min(_numReducers, reducerGroups));

        //use FLEX scheduler configuration properties
        if (ParForProgramBlock.USE_FLEX_SCHEDULER_CONF) {
            job.setInt("flex.map.min", 0);
            job.setInt("flex.map.max", _numMappers);
            job.setInt("flex.reduce.min", 0);
            job.setInt("flex.reduce.max", _numMappers);
        }

        //disable automatic tasks timeouts and speculative task exec
        job.setInt("mapred.task.timeout", 0);
        job.setMapSpeculativeExecution(false);

        //set up preferred custom serialization framework for binary block format
        if (MRJobConfiguration.USE_BINARYBLOCK_SERIALIZATION)
            MRJobConfiguration.addBinaryBlockSerializationFramework(job);

        //enables the reuse of JVMs (multiple tasks per MR task)
        if (_jvmReuse)
            job.setNumTasksToExecutePerJvm(-1); //unlimited

        //enables compression - not conclusive for different codecs (empirically good compression ratio, but significantly slower)
        //job.set("mapred.compress.map.output", "true");
        //job.set("mapred.map.output.compression.codec", "org.apache.hadoop.io.compress.GzipCodec");

        //set the replication factor for the results
        job.setInt("dfs.replication", _replication);

        //set the max number of retries per map task
        //  disabled job-level configuration to respect cluster configuration
        //  note: this refers to hadoop2, hence it never had effect on mr1
        //job.setInt("mapreduce.map.maxattempts", _max_retry);

        //set unique working dir
        MRJobConfiguration.setUniqueWorkingDir(job);

        /////
        // execute the MR job
        JobClient.runJob(job);

        //maintain dml script counters
        Statistics.incrementNoOfExecutedMRJobs();
    } catch (Exception ex) {
        throw new DMLRuntimeException(ex);
    }

    if (DMLScript.STATISTICS) {
        long t1 = System.nanoTime();
        Statistics.maintainCPHeavyHitters("MR-Job_" + jobname, t1 - t0);
    }
}
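The executeMerge example above is the full old-API (org.apache.hadoop.mapred) job-configuration pattern: the JobConf carries the mapper/reducer classes, key/value types, input/output formats and paths, plus tuning knobs, and JobClient.runJob(job) submits it. A stripped-down sketch of the same pattern, using Hadoop's identity mapper/reducer in place of SystemML's merge classes; the class name and the args-based input/output paths are assumptions for illustration:

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.TextInputFormat;
import org.apache.hadoop.mapred.TextOutputFormat;
import org.apache.hadoop.mapred.lib.IdentityMapper;
import org.apache.hadoop.mapred.lib.IdentityReducer;

public class MinimalOldApiJob {
    public static void main(String[] args) throws Exception {
        JobConf job = new JobConf(MinimalOldApiJob.class); //ships the containing jar
        job.setJobName("minimal-old-api-job");

        //identity map/reduce; real jobs substitute their own classes
        job.setMapperClass(IdentityMapper.class);
        job.setReducerClass(IdentityReducer.class);

        //key/value types as produced by TextInputFormat
        job.setOutputKeyClass(LongWritable.class);
        job.setOutputValueClass(Text.class);

        //input/output formats and paths
        job.setInputFormat(TextInputFormat.class);
        job.setOutputFormat(TextOutputFormat.class);
        FileInputFormat.setInputPaths(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        JobClient.runJob(job); //blocks until the job completes
    }
}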
From source file:com.ibm.bi.dml.runtime.instructions.cp.DataPartitionCPInstruction.java
License:Open Source License
@Override
public void processInstruction(ExecutionContext ec) throws DMLUnsupportedOperationException, DMLRuntimeException {
    //get input
    MatrixObject moIn = (MatrixObject) ec.getVariable(input1.getName());
    MatrixBlock mb = moIn.acquireRead();

    //execute operations
    MatrixObject moOut = (MatrixObject) ec.getVariable(output.getName());
    String fname = moOut.getFileName();
    moOut.setPartitioned(_pformat, -1); //modify meta data output
    try {
        //write matrix partitions to hdfs
        WriterBinaryBlock writer = (WriterBinaryBlock) MatrixWriterFactory
                .createMatrixWriter(OutputInfo.BinaryBlockOutputInfo);
        writer.writePartitionedBinaryBlockMatrixToHDFS(new Path(fname),
                new JobConf(ConfigurationManager.getCachedJobConf()), mb, moIn.getNumRows(),
                moIn.getNumColumns(), (int) moIn.getNumRowsPerBlock(), (int) moIn.getNumColumnsPerBlock(),
                _pformat);

        //ensure correctness of output characteristics (required if input unknown during compile and no recompile)
        MatrixCharacteristics mc = new MatrixCharacteristics(moIn.getNumRows(), moIn.getNumColumns(),
                (int) moIn.getNumRowsPerBlock(), (int) moIn.getNumColumnsPerBlock(), moIn.getNnz());
        MatrixFormatMetaData meta = new MatrixFormatMetaData(mc, OutputInfo.BinaryBlockOutputInfo,
                InputInfo.BinaryBlockInputInfo);
        moOut.setMetaData(meta);
    } catch (Exception ex) {
        throw new DMLRuntimeException("Failed to execute data partitioning instruction.", ex);
    }

    //release input
    ec.releaseMatrixInput(input1.getName());
}
From source file:com.ibm.bi.dml.runtime.io.ReaderBinaryBlock.java
License:Open Source License
@Override
public MatrixBlock readMatrixFromHDFS(String fname, long rlen, long clen, int brlen, int bclen, long estnnz)
        throws IOException, DMLRuntimeException {
    //allocate output matrix block
    MatrixBlock ret = createOutputMatrixBlock(rlen, clen, estnnz, false, false);

    //prepare file access
    JobConf job = new JobConf(ConfigurationManager.getCachedJobConf());
    FileSystem fs = _localFS ? FileSystem.getLocal(job) : FileSystem.get(job);
    Path path = new Path((_localFS ? "file:///" : "") + fname);

    //check existence and non-empty file
    checkValidInputFile(fs, path);

    //core read
    readBinaryBlockMatrixFromHDFS(path, job, fs, ret, rlen, clen, brlen, bclen);

    //finally check if change of sparse/dense block representation required
    if (!AGGREGATE_BLOCK_NNZ)
        ret.recomputeNonZeros();
    ret.examSparsity();

    return ret;
}
From source file:com.ibm.bi.dml.runtime.io.ReaderBinaryBlock.java
License:Open Source License
/**
 * 
 * @param fname
 * @param rlen
 * @param clen
 * @param brlen
 * @param bclen
 * @return
 * @throws IOException
 * @throws DMLRuntimeException
 */
public ArrayList<IndexedMatrixValue> readIndexedMatrixBlocksFromHDFS(String fname, long rlen, long clen,
        int brlen, int bclen) throws IOException, DMLRuntimeException {
    //allocate output matrix block collection
    ArrayList<IndexedMatrixValue> ret = new ArrayList<IndexedMatrixValue>();

    //prepare file access
    JobConf job = new JobConf(ConfigurationManager.getCachedJobConf());
    FileSystem fs = _localFS ? FileSystem.getLocal(job) : FileSystem.get(job);
    Path path = new Path((_localFS ? "file:///" : "") + fname);

    //check existence and non-empty file
    checkValidInputFile(fs, path);

    //core read
    readBinaryBlockMatrixBlocksFromHDFS(path, job, fs, ret, rlen, clen, brlen, bclen);

    return ret;
}
From source file:com.ibm.bi.dml.runtime.io.ReaderBinaryBlockParallel.java
License:Open Source License
@Override
public MatrixBlock readMatrixFromHDFS(String fname, long rlen, long clen, int brlen, int bclen, long estnnz)
        throws IOException, DMLRuntimeException {
    //allocate output matrix block (incl block allocation for parallel)
    MatrixBlock ret = createOutputMatrixBlock(rlen, clen, estnnz, true, true);

    //prepare file access
    JobConf job = new JobConf(ConfigurationManager.getCachedJobConf());
    FileSystem fs = _localFS ? FileSystem.getLocal(job) : FileSystem.get(job);
    Path path = new Path((_localFS ? "file:///" : "") + fname);

    //check existence and non-empty file
    checkValidInputFile(fs, path);

    //core read
    readBinaryBlockMatrixFromHDFS(path, job, fs, ret, rlen, clen, brlen, bclen);

    //finally check if change of sparse/dense block representation required
    if (!AGGREGATE_BLOCK_NNZ)
        ret.recomputeNonZeros();
    ret.examSparsity();

    return ret;
}
From source file:com.ibm.bi.dml.runtime.io.ReaderBinaryBlockParFiles.java
License:Open Source License
@Override
public MatrixBlock readMatrixFromHDFS(String fname, long rlen, long clen, int brlen, int bclen, long estnnz)
        throws IOException, DMLRuntimeException {
    //allocate output matrix block
    MatrixBlock ret = createOutputMatrixBlock(rlen, clen, estnnz, false, false);

    //prepare file access
    JobConf job = new JobConf(ConfigurationManager.getCachedJobConf());
    FileSystem fs = _localFS ? FileSystem.getLocal(job) : FileSystem.get(job);
    Path path = new Path((_localFS ? "file:///" : "") + fname);

    //check existence and non-empty file
    checkValidInputFile(fs, path);

    //core read
    readBinaryBlockMatrixFromHDFS(path, job, fs, ret, rlen, clen, brlen, bclen);

    //finally check if change of sparse/dense block representation required
    ret.recomputeNonZeros();
    ret.examSparsity();

    return ret;
}
From source file:com.ibm.bi.dml.runtime.io.ReaderBinaryCell.java
License:Open Source License
@Override
public MatrixBlock readMatrixFromHDFS(String fname, long rlen, long clen, int brlen, int bclen, long estnnz)
        throws IOException, DMLRuntimeException {
    //allocate output matrix block
    MatrixBlock ret = createOutputMatrixBlock(rlen, clen, estnnz, true, false);

    //prepare file access
    JobConf job = new JobConf(ConfigurationManager.getCachedJobConf());
    FileSystem fs = FileSystem.get(job);
    Path path = new Path(fname);

    //check existence and non-empty file
    checkValidInputFile(fs, path);

    //core read
    readBinaryCellMatrixFromHDFS(path, job, fs, ret, rlen, clen, brlen, bclen);

    //finally check if change of sparse/dense block representation required
    //(nnz maintained via append during read for both dense/sparse)
    ret.examSparsity();

    return ret;
}
From source file:com.ibm.bi.dml.runtime.io.ReaderTextCell.java
License:Open Source License
@Override
public MatrixBlock readMatrixFromHDFS(String fname, long rlen, long clen, int brlen, int bclen, long estnnz)
        throws IOException, DMLRuntimeException {
    //allocate output matrix block
    MatrixBlock ret = createOutputMatrixBlock(rlen, clen, estnnz, true, false);

    //prepare file access
    JobConf job = new JobConf(ConfigurationManager.getCachedJobConf());
    FileSystem fs = FileSystem.get(job);
    Path path = new Path(fname);

    //check existence and non-empty file
    checkValidInputFile(fs, path);

    //core read
    if (fs.isDirectory(path))
        readTextCellMatrixFromHDFS(path, job, ret, rlen, clen, brlen, bclen);
    else
        readRawTextCellMatrixFromHDFS(path, job, fs, ret, rlen, clen, brlen, bclen, _isMMFile);

    //finally check if change of sparse/dense block representation required
    if (!ret.isInSparseFormat())
        ret.recomputeNonZeros();
    ret.examSparsity();

    return ret;
}
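The five reader examples share one idiom worth noting: the JobConf built from the cached configuration is handed to FileSystem.get or FileSystem.getLocal to choose between HDFS and the local file system, with the path prefixed by "file:///" in the local case. A minimal sketch of just that selection logic; the FsSelection class name and command-line arguments are illustrative assumptions:

import java.io.IOException;

import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.JobConf;

public class FsSelection {
    public static void main(String[] args) throws Exception {
        boolean localFS = Boolean.parseBoolean(args[0]);
        String fname = args[1];

        //the no-arg constructor loads the default Hadoop resources (core-site.xml etc.)
        JobConf job = new JobConf();

        //select the file system and qualify the path accordingly
        FileSystem fs = localFS ? FileSystem.getLocal(job) : FileSystem.get(job);
        Path path = new Path((localFS ? "file:///" : "") + fname);

        if (!fs.exists(path))
            throw new IOException("File not found on " + (localFS ? "local FS" : "HDFS") + ": " + path);
        System.out.println("length=" + fs.getFileStatus(path).getLen());
    }
}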