Example usage for org.apache.hadoop.mapred JobConf JobConf

Introduction

This page collects example usages of the org.apache.hadoop.mapred.JobConf constructor from open source projects.

Prototype

public JobConf(boolean loadDefaults) 

Document

A new map/reduce configuration where the behavior of reading from the default resources can be turned off.
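
Most examples on this page construct a JobConf from an existing Configuration or from a job class rather than with this boolean overload. For orientation, a minimal sketch of the JobConf(boolean) overload itself is shown below; the resource path and job name are illustrative only, not taken from the examples:

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.JobConf;

//create a configuration without loading the default resources
JobConf job = new JobConf(false);
//defaults are off, so required resources/settings must be added explicitly
job.addResource(new Path("/path/to/custom-site.xml")); //illustrative path
job.setJobName("example-job"); //illustrative name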

Usage

From source file: com.ibm.bi.dml.runtime.controlprogram.parfor.ResultMergeLocalFile.java

License: Open Source License

/**
 * Merges the staged result blocks (optionally against the compare blocks)
 * and writes them to a single binary cell output file.
 * 
 * @param fnameStaging
 * @param fnameStagingCompare
 * @param fnameNew
 * @param metadata
 * @param withCompare
 * @throws IOException
 * @throws DMLRuntimeException
 */
@SuppressWarnings("deprecation")
private void createBinaryCellResultFile(String fnameStaging, String fnameStagingCompare, String fnameNew,
        MatrixFormatMetaData metadata, boolean withCompare) throws IOException, DMLRuntimeException {
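    //create a job conf initialized from the cached configuration (changes stay local)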
    JobConf job = new JobConf(ConfigurationManager.getCachedJobConf());
    FileSystem fs = FileSystem.get(job);
    Path path = new Path(fnameNew);

    MatrixCharacteristics mc = metadata.getMatrixCharacteristics();
    long rlen = mc.getRows();
    long clen = mc.getCols();
    int brlen = mc.getRowsPerBlock();
    int bclen = mc.getColsPerBlock();

    MatrixIndexes indexes = new MatrixIndexes(1, 1);
    MatrixCell cell = new MatrixCell(0);

    SequenceFile.Writer out = new SequenceFile.Writer(fs, job, path, MatrixIndexes.class, MatrixCell.class); //beware ca 50ms
    try {
        boolean written = false;
        for (long brow = 1; brow <= (long) Math.ceil(rlen / (double) brlen); brow++)
            for (long bcol = 1; bcol <= (long) Math.ceil(clen / (double) bclen); bcol++) {
                File dir = new File(fnameStaging + "/" + brow + "_" + bcol);
                File dir2 = new File(fnameStagingCompare + "/" + brow + "_" + bcol);
                MatrixBlock mb = null;

                long row_offset = (brow - 1) * brlen + 1;
                long col_offset = (bcol - 1) * bclen + 1;

                if (dir.exists()) {
                    if (withCompare && dir2.exists()) //WITH COMPARE BLOCK
                    {
                        //copy only values that are different from the original
                        String[] lnames2 = dir2.list();
                        if (lnames2.length != 1) //there should be exactly 1 compare block
                            throw new DMLRuntimeException(
                                    "Unable to merge results because multiple compare blocks found.");
                        mb = StagingFileUtils.readCellList2BlockFromLocal(dir2 + "/" + lnames2[0], brlen,
                                bclen);
                        boolean appendOnly = mb.isInSparseFormat();
                        double[][] compare = DataConverter.convertToDoubleMatrix(mb);

                        String[] lnames = dir.list();
                        for (String lname : lnames) {
                            MatrixBlock tmp = StagingFileUtils.readCellList2BlockFromLocal(dir + "/" + lname,
                                    brlen, bclen);
                            mergeWithComp(mb, tmp, compare);
                        }

                        //sort sparse due to append-only
                        if (appendOnly)
                            mb.sortSparseRows();

                    //change sparsity representation if required after merge
                    mb.examSparsity();
                    } else //WITHOUT COMPARE BLOCK
                    {
                        //copy all non-zeros from all workers
                        String[] lnames = dir.list();
                        boolean appendOnly = false;
                        for (String lname : lnames) {
                            if (mb == null) {
                                mb = StagingFileUtils.readCellList2BlockFromLocal(dir + "/" + lname, brlen,
                                        bclen);
                                appendOnly = mb.isInSparseFormat();
                            } else {
                                MatrixBlock tmp = StagingFileUtils
                                        .readCellList2BlockFromLocal(dir + "/" + lname, brlen, bclen);
                                mergeWithoutComp(mb, tmp, appendOnly);
                            }
                        }

                        //sort sparse due to append-only
                        if (appendOnly)
                            mb.sortSparseRows();

                    //change sparsity representation if required after merge
                    mb.examSparsity();
                    }
                }

                //write the block to binary cell
                if (mb != null) {
                    if (mb.isInSparseFormat()) {
                        SparseRowsIterator iter = mb.getSparseRowsIterator();
                        while (iter.hasNext()) {
                            IJV lcell = iter.next();
                            indexes.setIndexes(row_offset + lcell.i, col_offset + lcell.j);
                            cell.setValue(lcell.v);
                            out.append(indexes, cell);
                            written = true;
                        }
                    } else {
                        for (int i = 0; i < brlen; i++)
                            for (int j = 0; j < bclen; j++) {
                                double lvalue = mb.getValueDenseUnsafe(i, j);
                                if (lvalue != 0) //for nnz
                                {
                                    indexes.setIndexes(row_offset + i, col_offset + j);
                                    cell.setValue(lvalue);
                                    out.append(indexes, cell);
                                    written = true;
                                }
                            }
                    }
                }
            }

        if (!written)
            out.append(indexes, cell);
    } finally {
        if (out != null)
            out.close();
    }
}

From source file: com.ibm.bi.dml.runtime.controlprogram.parfor.ResultMergeLocalFile.java

License: Open Source License

/**
 * Moves all input matrix files into the output directory via file rename.
 * 
 * @param fnameNew
 * @param inMO
 * @throws CacheException
 * @throws IOException
 */
private void copyAllFiles(String fnameNew, ArrayList<MatrixObject> inMO) throws CacheException, IOException {
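    //create a job conf initialized from the cached configuration (changes stay local)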
    JobConf job = new JobConf(ConfigurationManager.getCachedJobConf());
    FileSystem fs = FileSystem.get(job);
    Path path = new Path(fnameNew);

    //create output dir
    fs.mkdirs(path);

    //merge in all input matrix objects
    IDSequence seq = new IDSequence();
    for (MatrixObject in : inMO) {
        LOG.trace("ResultMerge (local, file): Merge input " + in.getVarName() + " (fname=" + in.getFileName()
                + ") via file rename.");

        //copy over files (just rename file or entire dir)
        Path tmpPath = new Path(in.getFileName());
        String lname = tmpPath.getName();
        fs.rename(tmpPath, new Path(fnameNew + "/" + lname + seq.getNextID()));
    }
}

From source file: com.ibm.bi.dml.runtime.controlprogram.parfor.ResultMergeRemoteMR.java

License: Open Source License

/**
 * Executes the result merge as a single MR job.
 * 
 * @param fname    null if no comparison required
 * @param fnameNew
 * @param srcFnames
 * @param ii
 * @param oi
 * @param rlen
 * @param clen
 * @param brlen
 * @param bclen
 * @throws DMLRuntimeException
 */
@SuppressWarnings({ "unused", "deprecation" })
protected void executeMerge(String fname, String fnameNew, String[] srcFnames, InputInfo ii, OutputInfo oi,
        long rlen, long clen, int brlen, int bclen) throws DMLRuntimeException {
    String jobname = "ParFor-RMMR";
    long t0 = DMLScript.STATISTICS ? System.nanoTime() : 0;

    JobConf job;
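    //the jar containing ResultMergeRemoteMR is used as the job's jar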
    job = new JobConf(ResultMergeRemoteMR.class);
    job.setJobName(jobname + _pfid);

    //maintain dml script counters
    Statistics.incrementNoOfCompiledMRJobs();

    //warning for textcell/binarycell without compare
    boolean withCompare = (fname != null);
    if ((oi == OutputInfo.TextCellOutputInfo || oi == OutputInfo.BinaryCellOutputInfo) && !withCompare
            && ResultMergeLocalFile.ALLOW_COPY_CELLFILES)
        LOG.warn("Result merge for " + OutputInfo.outputInfoToString(oi)
                + " without compare can be realized more efficiently with LOCAL_FILE than REMOTE_MR.");

    try {
        Path pathCompare = null;
        Path pathNew = new Path(fnameNew);

        /////
        //configure the MR job
        if (withCompare) {
            pathCompare = new Path(fname).makeQualified(FileSystem.get(job));
            MRJobConfiguration.setResultMergeInfo(job, pathCompare.toString(), ii,
                    LocalFileUtils.getWorkingDir(LocalFileUtils.CATEGORY_RESULTMERGE), rlen, clen, brlen,
                    bclen);
        } else
            MRJobConfiguration.setResultMergeInfo(job, "null", ii,
                    LocalFileUtils.getWorkingDir(LocalFileUtils.CATEGORY_RESULTMERGE), rlen, clen, brlen,
                    bclen);

        //set mappers, reducers, combiners
        job.setMapperClass(ResultMergeRemoteMapper.class);
        job.setReducerClass(ResultMergeRemoteReducer.class);

        if (oi == OutputInfo.TextCellOutputInfo) {
            job.setMapOutputKeyClass(MatrixIndexes.class);
            job.setMapOutputValueClass(TaggedMatrixCell.class);
            job.setOutputKeyClass(NullWritable.class);
            job.setOutputValueClass(Text.class);
        } else if (oi == OutputInfo.BinaryCellOutputInfo) {
            job.setMapOutputKeyClass(MatrixIndexes.class);
            job.setMapOutputValueClass(TaggedMatrixCell.class);
            job.setOutputKeyClass(MatrixIndexes.class);
            job.setOutputValueClass(MatrixCell.class);
        } else if (oi == OutputInfo.BinaryBlockOutputInfo) {
            //setup partitioning, grouping, sorting for composite key (old API)
            job.setPartitionerClass(ResultMergeRemotePartitioning.class); //partitioning
            job.setOutputValueGroupingComparator(ResultMergeRemoteGrouping.class); //grouping
            job.setOutputKeyComparatorClass(ResultMergeRemoteSorting.class); //sorting

            job.setMapOutputKeyClass(ResultMergeTaggedMatrixIndexes.class);
            job.setMapOutputValueClass(TaggedMatrixBlock.class);
            job.setOutputKeyClass(MatrixIndexes.class);
            job.setOutputValueClass(MatrixBlock.class);
        }

        //set input format 
        job.setInputFormat(ii.inputFormatClass);

        //set the input path 
        Path[] paths = null;
        if (withCompare) {
            paths = new Path[srcFnames.length + 1];
            paths[0] = pathCompare;
            for (int i = 1; i < paths.length; i++)
                paths[i] = new Path(srcFnames[i - 1]);
        } else {
            paths = new Path[srcFnames.length];
            for (int i = 0; i < paths.length; i++)
                paths[i] = new Path(srcFnames[i]);
        }
        FileInputFormat.setInputPaths(job, paths);

        //set output format
        job.setOutputFormat(oi.outputFormatClass);

        //set output path
        MapReduceTool.deleteFileIfExistOnHDFS(fnameNew);
        FileOutputFormat.setOutputPath(job, pathNew);

        //////
        //set optimization parameters

        //set the number of mappers and reducers 
        //job.setNumMapTasks( _numMappers ); //use default num mappers
        long reducerGroups = _numReducers;
        if (oi == OutputInfo.BinaryBlockOutputInfo)
            reducerGroups = Math.max(rlen / brlen, 1) * Math.max(clen / bclen, 1);
        else //textcell/binarycell
            reducerGroups = Math.max((rlen * clen) / StagingFileUtils.CELL_BUFFER_SIZE, 1);
        job.setNumReduceTasks((int) Math.min(_numReducers, reducerGroups));

        //use FLEX scheduler configuration properties
        if (ParForProgramBlock.USE_FLEX_SCHEDULER_CONF) {
            job.setInt("flex.map.min", 0);
            job.setInt("flex.map.max", _numMappers);
            job.setInt("flex.reduce.min", 0);
            job.setInt("flex.reduce.max", _numMappers);
        }

        //disable automatic tasks timeouts and speculative task exec
        job.setInt("mapred.task.timeout", 0);
        job.setMapSpeculativeExecution(false);

        //set up preferred custom serialization framework for binary block format
        if (MRJobConfiguration.USE_BINARYBLOCK_SERIALIZATION)
            MRJobConfiguration.addBinaryBlockSerializationFramework(job);

        //enables the reuse of JVMs (multiple tasks per MR task)
        if (_jvmReuse)
            job.setNumTasksToExecutePerJvm(-1); //unlimited

        //enables compression - not conclusive for different codecs (empirically good compression ratio, but significantly slower)
        //job.set("mapred.compress.map.output", "true");
        //job.set("mapred.map.output.compression.codec", "org.apache.hadoop.io.compress.GzipCodec");

        //set the replication factor for the results
        job.setInt("dfs.replication", _replication);

        //set the max number of retries per map task
        //  disabled job-level configuration to respect cluster configuration
        //  note: this refers to hadoop2, hence it never had effect on mr1
        //job.setInt("mapreduce.map.maxattempts", _max_retry);

        //set unique working dir
        MRJobConfiguration.setUniqueWorkingDir(job);

        /////
        // execute the MR job   

        JobClient.runJob(job);

        //maintain dml script counters
        Statistics.incrementNoOfExecutedMRJobs();
    } catch (Exception ex) {
        throw new DMLRuntimeException(ex);
    }

    if (DMLScript.STATISTICS) {
        long t1 = System.nanoTime();
        Statistics.maintainCPHeavyHitters("MR-Job_" + jobname, t1 - t0);
    }
}

From source file: com.ibm.bi.dml.runtime.instructions.cp.DataPartitionCPInstruction.java

License: Open Source License

@Override
public void processInstruction(ExecutionContext ec)
        throws DMLUnsupportedOperationException, DMLRuntimeException {
    //get input
    MatrixObject moIn = (MatrixObject) ec.getVariable(input1.getName());
    MatrixBlock mb = moIn.acquireRead();

    //execute operations 
    MatrixObject moOut = (MatrixObject) ec.getVariable(output.getName());
    String fname = moOut.getFileName();
    moOut.setPartitioned(_pformat, -1); //modify meta data output
    try {
        //write matrix partitions to hdfs
        WriterBinaryBlock writer = (WriterBinaryBlock) MatrixWriterFactory
                .createMatrixWriter(OutputInfo.BinaryBlockOutputInfo);
        writer.writePartitionedBinaryBlockMatrixToHDFS(new Path(fname),
                new JobConf(ConfigurationManager.getCachedJobConf()), mb, moIn.getNumRows(),
                moIn.getNumColumns(), (int) moIn.getNumRowsPerBlock(), (int) moIn.getNumColumnsPerBlock(),
                _pformat);

        //ensure correctness of output characteristics (required if input unknown during compile and no recompile)
        MatrixCharacteristics mc = new MatrixCharacteristics(moIn.getNumRows(), moIn.getNumColumns(),
                (int) moIn.getNumRowsPerBlock(), (int) moIn.getNumColumnsPerBlock(), moIn.getNnz());
        MatrixFormatMetaData meta = new MatrixFormatMetaData(mc, OutputInfo.BinaryBlockOutputInfo,
                InputInfo.BinaryBlockInputInfo);
        moOut.setMetaData(meta);
    } catch (Exception ex) {
        throw new DMLRuntimeException("Failed to execute data partitioning instruction.", ex);
    }

    //release input
    ec.releaseMatrixInput(input1.getName());
}

From source file: com.ibm.bi.dml.runtime.io.ReaderBinaryBlock.java

License: Open Source License

@Override
public MatrixBlock readMatrixFromHDFS(String fname, long rlen, long clen, int brlen, int bclen, long estnnz)
        throws IOException, DMLRuntimeException {
    //allocate output matrix block
    MatrixBlock ret = createOutputMatrixBlock(rlen, clen, estnnz, false, false);

    //prepare file access
    JobConf job = new JobConf(ConfigurationManager.getCachedJobConf());
    FileSystem fs = _localFS ? FileSystem.getLocal(job) : FileSystem.get(job);
    Path path = new Path((_localFS ? "file:///" : "") + fname);

    //check existence and non-empty file
    checkValidInputFile(fs, path);

    //core read 
    readBinaryBlockMatrixFromHDFS(path, job, fs, ret, rlen, clen, brlen, bclen);

    //finally check if change of sparse/dense block representation required
    if (!AGGREGATE_BLOCK_NNZ)
        ret.recomputeNonZeros();
    ret.examSparsity();

    return ret;
}

From source file: com.ibm.bi.dml.runtime.io.ReaderBinaryBlock.java

License: Open Source License

/**
 * Reads all indexed matrix blocks of a binary block matrix from HDFS.
 * 
 * @param fname
 * @param rlen
 * @param clen
 * @param brlen
 * @param bclen
 * @return list of indexed matrix blocks
 * @throws IOException
 * @throws DMLRuntimeException
 */
public ArrayList<IndexedMatrixValue> readIndexedMatrixBlocksFromHDFS(String fname, long rlen, long clen,
        int brlen, int bclen) throws IOException, DMLRuntimeException {
    //allocate output matrix block collection
    ArrayList<IndexedMatrixValue> ret = new ArrayList<IndexedMatrixValue>();

    //prepare file access
    JobConf job = new JobConf(ConfigurationManager.getCachedJobConf());
    FileSystem fs = _localFS ? FileSystem.getLocal(job) : FileSystem.get(job);
    Path path = new Path((_localFS ? "file:///" : "") + fname);

    //check existence and non-empty file
    checkValidInputFile(fs, path);

    //core read 
    readBinaryBlockMatrixBlocksFromHDFS(path, job, fs, ret, rlen, clen, brlen, bclen);

    return ret;
}

From source file: com.ibm.bi.dml.runtime.io.ReaderBinaryBlockParallel.java

License: Open Source License

@Override
public MatrixBlock readMatrixFromHDFS(String fname, long rlen, long clen, int brlen, int bclen, long estnnz)
        throws IOException, DMLRuntimeException {
    //allocate output matrix block (incl block allocation for parallel)
    MatrixBlock ret = createOutputMatrixBlock(rlen, clen, estnnz, true, true);

    //prepare file access
    JobConf job = new JobConf(ConfigurationManager.getCachedJobConf());
    FileSystem fs = _localFS ? FileSystem.getLocal(job) : FileSystem.get(job);
    Path path = new Path((_localFS ? "file:///" : "") + fname);

    //check existence and non-empty file
    checkValidInputFile(fs, path);

    //core read 
    readBinaryBlockMatrixFromHDFS(path, job, fs, ret, rlen, clen, brlen, bclen);

    //finally check if change of sparse/dense block representation required
    if (!AGGREGATE_BLOCK_NNZ)
        ret.recomputeNonZeros();
    ret.examSparsity();

    return ret;
}

From source file: com.ibm.bi.dml.runtime.io.ReaderBinaryBlockParFiles.java

License: Open Source License

@Override
public MatrixBlock readMatrixFromHDFS(String fname, long rlen, long clen, int brlen, int bclen, long estnnz)
        throws IOException, DMLRuntimeException {
    //allocate output matrix block
    MatrixBlock ret = createOutputMatrixBlock(rlen, clen, estnnz, false, false);

    //prepare file access
    JobConf job = new JobConf(ConfigurationManager.getCachedJobConf());
    FileSystem fs = _localFS ? FileSystem.getLocal(job) : FileSystem.get(job);
    Path path = new Path((_localFS ? "file:///" : "") + fname);

    //check existence and non-empty file
    checkValidInputFile(fs, path);

    //core read 
    readBinaryBlockMatrixFromHDFS(path, job, fs, ret, rlen, clen, brlen, bclen);

    //finally check if change of sparse/dense block representation required
    ret.recomputeNonZeros();
    ret.examSparsity();

    return ret;
}

From source file: com.ibm.bi.dml.runtime.io.ReaderBinaryCell.java

License: Open Source License

@Override
public MatrixBlock readMatrixFromHDFS(String fname, long rlen, long clen, int brlen, int bclen, long estnnz)
        throws IOException, DMLRuntimeException {
    //allocate output matrix block
    MatrixBlock ret = createOutputMatrixBlock(rlen, clen, estnnz, true, false);

    //prepare file access
    JobConf job = new JobConf(ConfigurationManager.getCachedJobConf());
    FileSystem fs = FileSystem.get(job);
    Path path = new Path(fname);

    //check existence and non-empty file
    checkValidInputFile(fs, path);

    //core read 
    readBinaryCellMatrixFromHDFS(path, job, fs, ret, rlen, clen, brlen, bclen);

    //finally check if change of sparse/dense block representation required
    //(nnz maintained via append during read for both dense/sparse)
    ret.examSparsity();

    return ret;
}

From source file: com.ibm.bi.dml.runtime.io.ReaderTextCell.java

License: Open Source License

@Override
public MatrixBlock readMatrixFromHDFS(String fname, long rlen, long clen, int brlen, int bclen, long estnnz)
        throws IOException, DMLRuntimeException {
    //allocate output matrix block
    MatrixBlock ret = createOutputMatrixBlock(rlen, clen, estnnz, true, false);

    //prepare file access
    JobConf job = new JobConf(ConfigurationManager.getCachedJobConf());
    FileSystem fs = FileSystem.get(job);
    Path path = new Path(fname);

    //check existence and non-empty file
    checkValidInputFile(fs, path);

    //core read 
    if (fs.isDirectory(path))
        readTextCellMatrixFromHDFS(path, job, ret, rlen, clen, brlen, bclen);
    else
        readRawTextCellMatrixFromHDFS(path, job, fs, ret, rlen, clen, brlen, bclen, _isMMFile);

    //finally check if change of sparse/dense block representation required
    if (!ret.isInSparseFormat())
        ret.recomputeNonZeros();
    ret.examSparsity();

    return ret;
}