List of usage examples for the org.apache.hadoop.mapred.JobConf constructor JobConf(boolean loadDefaults)
public JobConf(boolean loadDefaults)
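The examples below show two recurring patterns: tests that start from an empty configuration with new JobConf(false), so that no *-default.xml or *-site.xml resources are loaded and only explicitly set properties apply, and runtime code that copies an existing Configuration into a new JobConf before layering job-specific settings on top. A minimal sketch of both patterns follows; the class name, property values, and input path are illustrative only, not taken from the examples.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.JobConf;

public class JobConfConstructionSketch {
    public static void main(String[] args) {
        // Pattern 1: loadDefaults=false -- the JobConf starts empty (no *-default.xml
        // or *-site.xml resources), so only explicitly set properties take effect.
        // The test setups below rely on this for isolation.
        JobConf testConf = new JobConf(false);
        testConf.set("fs.default.name", "file:///");
        testConf.setLong("dfs.block.size", 64L * 1024 * 1024);

        // Pattern 2: copy an existing Configuration (defaults plus site settings),
        // then add job-specific values, as the SystemML examples do with their
        // cached job configuration.
        Configuration shared = new Configuration(); // stand-in for a cached/shared config
        JobConf jobConf = new JobConf(shared);
        jobConf.setJobName("example-job");
        FileInputFormat.setInputPaths(jobConf, new Path("/tmp/input")); // illustrative path
    }
}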
From source file:com.hdfs.concat.crush.CrushReducerTest.java
License:Apache License
@Test
public void missingOutputFormat() {
    JobConf job = new JobConf(false);

    job.set("mapred.tip.id", "task_201011081200_14527_r_1234");

    job.set("fs.default.name", "file:///");
    job.set("fs.file.impl", "org.apache.hadoop.fs.LocalFileSystem");
    job.set("mapred.output.dir", outDir.getAbsolutePath());

    job.setLong("crush.timestamp", 98765);
    job.setLong("dfs.block.size", 1024 * 1024 * 64L);

    job.setInt("crush.num.specs", 2);

    job.set("crush.0.regex", "foo");
    job.set("crush.0.regex.replacement", "bar");
    job.set("crush.0.input.format", SequenceFileInputFormat.class.getName());
    job.set("crush.0.output.format", SequenceFileOutputFormat.class.getName());

    job.set("crush.1.regex", "hello");
    job.set("crush.1.regex.replacement", "hello");
    job.set("crush.1.input.format", SequenceFileInputFormat.class.getName());

    reducer = new CrushReducer();

    try {
        reducer.configure(job);
        fail();
    } catch (IllegalArgumentException e) {
        if (!"No output format: crush.1.output.format".equals(e.getMessage())) {
            throw e;
        }
    }
}
From source file:com.hdfs.concat.crush.CrushReducerTest.java
License:Apache License
@Test
public void outputFormatWrongType() {
    JobConf job = new JobConf(false);

    job.set("mapred.tip.id", "task_201011081200_14527_r_1234");

    job.set("fs.default.name", "file:///");
    job.set("fs.file.impl", "org.apache.hadoop.fs.LocalFileSystem");
    job.set("mapred.output.dir", outDir.getAbsolutePath());

    job.setLong("crush.timestamp", 98765);
    job.setLong("dfs.block.size", 1024 * 1024 * 64L);

    job.setInt("crush.num.specs", 2);

    job.set("crush.0.regex", "foo");
    job.set("crush.0.regex.replacement", "bar");
    job.set("crush.0.input.format", SequenceFileInputFormat.class.getName());
    job.set("crush.0.output.format", SequenceFileOutputFormat.class.getName());

    job.set("crush.1.regex", "hello");
    job.set("crush.1.regex.replacement", "hello");
    job.set("crush.1.input.format", TextInputFormat.class.getName());
    job.set("crush.1.output.format", Object.class.getName());

    reducer = new CrushReducer();

    try {
        reducer.configure(job);
        fail();
    } catch (IllegalArgumentException e) {
        if (!"Not an output format: crush.1.output.format=java.lang.Object".equals(e.getMessage())) {
            throw e;
        }
    }
}
From source file:com.hdfs.concat.crush.CrushStandAloneSequenceFileTest.java
License:Apache License
@Before
public void setup() throws Exception {
    job = new JobConf(false);

    job.set("fs.default.name", "file:///");
    job.set("fs.file.impl", "org.apache.hadoop.fs.LocalFileSystem");

    job.setLong("dfs.block.size", 50);
}
From source file:com.hdfs.concat.crush.CrushTest.java
License:Apache License
@Before
public void setupJob() throws IOException {
    job = new JobConf(false);

    job.set("fs.default.name", "file:///");
    job.set("fs.file.impl", "org.apache.hadoop.fs.LocalFileSystem");

    job.setInt("mapred.reduce.tasks", 5);
    job.setLong("dfs.block.size", 50);

    FileSystem delegate = FileSystem.get(job);
    fileSystem = new SortingFileSystem(delegate);

    /*
     * Set the working directory so that all relative paths are rooted in the tmp dir. This will keep the file system clean of
     * temporary test files.
     */
    FileSystem.get(job).setWorkingDirectory(new Path(tmp.getRoot().getAbsolutePath()));
}
From source file:com.ibm.bi.dml.api.DMLScript.java
License:Open Source License
/**
 * @param config
 * @throws IOException
 * @throws ParseException
 */
private static void cleanupHadoopExecution(DMLConfig config) throws IOException, ParseException {
    //create dml-script-specific suffix
    StringBuilder sb = new StringBuilder();
    sb.append(Lop.FILE_SEPARATOR);
    sb.append(Lop.PROCESS_PREFIX);
    sb.append(DMLScript.getUUID());
    String dirSuffix = sb.toString();

    //1) cleanup scratch space (everything for current uuid)
    //(required otherwise export to hdfs would skip assumed unnecessary writes if same name)
    MapReduceTool.deleteFileIfExistOnHDFS(config.getTextValue(DMLConfig.SCRATCH_SPACE) + dirSuffix);

    //2) cleanup hadoop working dirs (only required for LocalJobRunner (local job tracker), because
    //this implementation does not create job specific sub directories)
    JobConf job = new JobConf(ConfigurationManager.getCachedJobConf());
    if (InfrastructureAnalyzer.isLocalMode(job)) {
        try {
            LocalFileUtils.deleteFileIfExists(DMLConfig.LOCAL_MR_MODE_STAGING_DIR + dirSuffix); //staging dir (for local mode only)
            LocalFileUtils.deleteFileIfExists(MRJobConfiguration.getLocalWorkingDirPrefix(job) + dirSuffix); //local dir
            MapReduceTool.deleteFileIfExistOnHDFS(MRJobConfiguration.getSystemWorkingDirPrefix(job) + dirSuffix); //system dir
            MapReduceTool.deleteFileIfExistOnHDFS(MRJobConfiguration.getStagingWorkingDirPrefix(job) + dirSuffix); //staging dir
        } catch (Exception ex) {
            //we give only a warning because those directories are written by the mapred deamon
            //and hence, execution can still succeed
            LOG.warn("Unable to cleanup hadoop working dirs: " + ex.getMessage());
        }
    }

    //3) cleanup systemml-internal working dirs
    CacheableData.cleanupCacheDir(); //might be local/hdfs
    LocalFileUtils.cleanupWorkingDirectory();
}
From source file:com.ibm.bi.dml.runtime.controlprogram.parfor.DataPartitionerLocal.java
License:Open Source License
/**
 * @param fname
 * @param fnameStaging
 * @param fnameNew
 * @param brlen
 * @param bclen
 * @throws DMLRuntimeException
 */
private void partitionTextCell(String fname, String fnameStaging, String fnameNew, long rlen, long clen,
        int brlen, int bclen) throws DMLRuntimeException {
    long row = -1;
    long col = -1;

    try {
        //STEP 1: read matrix from HDFS and write blocks to local staging area
        //check and add input path
        JobConf job = new JobConf(ConfigurationManager.getCachedJobConf());
        Path path = new Path(fname);
        FileInputFormat.addInputPath(job, path);
        TextInputFormat informat = new TextInputFormat();
        informat.configure(job);
        InputSplit[] splits = informat.getSplits(job, 1);

        LinkedList<Cell> buffer = new LinkedList<Cell>();
        LongWritable key = new LongWritable();
        Text value = new Text();
        FastStringTokenizer st = new FastStringTokenizer(' ');

        for (InputSplit split : splits) {
            RecordReader<LongWritable, Text> reader = informat.getRecordReader(split, job, Reporter.NULL);
            try {
                while (reader.next(key, value)) {
                    st.reset(value.toString()); //reset tokenizer
                    row = st.nextLong();
                    col = st.nextLong();
                    double lvalue = st.nextDouble();

                    Cell tmp = new Cell(row, col, lvalue);

                    buffer.addLast(tmp);
                    if (buffer.size() > StagingFileUtils.CELL_BUFFER_SIZE) //periodic flush
                    {
                        appendCellBufferToStagingArea(fnameStaging, buffer, brlen, bclen);
                        buffer.clear();
                    }
                }

                //final flush
                if (!buffer.isEmpty()) {
                    appendCellBufferToStagingArea(fnameStaging, buffer, brlen, bclen);
                    buffer.clear();
                }
            } finally {
                if (reader != null)
                    reader.close();
            }
        }

        //STEP 2: read matrix blocks from staging area and write matrix to HDFS
        String[] fnamesPartitions = new File(fnameStaging).list();
        if (PARALLEL) {
            int len = Math.min(fnamesPartitions.length, _par);
            Thread[] threads = new Thread[len];
            for (int i = 0; i < len; i++) {
                int start = i * (int) Math.ceil(((double) fnamesPartitions.length) / len);
                int end = (i + 1) * (int) Math.ceil(((double) fnamesPartitions.length) / len) - 1;
                end = Math.min(end, fnamesPartitions.length - 1);
                threads[i] = new Thread(new DataPartitionerWorkerTextCell(job, fnameNew, fnameStaging,
                        fnamesPartitions, start, end));
                threads[i].start();
            }

            for (Thread t : threads)
                t.join();
        } else {
            for (String pdir : fnamesPartitions)
                writeTextCellFileToHDFS(job, fnameNew, fnameStaging + "/" + pdir);
        }
    } catch (Exception e) {
        //post-mortem error handling and bounds checking
        if (row < 1 || row > rlen || col < 1 || col > clen) {
            throw new DMLRuntimeException("Matrix cell [" + (row) + "," + (col) + "] "
                    + "out of overall matrix range [1:" + rlen + ",1:" + clen + "].");
        } else
            throw new DMLRuntimeException("Unable to partition text cell matrix.", e);
    }
}
From source file:com.ibm.bi.dml.runtime.controlprogram.parfor.DataPartitionerLocal.java
License:Open Source License
/**
 * @param fname
 * @param fnameStaging
 * @param fnameNew
 * @param brlen
 * @param bclen
 * @throws DMLRuntimeException
 */
@SuppressWarnings("deprecation")
private void partitionBinaryCell(String fname, String fnameStaging, String fnameNew, long rlen, long clen,
        int brlen, int bclen) throws DMLRuntimeException {
    long row = -1;
    long col = -1;

    try {
        //STEP 1: read matrix from HDFS and write blocks to local staging area
        //check and add input path
        JobConf job = new JobConf(ConfigurationManager.getCachedJobConf());
        Path path = new Path(fname);
        FileSystem fs = FileSystem.get(job);

        //prepare sequence file reader, and write to local staging area
        LinkedList<Cell> buffer = new LinkedList<Cell>();
        MatrixIndexes key = new MatrixIndexes();
        MatrixCell value = new MatrixCell();

        for (Path lpath : MatrixReader.getSequenceFilePaths(fs, path)) {
            SequenceFile.Reader reader = new SequenceFile.Reader(fs, lpath, job);
            try {
                while (reader.next(key, value)) {
                    row = key.getRowIndex();
                    col = key.getColumnIndex();
                    Cell tmp = new Cell(row, col, value.getValue());

                    buffer.addLast(tmp);
                    if (buffer.size() > StagingFileUtils.CELL_BUFFER_SIZE) //periodic flush
                    {
                        appendCellBufferToStagingArea(fnameStaging, buffer, brlen, bclen);
                        buffer.clear();
                    }
                }

                //final flush
                if (!buffer.isEmpty()) {
                    appendCellBufferToStagingArea(fnameStaging, buffer, brlen, bclen);
                    buffer.clear();
                }
            } finally {
                if (reader != null)
                    reader.close();
            }
        }

        //STEP 2: read matrix blocks from staging area and write matrix to HDFS
        String[] fnamesPartitions = new File(fnameStaging).list();
        if (PARALLEL) {
            int len = Math.min(fnamesPartitions.length, _par);
            Thread[] threads = new Thread[len];
            for (int i = 0; i < len; i++) {
                int start = i * (int) Math.ceil(((double) fnamesPartitions.length) / len);
                int end = (i + 1) * (int) Math.ceil(((double) fnamesPartitions.length) / len) - 1;
                end = Math.min(end, fnamesPartitions.length - 1);
                threads[i] = new Thread(new DataPartitionerWorkerBinaryCell(job, fnameNew, fnameStaging,
                        fnamesPartitions, start, end));
                threads[i].start();
            }

            for (Thread t : threads)
                t.join();
        } else {
            for (String pdir : fnamesPartitions)
                writeBinaryCellSequenceFileToHDFS(job, fnameNew, fnameStaging + "/" + pdir);
        }
    } catch (Exception e) {
        //post-mortem error handling and bounds checking
        if (row < 1 || row > rlen || col < 1 || col > clen) {
            throw new DMLRuntimeException("Matrix cell [" + (row) + "," + (col) + "] "
                    + "out of overall matrix range [1:" + rlen + ",1:" + clen + "].");
        } else
            throw new DMLRuntimeException("Unable to partition binary cell matrix.", e);
    }
}
From source file:com.ibm.bi.dml.runtime.controlprogram.parfor.DataPartitionerLocal.java
License:Open Source License
/**
 * @param fname
 * @param fnameStaging
 * @param fnameNew
 * @param brlen
 * @param bclen
 * @throws DMLRuntimeException
 */
@SuppressWarnings("deprecation")
private void partitionBinaryBlock(String fname, String fnameStaging, String fnameNew, long rlen, long clen,
        int brlen, int bclen) throws DMLRuntimeException {
    try {
        //create reuse object
        _reuseBlk = DataPartitioner.createReuseMatrixBlock(_format, brlen, bclen);

        //STEP 1: read matrix from HDFS and write blocks to local staging area
        //check and add input path
        JobConf job = new JobConf(ConfigurationManager.getCachedJobConf());
        Path path = new Path(fname);
        FileSystem fs = FileSystem.get(job);

        //prepare sequence file reader, and write to local staging area
        MatrixIndexes key = new MatrixIndexes();
        MatrixBlock value = new MatrixBlock();

        for (Path lpath : MatrixReader.getSequenceFilePaths(fs, path)) {
            SequenceFile.Reader reader = new SequenceFile.Reader(fs, lpath, job);
            try {
                while (reader.next(key, value)) //for each block
                {
                    long row_offset = (key.getRowIndex() - 1) * brlen;
                    long col_offset = (key.getColumnIndex() - 1) * bclen;
                    long rows = value.getNumRows();
                    long cols = value.getNumColumns();

                    //bound check per block
                    if (row_offset + rows < 1 || row_offset + rows > rlen || col_offset + cols < 1
                            || col_offset + cols > clen) {
                        throw new IOException("Matrix block [" + (row_offset + 1) + ":" + (row_offset + rows)
                                + "," + (col_offset + 1) + ":" + (col_offset + cols) + "] "
                                + "out of overall matrix range [1:" + rlen + ",1:" + clen + "].");
                    }

                    appendBlockToStagingArea(fnameStaging, value, row_offset, col_offset, brlen, bclen);
                }
            } finally {
                if (reader != null)
                    reader.close();
            }
        }

        //STEP 2: read matrix blocks from staging area and write matrix to HDFS
        String[] fnamesPartitions = new File(fnameStaging).list();
        if (PARALLEL) {
            int len = Math.min(fnamesPartitions.length, _par);
            Thread[] threads = new Thread[len];
            for (int i = 0; i < len; i++) {
                int start = i * (int) Math.ceil(((double) fnamesPartitions.length) / len);
                int end = (i + 1) * (int) Math.ceil(((double) fnamesPartitions.length) / len) - 1;
                end = Math.min(end, fnamesPartitions.length - 1);
                threads[i] = new Thread(new DataPartitionerWorkerBinaryBlock(job, fnameNew, fnameStaging,
                        fnamesPartitions, start, end));
                threads[i].start();
            }

            for (Thread t : threads)
                t.join();
        } else {
            for (String pdir : fnamesPartitions)
                writeBinaryBlockSequenceFileToHDFS(job, fnameNew, fnameStaging + "/" + pdir, false);
        }
    } catch (Exception e) {
        throw new DMLRuntimeException("Unable to partition binary block matrix.", e);
    }
}
From source file:com.ibm.bi.dml.runtime.controlprogram.parfor.DataPartitionerLocal.java
License:Open Source License
/**
 * @param fname
 * @param fnameStaging
 * @param fnameNew
 * @param brlen
 * @param bclen
 * @throws DMLRuntimeException
 */
@SuppressWarnings("deprecation")
private void partitionBinaryBlock2BinaryCell(String fname, String fnameStaging, String fnameNew, long rlen,
        long clen, int brlen, int bclen) throws DMLRuntimeException {
    try {
        //STEP 1: read matrix from HDFS and write blocks to local staging area
        //check and add input path
        JobConf job = new JobConf(ConfigurationManager.getCachedJobConf());
        Path path = new Path(fname);
        FileSystem fs = FileSystem.get(job);

        //prepare sequence file reader, and write to local staging area
        MatrixIndexes key = new MatrixIndexes();
        MatrixBlock value = new MatrixBlock();
        LinkedList<Cell> buffer = new LinkedList<Cell>();

        for (Path lpath : MatrixReader.getSequenceFilePaths(fs, path)) {
            SequenceFile.Reader reader = new SequenceFile.Reader(fs, lpath, job);
            try {
                while (reader.next(key, value)) //for each block
                {
                    long row_offset = (key.getRowIndex() - 1) * brlen;
                    long col_offset = (key.getColumnIndex() - 1) * bclen;
                    long rows = value.getNumRows();
                    long cols = value.getNumColumns();

                    //bound check per block
                    if (row_offset + rows < 1 || row_offset + rows > rlen || col_offset + cols < 1
                            || col_offset + cols > clen) {
                        throw new IOException("Matrix block [" + (row_offset + 1) + ":" + (row_offset + rows)
                                + "," + (col_offset + 1) + ":" + (col_offset + cols) + "] "
                                + "out of overall matrix range [1:" + rlen + ",1:" + clen + "].");
                    }

                    boolean sparse = value.isInSparseFormat();
                    if (sparse) //SPARSE
                    {
                        SparseRowsIterator iter = value.getSparseRowsIterator();
                        while (iter.hasNext()) {
                            IJV lcell = iter.next();
                            Cell tmp = new Cell(row_offset + lcell.i + 1, col_offset + lcell.j + 1, lcell.v);
                            buffer.addLast(tmp);
                        }
                    } else //DENSE
                    {
                        for (int i = 0; i < rows; i++)
                            for (int j = 0; j < cols; j++) {
                                double lvalue = value.getValueDenseUnsafe(i, j);
                                if (lvalue != 0) //for nnz
                                {
                                    Cell tmp = new Cell(row_offset + i + 1, col_offset + j + 1, lvalue);
                                    buffer.addLast(tmp);
                                }
                            }
                    }

                    appendCellBufferToStagingArea(fnameStaging, buffer, brlen, bclen);
                    buffer.clear();
                }
            } finally {
                if (reader != null)
                    reader.close();
            }
        }

        //STEP 2: read matrix blocks from staging area and write matrix to HDFS
        String[] fnamesPartitions = new File(fnameStaging).list();
        if (PARALLEL) {
            int len = Math.min(fnamesPartitions.length, _par);
            Thread[] threads = new Thread[len];
            for (int i = 0; i < len; i++) {
                int start = i * (int) Math.ceil(((double) fnamesPartitions.length) / len);
                int end = (i + 1) * (int) Math.ceil(((double) fnamesPartitions.length) / len) - 1;
                end = Math.min(end, fnamesPartitions.length - 1);
                threads[i] = new Thread(new DataPartitionerWorkerBinaryCell(job, fnameNew, fnameStaging,
                        fnamesPartitions, start, end));
                threads[i].start();
            }

            for (Thread t : threads)
                t.join();
        } else {
            for (String pdir : fnamesPartitions)
                writeBinaryCellSequenceFileToHDFS(job, fnameNew, fnameStaging + "/" + pdir);
        }
    } catch (Exception e) {
        throw new DMLRuntimeException("Unable to partition binary block matrix.", e);
    }
}
From source file:com.ibm.bi.dml.runtime.controlprogram.parfor.DataPartitionerRemoteMR.java
License:Open Source License
@Override
protected void partitionMatrix(MatrixObject in, String fnameNew, InputInfo ii, OutputInfo oi, long rlen,
        long clen, int brlen, int bclen) throws DMLRuntimeException {
    String jobname = "ParFor-DPMR";
    long t0 = DMLScript.STATISTICS ? System.nanoTime() : 0;

    JobConf job;
    job = new JobConf(DataPartitionerRemoteMR.class);
    if (_pfid >= 0) //use in parfor
        job.setJobName(jobname + _pfid);
    else //use for partition instruction
        job.setJobName("Partition-MR");

    //maintain dml script counters
    Statistics.incrementNoOfCompiledMRJobs();

    try {
        //force writing to disk (typically not required since partitioning only applied if dataset exceeds CP size)
        in.exportData(); //written to disk iff dirty

        Path path = new Path(in.getFileName());

        /////
        //configure the MR job
        MRJobConfiguration.setPartitioningInfo(job, rlen, clen, brlen, bclen, ii, oi, _format, _n, fnameNew,
                _keepIndexes);

        //set mappers, reducers, combiners
        job.setMapperClass(DataPartitionerRemoteMapper.class);
        job.setReducerClass(DataPartitionerRemoteReducer.class);

        if (oi == OutputInfo.TextCellOutputInfo) {
            //binary cell intermediates for reduced IO
            job.setMapOutputKeyClass(LongWritable.class);
            job.setMapOutputValueClass(PairWritableCell.class);
        } else if (oi == OutputInfo.BinaryCellOutputInfo) {
            job.setMapOutputKeyClass(LongWritable.class);
            job.setMapOutputValueClass(PairWritableCell.class);
        } else if (oi == OutputInfo.BinaryBlockOutputInfo) {
            job.setMapOutputKeyClass(LongWritable.class);
            job.setMapOutputValueClass(PairWritableBlock.class);

            //check Alignment
            if ((_format == PDataPartitionFormat.ROW_BLOCK_WISE_N && rlen > _n && _n % brlen != 0)
                    || (_format == PDataPartitionFormat.COLUMN_BLOCK_WISE_N && clen > _n && _n % bclen != 0)) {
                throw new DMLRuntimeException(
                        "Data partitioning format " + _format + " requires aligned blocks.");
            }
        }

        //set input format
        job.setInputFormat(ii.inputFormatClass);

        //set the input path and output path
        FileInputFormat.setInputPaths(job, path);

        //set output path
        MapReduceTool.deleteFileIfExistOnHDFS(fnameNew);
        //FileOutputFormat.setOutputPath(job, pathNew);
        job.setOutputFormat(NullOutputFormat.class);

        //////
        //set optimization parameters

        //set the number of mappers and reducers
        //job.setNumMapTasks( _numMappers ); //use default num mappers
        long reducerGroups = -1;
        switch (_format) {
        case ROW_WISE:
            reducerGroups = rlen;
            break;
        case COLUMN_WISE:
            reducerGroups = clen;
            break;
        case ROW_BLOCK_WISE:
            reducerGroups = (rlen / brlen) + ((rlen % brlen == 0) ? 0 : 1);
            break;
        case COLUMN_BLOCK_WISE:
            reducerGroups = (clen / bclen) + ((clen % bclen == 0) ? 0 : 1);
            break;
        case ROW_BLOCK_WISE_N:
            reducerGroups = (rlen / _n) + ((rlen % _n == 0) ? 0 : 1);
            break;
        case COLUMN_BLOCK_WISE_N:
            reducerGroups = (clen / _n) + ((clen % _n == 0) ? 0 : 1);
            break;
        default:
            //do nothing
        }
        job.setNumReduceTasks((int) Math.min(_numReducers, reducerGroups));

        //use FLEX scheduler configuration properties
        /*if( ParForProgramBlock.USE_FLEX_SCHEDULER_CONF )
        {
            job.setInt("flex.map.min", 0);
            job.setInt("flex.map.max", _numMappers);
            job.setInt("flex.reduce.min", 0);
            job.setInt("flex.reduce.max", _numMappers);
        }*/

        //disable automatic tasks timeouts and speculative task exec
        job.setInt("mapred.task.timeout", 0);
        job.setMapSpeculativeExecution(false);

        //set up preferred custom serialization framework for binary block format
        if (MRJobConfiguration.USE_BINARYBLOCK_SERIALIZATION)
            MRJobConfiguration.addBinaryBlockSerializationFramework(job);

        //enables the reuse of JVMs (multiple tasks per MR task)
        if (_jvmReuse)
            job.setNumTasksToExecutePerJvm(-1); //unlimited

        //enables compression - not conclusive for different codecs (empirically good compression ratio, but significantly slower)
        //job.set("mapred.compress.map.output", "true");
        //job.set("mapred.map.output.compression.codec", "org.apache.hadoop.io.compress.GzipCodec");

        //set the replication factor for the results
        job.setInt("dfs.replication", _replication);

        //set up map/reduce memory configurations (if in AM context)
        DMLConfig config = ConfigurationManager.getConfig();
        DMLAppMasterUtils.setupMRJobRemoteMaxMemory(job, config);

        //set the max number of retries per map task
        // disabled job-level configuration to respect cluster configuration
        // note: this refers to hadoop2, hence it never had effect on mr1
        //job.setInt("mapreduce.map.maxattempts", _max_retry);

        //set unique working dir
        MRJobConfiguration.setUniqueWorkingDir(job);

        /////
        // execute the MR job
        JobClient.runJob(job);

        //maintain dml script counters
        Statistics.incrementNoOfExecutedMRJobs();
    } catch (Exception ex) {
        throw new DMLRuntimeException(ex);
    }

    if (DMLScript.STATISTICS && _pfid >= 0) {
        long t1 = System.nanoTime(); //only for parfor
        Statistics.maintainCPHeavyHitters("MR-Job_" + jobname, t1 - t0);
    }
}