Example usage for org.apache.hadoop.mapred JobConf JobConf

Introduction

On this page you can find example usages of the org.apache.hadoop.mapred.JobConf JobConf(boolean loadDefaults) constructor.

Prototype

public JobConf(boolean loadDefaults) 

Document

A new map/reduce configuration where the behavior of reading from the default resources can be turned off.
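
Below is a minimal, self-contained sketch (not part of the usage examples that follow) illustrating that behavior: with loadDefaults set to false the configuration starts empty and every property has to be set explicitly, whereas the default-loading constructor reads the usual default resources (e.g. core-default.xml and core-site.xml) from the classpath. The class name is made up for illustration.

import org.apache.hadoop.mapred.JobConf;

public class JobConfLoadDefaultsSketch {
    public static void main(String[] args) {
        // loadDefaults = false: nothing is read from the default resources,
        // so properties such as fs.default.name must be set by hand.
        JobConf bare = new JobConf(false);
        bare.set("fs.default.name", "file:///");

        // No-arg constructor: loads the default resources from the classpath.
        JobConf withDefaults = new JobConf();

        System.out.println("bare: " + bare.get("fs.default.name"));
        System.out.println("with defaults: " + withDefaults.get("fs.default.name"));
    }
}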

Usage

From source file:com.hdfs.concat.crush.CrushReducerTest.java

License:Apache License

@Test
public void missingOutputFormat() {
    JobConf job = new JobConf(false);

    job.set("mapred.tip.id", "task_201011081200_14527_r_1234");

    job.set("fs.default.name", "file:///");
    job.set("fs.file.impl", "org.apache.hadoop.fs.LocalFileSystem");
    job.set("mapred.output.dir", outDir.getAbsolutePath());

    job.setLong("crush.timestamp", 98765);

    job.setLong("dfs.block.size", 1024 * 1024 * 64L);

    job.setInt("crush.num.specs", 2);
    job.set("crush.0.regex", "foo");
    job.set("crush.0.regex.replacement", "bar");
    job.set("crush.0.input.format", SequenceFileInputFormat.class.getName());
    job.set("crush.0.output.format", SequenceFileOutputFormat.class.getName());

    job.set("crush.1.regex", "hello");
    job.set("crush.1.regex.replacement", "hello");
    job.set("crush.1.input.format", SequenceFileInputFormat.class.getName());

    reducer = new CrushReducer();

    try {
        reducer.configure(job);
        fail();
    } catch (IllegalArgumentException e) {
        if (!"No output format: crush.1.output.format".equals(e.getMessage())) {
            throw e;
        }
    }
}

From source file:com.hdfs.concat.crush.CrushReducerTest.java

License:Apache License

@Test
public void outputFormatWrongType() {
    JobConf job = new JobConf(false);

    job.set("mapred.tip.id", "task_201011081200_14527_r_1234");

    job.set("fs.default.name", "file:///");
    job.set("fs.file.impl", "org.apache.hadoop.fs.LocalFileSystem");
    job.set("mapred.output.dir", outDir.getAbsolutePath());

    job.setLong("crush.timestamp", 98765);

    job.setLong("dfs.block.size", 1024 * 1024 * 64L);

    job.setInt("crush.num.specs", 2);
    job.set("crush.0.regex", "foo");
    job.set("crush.0.regex.replacement", "bar");
    job.set("crush.0.input.format", SequenceFileInputFormat.class.getName());
    job.set("crush.0.output.format", SequenceFileOutputFormat.class.getName());

    job.set("crush.1.regex", "hello");
    job.set("crush.1.regex.replacement", "hello");
    job.set("crush.1.input.format", TextInputFormat.class.getName());
    job.set("crush.1.output.format", Object.class.getName());

    reducer = new CrushReducer();

    try {
        reducer.configure(job);
        fail();
    } catch (IllegalArgumentException e) {
        if (!"Not an output format: crush.1.output.format=java.lang.Object".equals(e.getMessage())) {
            throw e;
        }
    }
}

From source file:com.hdfs.concat.crush.CrushStandAloneSequenceFileTest.java

License:Apache License

@Before
public void setup() throws Exception {
    job = new JobConf(false);

    job.set("fs.default.name", "file:///");
    job.set("fs.file.impl", "org.apache.hadoop.fs.LocalFileSystem");
    job.setLong("dfs.block.size", 50);
}

From source file:com.hdfs.concat.crush.CrushTest.java

License:Apache License

@Before
public void setupJob() throws IOException {
    job = new JobConf(false);

    job.set("fs.default.name", "file:///");
    job.set("fs.file.impl", "org.apache.hadoop.fs.LocalFileSystem");
    job.setInt("mapred.reduce.tasks", 5);
    job.setLong("dfs.block.size", 50);

    FileSystem delegate = FileSystem.get(job);

    fileSystem = new SortingFileSystem(delegate);

    /*
     * Set the working directory so that all relative paths are rooted in the tmp dir. This will keep the file system clean of
     * temporary test files.
     */
    FileSystem.get(job).setWorkingDirectory(new Path(tmp.getRoot().getAbsolutePath()));
}

From source file:com.ibm.bi.dml.api.DMLScript.java

License:Open Source License

/**
 * Cleans up the scratch space, Hadoop working directories, and SystemML-internal
 * working directories created for the current DML script UUID.
 *
 * @param config
 * @throws IOException
 * @throws ParseException
 */
private static void cleanupHadoopExecution(DMLConfig config) throws IOException, ParseException {
    //create dml-script-specific suffix
    StringBuilder sb = new StringBuilder();
    sb.append(Lop.FILE_SEPARATOR);
    sb.append(Lop.PROCESS_PREFIX);
    sb.append(DMLScript.getUUID());
    String dirSuffix = sb.toString();

    //1) cleanup scratch space (everything for current uuid) 
    //(required otherwise export to hdfs would skip assumed unnecessary writes if same name)
    MapReduceTool.deleteFileIfExistOnHDFS(config.getTextValue(DMLConfig.SCRATCH_SPACE) + dirSuffix);

    //2) cleanup hadoop working dirs (only required for LocalJobRunner (local job tracker), because
    //this implementation does not create job specific sub directories)
    JobConf job = new JobConf(ConfigurationManager.getCachedJobConf());
    if (InfrastructureAnalyzer.isLocalMode(job)) {
        try {
            LocalFileUtils.deleteFileIfExists(DMLConfig.LOCAL_MR_MODE_STAGING_DIR + //staging dir (for local mode only) 
                    dirSuffix);
            LocalFileUtils.deleteFileIfExists(MRJobConfiguration.getLocalWorkingDirPrefix(job) + //local dir
                    dirSuffix);
            MapReduceTool.deleteFileIfExistOnHDFS(MRJobConfiguration.getSystemWorkingDirPrefix(job) + //system dir
                    dirSuffix);
            MapReduceTool.deleteFileIfExistOnHDFS(MRJobConfiguration.getStagingWorkingDirPrefix(job) + //staging dir
                    dirSuffix);
        } catch (Exception ex) {
            //we give only a warning because those directories are written by the mapred daemon
            //and hence execution can still succeed
            LOG.warn("Unable to cleanup hadoop working dirs: " + ex.getMessage());
        }
    }

    //3) cleanup systemml-internal working dirs
    CacheableData.cleanupCacheDir(); //might be local/hdfs
    LocalFileUtils.cleanupWorkingDirectory();
}

From source file:com.ibm.bi.dml.runtime.controlprogram.parfor.DataPartitionerLocal.java

License:Open Source License

/**
 * Partitions a matrix stored in text cell format: reads the matrix from HDFS into a
 * local staging area and then writes the partitions back to HDFS.
 *
 * @param fname
 * @param fnameStaging
 * @param fnameNew
 * @param rlen
 * @param clen
 * @param brlen
 * @param bclen
 * @throws DMLRuntimeException
 */
private void partitionTextCell(String fname, String fnameStaging, String fnameNew, long rlen, long clen,
        int brlen, int bclen) throws DMLRuntimeException {
    long row = -1;
    long col = -1;

    try {
        //STEP 1: read matrix from HDFS and write blocks to local staging area         
        //check and add input path
        JobConf job = new JobConf(ConfigurationManager.getCachedJobConf());
        Path path = new Path(fname);
        FileInputFormat.addInputPath(job, path);
        TextInputFormat informat = new TextInputFormat();
        informat.configure(job);
        InputSplit[] splits = informat.getSplits(job, 1);

        LinkedList<Cell> buffer = new LinkedList<Cell>();
        LongWritable key = new LongWritable();
        Text value = new Text();
        FastStringTokenizer st = new FastStringTokenizer(' ');

        for (InputSplit split : splits) {
            RecordReader<LongWritable, Text> reader = informat.getRecordReader(split, job, Reporter.NULL);
            try {
                while (reader.next(key, value)) {
                    st.reset(value.toString()); //reset tokenizer
                    row = st.nextLong();
                    col = st.nextLong();
                    double lvalue = st.nextDouble();
                    Cell tmp = new Cell(row, col, lvalue);

                    buffer.addLast(tmp);
                    if (buffer.size() > StagingFileUtils.CELL_BUFFER_SIZE) //periodic flush
                    {
                        appendCellBufferToStagingArea(fnameStaging, buffer, brlen, bclen);
                        buffer.clear();
                    }
                }

                //final flush
                if (!buffer.isEmpty()) {
                    appendCellBufferToStagingArea(fnameStaging, buffer, brlen, bclen);
                    buffer.clear();
                }
            } finally {
                if (reader != null)
                    reader.close();
            }
        }

        //STEP 2: read matrix blocks from staging area and write matrix to HDFS
        String[] fnamesPartitions = new File(fnameStaging).list();
        if (PARALLEL) {
            int len = Math.min(fnamesPartitions.length, _par);
            Thread[] threads = new Thread[len];
            for (int i = 0; i < len; i++) {
                int start = i * (int) Math.ceil(((double) fnamesPartitions.length) / len);
                int end = (i + 1) * (int) Math.ceil(((double) fnamesPartitions.length) / len) - 1;
                end = Math.min(end, fnamesPartitions.length - 1);
                threads[i] = new Thread(new DataPartitionerWorkerTextCell(job, fnameNew, fnameStaging,
                        fnamesPartitions, start, end));
                threads[i].start();
            }

            for (Thread t : threads)
                t.join();
        } else {
            for (String pdir : fnamesPartitions)
                writeTextCellFileToHDFS(job, fnameNew, fnameStaging + "/" + pdir);
        }
    } catch (Exception e) {
        //post-mortem error handling and bounds checking
        if (row < 1 || row > rlen || col < 1 || col > clen) {
            throw new DMLRuntimeException("Matrix cell [" + (row) + "," + (col) + "] "
                    + "out of overall matrix range [1:" + rlen + ",1:" + clen + "].");
        } else
            throw new DMLRuntimeException("Unable to partition text cell matrix.", e);
    }
}

From source file:com.ibm.bi.dml.runtime.controlprogram.parfor.DataPartitionerLocal.java

License:Open Source License

/**
 * Partitions a matrix stored in binary cell format: reads the matrix from HDFS into a
 * local staging area and then writes the partitions back to HDFS.
 *
 * @param fname
 * @param fnameStaging
 * @param fnameNew
 * @param rlen
 * @param clen
 * @param brlen
 * @param bclen
 * @throws DMLRuntimeException
 */
@SuppressWarnings("deprecation")
private void partitionBinaryCell(String fname, String fnameStaging, String fnameNew, long rlen, long clen,
        int brlen, int bclen) throws DMLRuntimeException {
    long row = -1;
    long col = -1;

    try {
        //STEP 1: read matrix from HDFS and write blocks to local staging area
        //check and add input path
        JobConf job = new JobConf(ConfigurationManager.getCachedJobConf());
        Path path = new Path(fname);
        FileSystem fs = FileSystem.get(job);

        //prepare sequence file reader, and write to local staging area   
        LinkedList<Cell> buffer = new LinkedList<Cell>();
        MatrixIndexes key = new MatrixIndexes();
        MatrixCell value = new MatrixCell();

        for (Path lpath : MatrixReader.getSequenceFilePaths(fs, path)) {
            SequenceFile.Reader reader = new SequenceFile.Reader(fs, lpath, job);
            try {
                while (reader.next(key, value)) {
                    row = key.getRowIndex();
                    col = key.getColumnIndex();
                    Cell tmp = new Cell(row, col, value.getValue());

                    buffer.addLast(tmp);
                    if (buffer.size() > StagingFileUtils.CELL_BUFFER_SIZE) //periodic flush
                    {
                        appendCellBufferToStagingArea(fnameStaging, buffer, brlen, bclen);
                        buffer.clear();
                    }
                }

                //final flush
                if (!buffer.isEmpty()) {
                    appendCellBufferToStagingArea(fnameStaging, buffer, brlen, bclen);
                    buffer.clear();
                }
            } finally {
                if (reader != null)
                    reader.close();
            }
        }

        //STEP 2: read matrix blocks from staging area and write matrix to HDFS
        String[] fnamesPartitions = new File(fnameStaging).list();
        if (PARALLEL) {
            int len = Math.min(fnamesPartitions.length, _par);
            Thread[] threads = new Thread[len];
            for (int i = 0; i < len; i++) {
                int start = i * (int) Math.ceil(((double) fnamesPartitions.length) / len);
                int end = (i + 1) * (int) Math.ceil(((double) fnamesPartitions.length) / len) - 1;
                end = Math.min(end, fnamesPartitions.length - 1);
                threads[i] = new Thread(new DataPartitionerWorkerBinaryCell(job, fnameNew, fnameStaging,
                        fnamesPartitions, start, end));
                threads[i].start();
            }

            for (Thread t : threads)
                t.join();
        } else {
            for (String pdir : fnamesPartitions)
                writeBinaryCellSequenceFileToHDFS(job, fnameNew, fnameStaging + "/" + pdir);
        }
    } catch (Exception e) {
        //post-mortem error handling and bounds checking
        if (row < 1 || row > rlen || col < 1 || col > clen) {
            throw new DMLRuntimeException("Matrix cell [" + (row) + "," + (col) + "] "
                    + "out of overall matrix range [1:" + rlen + ",1:" + clen + "].");
        } else
            throw new DMLRuntimeException("Unable to partition binary cell matrix.", e);
    }
}

From source file:com.ibm.bi.dml.runtime.controlprogram.parfor.DataPartitionerLocal.java

License:Open Source License

/**
 * Partitions a matrix stored in binary block format: reads the matrix from HDFS into a
 * local staging area and then writes the partitions back to HDFS.
 *
 * @param fname
 * @param fnameStaging
 * @param fnameNew
 * @param rlen
 * @param clen
 * @param brlen
 * @param bclen
 * @throws DMLRuntimeException
 */
@SuppressWarnings("deprecation")
private void partitionBinaryBlock(String fname, String fnameStaging, String fnameNew, long rlen, long clen,
        int brlen, int bclen) throws DMLRuntimeException {
    try {
        //create reuse object
        _reuseBlk = DataPartitioner.createReuseMatrixBlock(_format, brlen, bclen);

        //STEP 1: read matrix from HDFS and write blocks to local staging area   
        //check and add input path
        JobConf job = new JobConf(ConfigurationManager.getCachedJobConf());
        Path path = new Path(fname);
        FileSystem fs = FileSystem.get(job);

        //prepare sequence file reader, and write to local staging area
        MatrixIndexes key = new MatrixIndexes();
        MatrixBlock value = new MatrixBlock();

        for (Path lpath : MatrixReader.getSequenceFilePaths(fs, path)) {
            SequenceFile.Reader reader = new SequenceFile.Reader(fs, lpath, job);
            try {
                while (reader.next(key, value)) //for each block
                {
                    long row_offset = (key.getRowIndex() - 1) * brlen;
                    long col_offset = (key.getColumnIndex() - 1) * bclen;
                    long rows = value.getNumRows();
                    long cols = value.getNumColumns();

                    //bound check per block
                    if (row_offset + rows < 1 || row_offset + rows > rlen || col_offset + cols < 1
                            || col_offset + cols > clen) {
                        throw new IOException("Matrix block [" + (row_offset + 1) + ":" + (row_offset + rows)
                                + "," + (col_offset + 1) + ":" + (col_offset + cols) + "] "
                                + "out of overall matrix range [1:" + rlen + ",1:" + clen + "].");
                    }

                    appendBlockToStagingArea(fnameStaging, value, row_offset, col_offset, brlen, bclen);
                }
            } finally {
                if (reader != null)
                    reader.close();
            }
        }

        //STEP 2: read matrix blocks from staging area and write matrix to HDFS
        String[] fnamesPartitions = new File(fnameStaging).list();
        if (PARALLEL) {
            int len = Math.min(fnamesPartitions.length, _par);
            Thread[] threads = new Thread[len];
            for (int i = 0; i < len; i++) {
                int start = i * (int) Math.ceil(((double) fnamesPartitions.length) / len);
                int end = (i + 1) * (int) Math.ceil(((double) fnamesPartitions.length) / len) - 1;
                end = Math.min(end, fnamesPartitions.length - 1);
                threads[i] = new Thread(new DataPartitionerWorkerBinaryBlock(job, fnameNew, fnameStaging,
                        fnamesPartitions, start, end));
                threads[i].start();
            }

            for (Thread t : threads)
                t.join();
        } else {
            for (String pdir : fnamesPartitions)
                writeBinaryBlockSequenceFileToHDFS(job, fnameNew, fnameStaging + "/" + pdir, false);
        }
    } catch (Exception e) {
        throw new DMLRuntimeException("Unable to partition binary block matrix.", e);
    }
}

From source file:com.ibm.bi.dml.runtime.controlprogram.parfor.DataPartitionerLocal.java

License:Open Source License

/**
 * Partitions a matrix stored in binary block format into binary cell output: reads the
 * matrix from HDFS, converts each block to cells in a local staging area, and then
 * writes the partitions back to HDFS.
 *
 * @param fname
 * @param fnameStaging
 * @param fnameNew
 * @param rlen
 * @param clen
 * @param brlen
 * @param bclen
 * @throws DMLRuntimeException
 */
@SuppressWarnings("deprecation")
private void partitionBinaryBlock2BinaryCell(String fname, String fnameStaging, String fnameNew, long rlen,
        long clen, int brlen, int bclen) throws DMLRuntimeException {
    try {
        //STEP 1: read matrix from HDFS and write blocks to local staging area   
        //check and add input path
        JobConf job = new JobConf(ConfigurationManager.getCachedJobConf());
        Path path = new Path(fname);
        FileSystem fs = FileSystem.get(job);

        //prepare sequence file reader, and write to local staging area
        MatrixIndexes key = new MatrixIndexes();
        MatrixBlock value = new MatrixBlock();

        LinkedList<Cell> buffer = new LinkedList<Cell>();

        for (Path lpath : MatrixReader.getSequenceFilePaths(fs, path)) {
            SequenceFile.Reader reader = new SequenceFile.Reader(fs, lpath, job);
            try {
                while (reader.next(key, value)) //for each block
                {
                    long row_offset = (key.getRowIndex() - 1) * brlen;
                    long col_offset = (key.getColumnIndex() - 1) * bclen;
                    long rows = value.getNumRows();
                    long cols = value.getNumColumns();

                    //bound check per block
                    if (row_offset + rows < 1 || row_offset + rows > rlen || col_offset + cols < 1
                            || col_offset + cols > clen) {
                        throw new IOException("Matrix block [" + (row_offset + 1) + ":" + (row_offset + rows)
                                + "," + (col_offset + 1) + ":" + (col_offset + cols) + "] "
                                + "out of overall matrix range [1:" + rlen + ",1:" + clen + "].");
                    }

                    boolean sparse = value.isInSparseFormat();
                    if (sparse) //SPARSE
                    {
                        SparseRowsIterator iter = value.getSparseRowsIterator();
                        while (iter.hasNext()) {
                            IJV lcell = iter.next();
                            Cell tmp = new Cell(row_offset + lcell.i + 1, col_offset + lcell.j + 1, lcell.v);
                            buffer.addLast(tmp);
                        }
                    } else //DENSE
                    {
                        for (int i = 0; i < rows; i++)
                            for (int j = 0; j < cols; j++) {
                                double lvalue = value.getValueDenseUnsafe(i, j);
                                if (lvalue != 0) //for nnz
                                {
                                    Cell tmp = new Cell(row_offset + i + 1, col_offset + j + 1, lvalue);
                                    buffer.addLast(tmp);
                                }
                            }
                    }

                    appendCellBufferToStagingArea(fnameStaging, buffer, brlen, bclen);
                    buffer.clear();
                }
            } finally {
                if (reader != null)
                    reader.close();
            }
        }

        //STEP 2: read matrix blocks from staging area and write matrix to HDFS
        String[] fnamesPartitions = new File(fnameStaging).list();
        if (PARALLEL) {
            int len = Math.min(fnamesPartitions.length, _par);
            Thread[] threads = new Thread[len];
            for (int i = 0; i < len; i++) {
                int start = i * (int) Math.ceil(((double) fnamesPartitions.length) / len);
                int end = (i + 1) * (int) Math.ceil(((double) fnamesPartitions.length) / len) - 1;
                end = Math.min(end, fnamesPartitions.length - 1);
                threads[i] = new Thread(new DataPartitionerWorkerBinaryCell(job, fnameNew, fnameStaging,
                        fnamesPartitions, start, end));
                threads[i].start();
            }

            for (Thread t : threads)
                t.join();
        } else {
            for (String pdir : fnamesPartitions)
                writeBinaryCellSequenceFileToHDFS(job, fnameNew, fnameStaging + "/" + pdir);
        }
    } catch (Exception e) {
        throw new DMLRuntimeException("Unable to partition binary block matrix.", e);
    }
}

From source file:com.ibm.bi.dml.runtime.controlprogram.parfor.DataPartitionerRemoteMR.java

License:Open Source License

@Override
protected void partitionMatrix(MatrixObject in, String fnameNew, InputInfo ii, OutputInfo oi, long rlen,
        long clen, int brlen, int bclen) throws DMLRuntimeException {
    String jobname = "ParFor-DPMR";
    long t0 = DMLScript.STATISTICS ? System.nanoTime() : 0;

    JobConf job = new JobConf(DataPartitionerRemoteMR.class);
    if (_pfid >= 0) //use in parfor
        job.setJobName(jobname + _pfid);
    else //use for partition instruction
        job.setJobName("Partition-MR");

    //maintain dml script counters
    Statistics.incrementNoOfCompiledMRJobs();

    try {
        //force writing to disk (typically not required since partitioning only applied if dataset exceeds CP size)
        in.exportData(); //written to disk iff dirty

        Path path = new Path(in.getFileName());

        /////
        //configure the MR job
        MRJobConfiguration.setPartitioningInfo(job, rlen, clen, brlen, bclen, ii, oi, _format, _n, fnameNew,
                _keepIndexes);

        //set mappers, reducers, combiners
        job.setMapperClass(DataPartitionerRemoteMapper.class);
        job.setReducerClass(DataPartitionerRemoteReducer.class);

        if (oi == OutputInfo.TextCellOutputInfo) {
            //binary cell intermediates for reduced IO 
            job.setMapOutputKeyClass(LongWritable.class);
            job.setMapOutputValueClass(PairWritableCell.class);
        } else if (oi == OutputInfo.BinaryCellOutputInfo) {
            job.setMapOutputKeyClass(LongWritable.class);
            job.setMapOutputValueClass(PairWritableCell.class);
        } else if (oi == OutputInfo.BinaryBlockOutputInfo) {
            job.setMapOutputKeyClass(LongWritable.class);
            job.setMapOutputValueClass(PairWritableBlock.class);

            //check Alignment
            if ((_format == PDataPartitionFormat.ROW_BLOCK_WISE_N && rlen > _n && _n % brlen != 0)
                    || (_format == PDataPartitionFormat.COLUMN_BLOCK_WISE_N && clen > _n && _n % bclen != 0)) {
                throw new DMLRuntimeException(
                        "Data partitioning format " + _format + " requires aligned blocks.");
            }
        }

        //set input format 
        job.setInputFormat(ii.inputFormatClass);

        //set the input path and output path 
        FileInputFormat.setInputPaths(job, path);

        //set output path
        MapReduceTool.deleteFileIfExistOnHDFS(fnameNew);
        //FileOutputFormat.setOutputPath(job, pathNew);
        job.setOutputFormat(NullOutputFormat.class);

        //////
        //set optimization parameters

        //set the number of mappers and reducers 
        //job.setNumMapTasks( _numMappers ); //use default num mappers
        long reducerGroups = -1;
        switch (_format) {
        case ROW_WISE:
            reducerGroups = rlen;
            break;
        case COLUMN_WISE:
            reducerGroups = clen;
            break;
        case ROW_BLOCK_WISE:
            reducerGroups = (rlen / brlen) + ((rlen % brlen == 0) ? 0 : 1);
            break;
        case COLUMN_BLOCK_WISE:
            reducerGroups = (clen / bclen) + ((clen % bclen == 0) ? 0 : 1);
            break;
        case ROW_BLOCK_WISE_N:
            reducerGroups = (rlen / _n) + ((rlen % _n == 0) ? 0 : 1);
            break;
        case COLUMN_BLOCK_WISE_N:
            reducerGroups = (clen / _n) + ((clen % _n == 0) ? 0 : 1);
            break;
        default:
            //do nothing
        }
        job.setNumReduceTasks((int) Math.min(_numReducers, reducerGroups));

        //use FLEX scheduler configuration properties
        /*if( ParForProgramBlock.USE_FLEX_SCHEDULER_CONF )
        {
           job.setInt("flex.map.min", 0);
           job.setInt("flex.map.max", _numMappers);
           job.setInt("flex.reduce.min", 0);
           job.setInt("flex.reduce.max", _numMappers);
        }*/

        //disable automatic task timeouts and speculative task execution
        job.setInt("mapred.task.timeout", 0);
        job.setMapSpeculativeExecution(false);

        //set up preferred custom serialization framework for binary block format
        if (MRJobConfiguration.USE_BINARYBLOCK_SERIALIZATION)
            MRJobConfiguration.addBinaryBlockSerializationFramework(job);

        //enables the reuse of JVMs (multiple tasks per MR task)
        if (_jvmReuse)
            job.setNumTasksToExecutePerJvm(-1); //unlimited

        //enables compression - not conclusive for different codecs (empirically good compression ratio, but significantly slower)
        //job.set("mapred.compress.map.output", "true");
        //job.set("mapred.map.output.compression.codec", "org.apache.hadoop.io.compress.GzipCodec");

        //set the replication factor for the results
        job.setInt("dfs.replication", _replication);

        //set up map/reduce memory configurations (if in AM context)
        DMLConfig config = ConfigurationManager.getConfig();
        DMLAppMasterUtils.setupMRJobRemoteMaxMemory(job, config);

        //set the max number of retries per map task
        //  disabled job-level configuration to respect cluster configuration
        //  note: this refers to hadoop2, hence it never had effect on mr1
        //job.setInt("mapreduce.map.maxattempts", _max_retry);

        //set unique working dir
        MRJobConfiguration.setUniqueWorkingDir(job);

        /////
        // execute the MR job   
        JobClient.runJob(job);

        //maintain dml script counters
        Statistics.incrementNoOfExecutedMRJobs();
    } catch (Exception ex) {
        throw new DMLRuntimeException(ex);
    }

    if (DMLScript.STATISTICS && _pfid >= 0) {
        long t1 = System.nanoTime(); //only for parfor 
        Statistics.maintainCPHeavyHitters("MR-Job_" + jobname, t1 - t0);
    }
}