Example usage for org.apache.hadoop.mapred JobConf setMapSpeculativeExecution

Introduction

On this page you can find example usage of the org.apache.hadoop.mapred JobConf method setMapSpeculativeExecution.

Prototype

public void setMapSpeculativeExecution(boolean speculativeExecution) 

Document

Turn speculative execution on or off for this job for map tasks.
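
Before the full examples below, here is a minimal, self-contained sketch of the call on a hypothetical job; the class name and paths are placeholders, not taken from the examples that follow:

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;

public class SpeculativeExecutionDemo {
    public static void main(String[] args) throws Exception {
        JobConf job = new JobConf(SpeculativeExecutionDemo.class);
        job.setJobName("speculative-execution-demo");

        //disable speculative execution for map tasks only; a common reason
        //(visible in the examples below) is that the mappers have side
        //effects, e.g. writing to an external store, and must not run twice
        job.setMapSpeculativeExecution(false);

        //hypothetical input/output paths; the job falls back to the
        //defaults: TextInputFormat, IdentityMapper, TextOutputFormat
        FileInputFormat.addInputPath(job, new Path("demo_input"));
        FileOutputFormat.setOutputPath(job, new Path("demo_output"));
        job.setNumReduceTasks(0);

        JobClient.runJob(job);
    }
}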

Usage

From source file: org.apache.sysml.runtime.matrix.ReblockMR.java

License: Apache License

public static JobReturn runJob(MRJobInstruction inst, String[] inputs, InputInfo[] inputInfos, long[] rlens,
        long[] clens, int[] brlens, int[] bclens, long[] nnz, String instructionsInMapper,
        String reblockInstructions, String otherInstructionsInReducer, int numReducers, int replication,
        boolean jvmReuse, byte[] resultIndexes, String[] outputs, OutputInfo[] outputInfos) throws Exception {
    JobConf job = new JobConf(ReblockMR.class);
    job.setJobName("Reblock-MR");

    byte[] realIndexes = new byte[inputs.length];
    for (byte b = 0; b < realIndexes.length; b++)
        realIndexes[b] = b;

    //set up the input files and their format information
    //(internally used input converters: text2bc for text, identity for binary inputs)
    MRJobConfiguration.setUpMultipleInputsReblock(job, realIndexes, inputs, inputInfos, brlens, bclens);

    //set up the dimensions of input matrices
    MRJobConfiguration.setMatricesDimensions(job, realIndexes, rlens, clens, nnz);

    //set up the block size
    MRJobConfiguration.setBlocksSizes(job, realIndexes, brlens, bclens);

    //set up the unary instructions that will be performed in the mapper
    MRJobConfiguration.setInstructionsInMapper(job, instructionsInMapper);

    //set up the aggregate instructions that will happen in the combiner and reducer
    MRJobConfiguration.setReblockInstructions(job, reblockInstructions);

    //set up the instructions that will happen in the reducer, after the aggregation instructions
    MRJobConfiguration.setInstructionsInReducer(job, otherInstructionsInReducer);

    //set up the replication factor for the results
    job.setInt(MRConfigurationNames.DFS_REPLICATION, replication);

    //disable automatic task timeouts and speculative map task execution
    job.setInt(MRConfigurationNames.MR_TASK_TIMEOUT, 0);
    job.setMapSpeculativeExecution(false);

    //set up preferred custom serialization framework for binary block format
    if (MRJobConfiguration.USE_BINARYBLOCK_SERIALIZATION)
        MRJobConfiguration.addBinaryBlockSerializationFramework(job);

    //set up custom map/reduce configurations 
    DMLConfig config = ConfigurationManager.getDMLConfig();
    MRJobConfiguration.setupCustomMRConfigurations(job, config);

    //enable jvm reuse (based on SystemML configuration)
    if (jvmReuse)
        job.setNumTasksToExecutePerJvm(-1);

    //set up what matrices are needed to pass from the mapper to reducer
    HashSet<Byte> mapoutputIndexes = MRJobConfiguration.setUpOutputIndexesForMapper(job, realIndexes,
            instructionsInMapper, reblockInstructions, null, otherInstructionsInReducer, resultIndexes);

    MatrixChar_N_ReducerGroups ret = MRJobConfiguration.computeMatrixCharacteristics(job, realIndexes,
            instructionsInMapper, reblockInstructions, null, null, otherInstructionsInReducer, resultIndexes,
            mapoutputIndexes, false);

    MatrixCharacteristics[] stats = ret.stats;

    //set up the number of reducers (according to output size)
    int numRed = determineNumReducers(rlens, clens, nnz, config.getIntValue(DMLConfig.NUM_REDUCERS),
            ret.numReducerGroups);
    job.setNumReduceTasks(numRed);

    //set up the in-memory reduce buffer budget (the reblock reducer doesn't need much memory)
    //job.set(MRConfigurationNames.MR_REDUCE_INPUT_BUFFER_PERCENT, "0.70");

    // Print the complete instruction
    if (LOG.isTraceEnabled())
        inst.printCompleteMRJobInstruction(stats);

    // Update resultDimsUnknown based on computed "stats"
    byte[] resultDimsUnknown = new byte[resultIndexes.length];
    for (int i = 0; i < resultIndexes.length; i++) {
        if (stats[i].getRows() == -1 || stats[i].getCols() == -1) {
            resultDimsUnknown[i] = (byte) 1;
        } else {
            resultDimsUnknown[i] = (byte) 0;
        }
    }

    //set up the multiple output files, and their format information
    MRJobConfiguration.setUpMultipleOutputs(job, resultIndexes, resultDimsUnknown, outputs, outputInfos, true,
            true);

    // configure mapper and the mapper output key value pairs
    job.setMapperClass(ReblockMapper.class);
    job.setMapOutputKeyClass(MatrixIndexes.class); //represent key offsets for block
    job.setMapOutputValueClass(TaggedAdaptivePartialBlock.class); //binary cell/block

    //configure reducer
    job.setReducerClass(ReblockReducer.class);

    // By default, the job executes in "cluster" mode.
    // Determine if we can optimize and run it in "local" mode.

    // at this point, both reblock_binary and reblock_text are similar
    MatrixCharacteristics[] inputStats = new MatrixCharacteristics[inputs.length];
    for (int i = 0; i < inputs.length; i++) {
        inputStats[i] = new MatrixCharacteristics(rlens[i], clens[i], brlens[i], bclens[i]);
    }

    //set unique working dir
    MRJobConfiguration.setUniqueWorkingDir(job);

    RunningJob runjob = JobClient.runJob(job);

    /* Process different counters */

    Group group = runjob.getCounters().getGroup(MRJobConfiguration.NUM_NONZERO_CELLS);
    for (int i = 0; i < resultIndexes.length; i++) {
        // number of non-zeros
        stats[i].setNonZeros(group.getCounter(Integer.toString(i)));
        //   System.out.println("result #"+resultIndexes[i]+" ===>\n"+stats[i]);
    }

    return new JobReturn(stats, outputInfos, runjob.isSuccessful());
}

From source file: org.cloudata.core.testjob.performance.ManyTableJob.java

License: Apache License

public static void getData(CloudataConf conf, Path keyPath) throws IOException {
    JobConf jobConf = new JobConf(TeraReadJob.class);
    jobConf.set("user.name", conf.getUserId());
    String libDir = CloudataMapReduceUtil.initMapReduce(jobConf);

    Path tempOutputPath = new Path("ManyTableJob_Get_" + System.currentTimeMillis());

    jobConf.setJobName("ManyTableJob_Get_" + "(" + new Date() + ")");

    TextOutputFormat.setOutputPath(jobConf, tempOutputPath);
    //<MAP>
    jobConf.setMapperClass(ManyTableGetMap.class);
    jobConf.setInputFormat(TextInputFormat.class);
    TextInputFormat.addInputPath(jobConf, keyPath);
    jobConf.setMapSpeculativeExecution(false);
    jobConf.setMaxMapAttempts(0);
    //</MAP>

    //<REDUCE>
    jobConf.setNumReduceTasks(0);
    //</REDUCE>

    try {
        //Run Job
        JobClient.runJob(jobConf);
    } finally {
        //delete temp output path
        FileSystem fs = FileSystem.get(jobConf);
        FileUtil.delete(fs, tempOutputPath, true);
        CloudataMapReduceUtil.clearMapReduce(libDir);
    }
}

From source file: org.cloudata.core.testjob.performance.ManyTableJob.java

License: Apache License

public static Path putData() throws IOException {
    CloudataConf nconf = new CloudataConf();

    JobConf jobConf = new JobConf(ManyTableJob.class);
    jobConf.set("user.name", nconf.getUserId());
    String libDir = CloudataMapReduceUtil.initMapReduce(jobConf);

    jobConf.setJobName("ManyTableJob_Put" + "(" + new Date() + ")");

    jobConf.setLong("mapred.task.timeout", 30 * 60 * 1000);

    Path outputPath = new Path("ManyTableJob_KEY_" + System.currentTimeMillis());

    FileOutputFormat.setOutputPath(jobConf, outputPath);

    //<MAP>
    jobConf.setMapperClass(ManyTablePutMap.class);
    jobConf.setInputFormat(SimpleInputFormat.class);
    jobConf.setNumMapTasks(numOfTables);
    jobConf.setMapSpeculativeExecution(false);
    jobConf.setMaxMapAttempts(0);
    //</MAP>

    //<REDUCE>
    jobConf.setNumReduceTasks(0);
    //</REDUCE>

    try {
        //Run Job
        JobClient.runJob(jobConf);
        return outputPath;
    } finally {
        //keep the output path (it is returned to the caller); only clear the MapReduce libraries
        CloudataMapReduceUtil.clearMapReduce(libDir);
    }
}

From source file: org.cloudata.core.testjob.performance.TestMultiThreadCTable.java

License: Apache License

public static Path putData(String outputDir) throws IOException {
    CloudataConf nconf = new CloudataConf();

    JobConf jobConf = new JobConf(TestMultiThreadCTable.class);
    jobConf.set("user.name", nconf.getUserId());
    String libDir = CloudataMapReduceUtil.initMapReduce(jobConf);

    jobConf.setJobName("TestMultiThreadNTable_" + "(" + new Date() + ")");

    jobConf.setLong("mapred.task.timeout", 30 * 60 * 1000);

    Path outputPath = new Path(outputDir);

    FileOutputFormat.setOutputPath(jobConf, outputPath);

    JobClient jobClient = new JobClient(jobConf);

    int numOfRowPerMap = 100000 / jobClient.getClusterStatus().getMaxMapTasks();
    jobConf.setInt("numOfRowPerMap", numOfRowPerMap);
    //<MAP>
    jobConf.setMapperClass(PutDataMap.class);
    jobConf.setInputFormat(SimpleInputFormat.class);
    jobConf.setNumMapTasks(jobClient.getClusterStatus().getMaxMapTasks());
    jobConf.setMapSpeculativeExecution(false);
    jobConf.setMaxMapAttempts(0);
    //</MAP>

    //<REDUCE>
    jobConf.setNumReduceTasks(0);
    //</REDUCE>

    try {
        //Run Job
        JobClient.runJob(jobConf);
        return outputPath;
    } finally {
        CloudataMapReduceUtil.clearMapReduce(libDir);
    }
}

From source file: org.cloudata.core.testjob.tera.TeraJob.java

License: Apache License

public void runJob(String tableName, int numOfTablets, int dataLength, int totalGb, String keyOutputPath)
        throws IOException {
    CloudataConf nconf = new CloudataConf();

    JobConf jobConf = new JobConf(TeraJob.class);
    jobConf.set("user.name", nconf.getUserId());
    String libDir = CloudataMapReduceUtil.initMapReduce(jobConf);

    if (!CTable.existsTable(nconf, tableName)) {
        TableSchema tableInfo = new TableSchema(tableName, "Test");
        tableInfo.addColumn(new ColumnInfo("Col1"));
        tableInfo.addColumn(new ColumnInfo("Col2", TableSchema.CACHE_TYPE));
        tableInfo.addColumn(new ColumnInfo("Col3"));
        CTable.createTable(nconf, tableInfo);
    }
    jobConf.setJobName("TeraOnlineJob" + "(" + new Date() + ")");

    long rowsPerTask = ((((long) totalGb) * 1024L * 1024L * 1024L) / ((long) dataLength)) / (long) numOfTablets;

    jobConf.setInt("teraJob.dataLength", dataLength);
    jobConf.setLong("teraJob.rowsPerTask", rowsPerTask);

    jobConf.setLong("mapred.task.timeout", 30 * 60 * 1000);

    FileOutputFormat.setOutputPath(jobConf, new Path(keyOutputPath));

    //<MAP>
    jobConf.setMapperClass(TeraOnlineMap.class);
    jobConf.setInputFormat(SimpleInputFormat.class);
    jobConf.set(AbstractTabletInputFormat.OUTPUT_TABLE, tableName);
    jobConf.setNumMapTasks(numOfTablets);
    jobConf.setMapSpeculativeExecution(false);
    jobConf.setMaxMapAttempts(0);
    //</MAP>

    //<REDUCE>
    jobConf.setNumReduceTasks(0);
    //</REDUCE>

    try {
        //Run Job
        JobClient.runJob(jobConf);
    } finally {
        //keep the key output path; only clear the MapReduce libraries
        CloudataMapReduceUtil.clearMapReduce(libDir);
    }
}

From source file: org.cloudata.core.testjob.tera.TeraReadJob.java

License: Apache License

public static void main(String[] args) throws IOException {
    if (args.length < 2) {
        System.out.println("Usage: java TeraReadJob <table name> <keyOutputPath>");
        System.exit(0);
    }

    String tableName = args[0];
    String keyOutputPath = args[1];

    CloudataConf nconf = new CloudataConf();

    JobConf jobConf = new JobConf(TeraReadJob.class);
    jobConf.set("user.name", nconf.getUserId());
    String libDir = CloudataMapReduceUtil.initMapReduce(jobConf);

    if (!CTable.existsTable(nconf, tableName)) {
        System.out.println("Error: No table " + tableName);
        System.exit(0);
    }
    Path tempOutputPath = new Path("TeraReadJob" + System.currentTimeMillis());

    jobConf.setJobName("TeraReadJob" + "(" + new Date() + ")");
    jobConf.set("TeraReadJob.tableName", tableName);

    TextOutputFormat.setOutputPath(jobConf, tempOutputPath);
    //<MAP>
    jobConf.setMapperClass(TeraReadMap.class);
    jobConf.setInputFormat(TextInputFormat.class);
    TextInputFormat.addInputPath(jobConf, new Path(keyOutputPath));
    jobConf.setMapSpeculativeExecution(false);
    jobConf.setMaxMapAttempts(0);
    //</MAP>

    //<REDUCE>
    jobConf.setNumReduceTasks(0);
    //</REDUCE>

    try {
        //Run Job
        JobClient.runJob(jobConf);
    } finally {
        //delete temp output path
        FileSystem fs = FileSystem.get(jobConf);
        FileUtil.delete(fs, tempOutputPath, true);
        CloudataMapReduceUtil.clearMapReduce(libDir);
    }
}

From source file: org.cloudata.core.testjob.tera.TeraScanJob.java

License: Apache License

public void runJob(String tableName) throws IOException {
    JobConf jobConf = new JobConf(TeraScanJob.class);

    CloudataConf nconf = new CloudataConf();

    if (!CTable.existsTable(nconf, tableName)) {
        System.out.println("No table:" + tableName);
        System.exit(0);
    }
    Path tempOutputPath = new Path("TeraScanJob" + System.currentTimeMillis());

    jobConf.setJobName("TeraScanJob" + "(" + new Date() + ")");

    //<MAP>
    jobConf.setMapperClass(TeraScanMap.class);
    jobConf.setInputFormat(TeraScanJobTabletInputFormat.class);
    jobConf.set(AbstractTabletInputFormat.INPUT_TABLE, tableName);
    jobConf.setMapSpeculativeExecution(false);
    jobConf.setMaxMapAttempts(0);
    //</MAP>

    //<REDUCE>
    //    jobConf.setReducerClass(DocFreqReduce.class);
    //    jobConf.setOutputKeyClass(Text.class);
    //    jobConf.setOutputValueClass(Text.class);    
    FileOutputFormat.setOutputPath(jobConf, tempOutputPath);
    jobConf.setNumReduceTasks(0);
    //</REDUCE>

    //Run Job
    JobClient.runJob(jobConf);

    //delete temp output path
    FileSystem fs = FileSystem.get(jobConf);
    FileUtil.delete(fs, tempOutputPath, true);
}

From source file: org.cloudata.examples.upload.partitionjob.UploadJob.java

License: Apache License

public void runJob(String inputPath, String tableName) throws IOException {
    JobConf jobConf = new JobConf(UploadJob.class);
    String libDir = CloudataMapReduceUtil.initMapReduce(jobConf);

    jobConf.setJobName("UploadJob_" + tableName + "(" + new Date() + ")");

    //the KeyRangePartitioner looks up the target table
    //via AbstractTabletInputFormat.OUTPUT_TABLE
    jobConf.set(AbstractTabletInputFormat.OUTPUT_TABLE, tableName);

    CloudataConf conf = new CloudataConf();
    CTable ctable = CTable.openTable(conf, tableName);
    TabletInfo[] tabletInfos = ctable.listTabletInfos();

    //<Map>
    FileInputFormat.addInputPath(jobConf, new Path(inputPath));
    jobConf.setInputFormat(TextInputFormat.class);
    jobConf.setMapperClass(UploadMap.class);
    jobConf.setMapOutputKeyClass(Text.class);
    jobConf.setMapOutputValueClass(Text.class);
    jobConf.setMapSpeculativeExecution(false);
    jobConf.setMaxMapAttempts(0);
    jobConf.setPartitionerClass(KeyRangePartitioner.class);
    //</Map>

    //<Reduce>
    Path tempOutputPath = new Path("temp/uploadJob/" + tableName + "/reducer");
    FileOutputFormat.setOutputPath(jobConf, tempOutputPath);
    jobConf.setOutputKeyClass(Text.class);
    jobConf.setOutputValueClass(Text.class);
    jobConf.setReducerClass(UploadReducer.class);
    jobConf.setReduceSpeculativeExecution(false);
    jobConf.setMaxReduceAttempts(0);
    //one reduce task per tablet
    jobConf.setNumReduceTasks(tabletInfos.length);
    //</Reduce>

    try {
        JobClient.runJob(jobConf);
    } finally {
        FileSystem fs = FileSystem.get(jobConf);
        FileUtil.delete(fs, tempOutputPath, true);
        CloudataMapReduceUtil.clearMapReduce(libDir);
    }
}

From source file: org.cloudata.util.upload.UploadUtil.java

License: Apache License

private void doHadoopUpload(CloudataConf conf) throws IOException {
    if (!CTable.existsTable(conf, tableName)) {
        throw new IOException("No table:" + tableName);
    }

    JobConf jobConf = new JobConf(UploadUtil.class);
    String libDir = CloudataMapReduceUtil.initMapReduce(jobConf);

    jobConf.setJobName("UploadJob_" + tableName + "(" + new Date() + ")");

    //the KeyRangePartitioner looks up the target table
    //via AbstractTabletInputFormat.OUTPUT_TABLE
    jobConf.set(AbstractTabletInputFormat.OUTPUT_TABLE, tableName);

    //<Map>
    FileInputFormat.addInputPath(jobConf, new Path(inputPath));
    jobConf.setInputFormat(TextInputFormat.class);
    jobConf.set("uploadJob.delim", delim);
    String columnStr = "";
    for (String eachColumn : columns) {
        columnStr += eachColumn + ",";
    }
    jobConf.set("uploadJob.columns", columnStr);

    String fieldNumStr = "";
    for (int eachField : fieldNums) {
        fieldNumStr += eachField + ",";
    }
    jobConf.set("uploadJob.fieldNums", fieldNumStr);
    jobConf.setBoolean("uploadJob.keyValuePair", keyValuePair);
    jobConf.setMapperClass(UploadMap.class);
    jobConf.setMapOutputKeyClass(Text.class);
    jobConf.setMapOutputValueClass(Text.class);
    jobConf.setMapSpeculativeExecution(false);
    jobConf.setMaxMapAttempts(0);
    //</Map>

    //<Reduce>
    Path tempOutputPath = new Path("temp/uploadJob/" + tableName + "/reducer");
    FileOutputFormat.setOutputPath(jobConf, tempOutputPath);
    jobConf.setNumReduceTasks(0);
    //</Reduce>

    try {
        JobClient.runJob(jobConf);
    } finally {
        FileSystem fs = FileSystem.get(jobConf);
        FileUtil.delete(fs, tempOutputPath, true);
        CloudataMapReduceUtil.clearMapReduce(libDir);
    }
}