Example usage for org.apache.hadoop.mapred JobConf setMaxReduceAttempts

Introduction

This page lists example usages of org.apache.hadoop.mapred.JobConf#setMaxReduceAttempts, collected from open source projects.

Prototype

public void setMaxReduceAttempts(int n) 

Document

Expert: Set the number of maximum attempts that will be made to run a reduce task.
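
Before the project-specific examples below, here is a minimal, self-contained sketch of a typical call site. It assumes the classic org.apache.hadoop.mapred API with the default identity mapper and reducer; the class name MaxReduceAttemptsExample and the command-line input/output paths are placeholders and are not taken from any source file listed on this page.

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.TextInputFormat;
import org.apache.hadoop.mapred.TextOutputFormat;

public class MaxReduceAttemptsExample {
    public static void main(String[] args) throws Exception {
        JobConf jobConf = new JobConf(MaxReduceAttemptsExample.class);
        jobConf.setJobName("MaxReduceAttemptsExample");

        // Fail the job on the first reduce-task failure instead of retrying;
        // the framework default is normally 4 attempts per reduce task.
        jobConf.setMaxReduceAttempts(1);
        // The map-side counterpart is usually configured alongside it.
        jobConf.setMaxMapAttempts(1);

        // Identity mapper and reducer are used by default in the old API.
        jobConf.setInputFormat(TextInputFormat.class);
        jobConf.setOutputFormat(TextOutputFormat.class);
        FileInputFormat.addInputPath(jobConf, new Path(args[0]));
        FileOutputFormat.setOutputPath(jobConf, new Path(args[1]));

        JobClient.runJob(jobConf);
    }
}

As the examples below illustrate, tests typically set the value to 1 so that a failing task fails the job immediately, while jobs whose reducers write directly to an external store often set it to 0 to avoid re-running a partially completed write.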

Usage

From source file: org.apache.solr.hadoop.MorphlineBasicMiniMRTest.java

License: Apache License

@Test
public void mrRun() throws Exception {
    FileSystem fs = dfsCluster.getFileSystem();
    Path inDir = fs.makeQualified(new Path("/user/testing/testMapperReducer/input"));
    fs.delete(inDir, true);
    String DATADIR = "/user/testing/testMapperReducer/data";
    Path dataDir = fs.makeQualified(new Path(DATADIR));
    fs.delete(dataDir, true);
    Path outDir = fs.makeQualified(new Path("/user/testing/testMapperReducer/output"));
    fs.delete(outDir, true);

    assertTrue(fs.mkdirs(inDir));
    Path INPATH = new Path(inDir, "input.txt");
    OutputStream os = fs.create(INPATH);
    Writer wr = new OutputStreamWriter(os, "UTF-8");
    wr.write(DATADIR + "/" + inputAvroFile);
    wr.close();

    assertTrue(fs.mkdirs(dataDir));
    fs.copyFromLocalFile(new Path(DOCUMENTS_DIR, inputAvroFile), dataDir);

    JobConf jobConf = getJobConf();
    if (ENABLE_LOCAL_JOB_RUNNER) { // enable the Hadoop LocalJobRunner; this makes it possible to run the job in a debugger and set breakpoints
        jobConf.set("mapred.job.tracker", "local");
    }
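    // do not retry failed map or reduce tasks; a single task failure fails the job immediately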
    jobConf.setMaxMapAttempts(1);
    jobConf.setMaxReduceAttempts(1);
    jobConf.setJar(SEARCH_ARCHIVES_JAR);
    jobConf.setBoolean(ExtractingParams.IGNORE_TIKA_EXCEPTION, false);

    int shards = 2;
    int maxReducers = Integer.MAX_VALUE;
    if (ENABLE_LOCAL_JOB_RUNNER) {
        // local job runner has a couple of limitations: only one reducer is supported and the DistributedCache doesn't work.
        // see http://blog.cloudera.com/blog/2009/07/advice-on-qa-testing-your-mapreduce-jobs/
        maxReducers = 1;
        shards = 1;
    }

    String[] args = new String[] {
            "--morphline-file=" + RESOURCES_DIR + "/test-morphlines/solrCellDocumentTypes.conf",
            "--morphline-id=morphline1", "--solr-home-dir=" + MINIMR_CONF_DIR.getAbsolutePath(),
            "--output-dir=" + outDir.toString(), "--shards=" + shards, "--verbose",
            numRuns % 2 == 0 ? "--input-list=" + INPATH.toString() : dataDir.toString(),
            numRuns % 3 == 0 ? "--reducers=" + shards
                    : (numRuns % 3 == 1 ? "--reducers=-1" : "--reducers=" + Math.min(8, maxReducers)) };
    if (numRuns % 3 == 2) {
        args = concat(args, new String[] { "--fanout=2" });
    }
    if (numRuns == 0) {
        // force (slow) MapReduce based randomization to get coverage for that as well
        args = concat(new String[] { "-D", MapReduceIndexerTool.MAIN_MEMORY_RANDOMIZATION_THRESHOLD + "=-1" },
                args);
    }
    MapReduceIndexerTool tool = createTool();
    int res = ToolRunner.run(jobConf, tool, args);
    assertEquals(0, res);
    Job job = tool.job;
    assertTrue(job.isComplete());
    assertTrue(job.isSuccessful());

    if (numRuns % 3 != 2) {
        // Only run this check if mtree merge is disabled.
        // With mtree merge enabled the BatchWriter counters aren't available anymore because 
        // variable "job" now refers to the merge job rather than the indexing job
        assertEquals(
                "Invalid counter " + SolrRecordWriter.class.getName() + "." + SolrCounters.DOCUMENTS_WRITTEN,
                count,
                job.getCounters()
                        .findCounter(SolrCounters.class.getName(), SolrCounters.DOCUMENTS_WRITTEN.toString())
                        .getValue());
    }

    // Check the output is as expected
    outDir = new Path(outDir, MapReduceIndexerTool.RESULTS_DIR);
    Path[] outputFiles = FileUtil.stat2Paths(fs.listStatus(outDir));

    System.out.println("outputfiles:" + Arrays.toString(outputFiles));

    TestUtils.validateSolrServerDocumentCount(MINIMR_CONF_DIR, fs, outDir, count, shards);

    // run again in --dry-run mode:
    tool = createTool();
    args = concat(args, new String[] { "--dry-run" });
    res = ToolRunner.run(jobConf, tool, args);
    assertEquals(0, res);

    numRuns++;
}

From source file: org.apache.solr.hadoop.MorphlineGoLiveMiniMRTest.java

License: Apache License

@Override
public void doTest() throws Exception {

    waitForRecoveriesToFinish(false);

    FileSystem fs = dfsCluster.getFileSystem();
    Path inDir = fs.makeQualified(new Path("/user/testing/testMapperReducer/input"));
    fs.delete(inDir, true);
    String DATADIR = "/user/testing/testMapperReducer/data";
    Path dataDir = fs.makeQualified(new Path(DATADIR));
    fs.delete(dataDir, true);
    Path outDir = fs.makeQualified(new Path("/user/testing/testMapperReducer/output"));
    fs.delete(outDir, true);

    assertTrue(fs.mkdirs(inDir));
    Path INPATH = upAvroFile(fs, inDir, DATADIR, dataDir, inputAvroFile1);

    JobConf jobConf = getJobConf();
    // enable mapred.job.tracker = local to run in debugger and set breakpoints
    // jobConf.set("mapred.job.tracker", "local");
    jobConf.setMaxMapAttempts(1);
    jobConf.setMaxReduceAttempts(1);
    jobConf.setJar(SEARCH_ARCHIVES_JAR);
    jobConf.setBoolean(ExtractingParams.IGNORE_TIKA_EXCEPTION, false);

    MapReduceIndexerTool tool;
    int res;
    QueryResponse results;
    HttpSolrServer server = new HttpSolrServer(cloudJettys.get(0).url);

    String[] args = new String[] { "--solr-home-dir=" + MINIMR_CONF_DIR.getAbsolutePath(),
            "--output-dir=" + outDir.toString(), "--mappers=3",
            ++numRuns % 2 == 0 ? "--input-list=" + INPATH.toString() : dataDir.toString(), "--shard-url",
            cloudJettys.get(0).url, "--shard-url", cloudJettys.get(1).url, "--shard-url",
            cloudJettys.get(2).url, "--go-live-threads", Integer.toString(random().nextInt(15) + 1),
            "--verbose", "--go-live" };
    args = prependInitialArgs(args);

    if (true) {
        tool = new MapReduceIndexerTool();
        res = ToolRunner.run(jobConf, tool, args);
        assertEquals(0, res);
        assertTrue(tool.job.isComplete());
        assertTrue(tool.job.isSuccessful());
        results = server.query(new SolrQuery("*:*"));
        assertEquals(20, results.getResults().getNumFound());
    }

    fs.delete(inDir, true);
    fs.delete(outDir, true);
    fs.delete(dataDir, true);
    assertTrue(fs.mkdirs(inDir));
    INPATH = upAvroFile(fs, inDir, DATADIR, dataDir, inputAvroFile2);

    args = new String[] { "--solr-home-dir=" + MINIMR_CONF_DIR.getAbsolutePath(),
            "--output-dir=" + outDir.toString(), "--mappers=3", "--verbose", "--go-live",
            ++numRuns % 2 == 0 ? "--input-list=" + INPATH.toString() : dataDir.toString(), "--shard-url",
            cloudJettys.get(0).url, "--shard-url", cloudJettys.get(1).url, "--shard-url",
            cloudJettys.get(2).url, "--go-live-threads", Integer.toString(random().nextInt(15) + 1) };
    args = prependInitialArgs(args);

    if (true) {
        tool = new MapReduceIndexerTool();
        res = ToolRunner.run(jobConf, tool, args);
        assertEquals(0, res);
        assertTrue(tool.job.isComplete());
        assertTrue(tool.job.isSuccessful());
        results = server.query(new SolrQuery("*:*"));

        assertEquals(22, results.getResults().getNumFound());
    }

    // try using zookeeper
    String collection = "collection1";
    if (random().nextBoolean()) {
        // sometimes, use an alias
        createAlias("updatealias", "collection1");
        collection = "updatealias";
    }

    fs.delete(inDir, true);
    fs.delete(outDir, true);
    fs.delete(dataDir, true);
    INPATH = upAvroFile(fs, inDir, DATADIR, dataDir, inputAvroFile3);

    args = new String[] { "--output-dir=" + outDir.toString(), "--mappers=3", "--reducers=6", "--verbose",
            "--go-live", ++numRuns % 2 == 0 ? "--input-list=" + INPATH.toString() : dataDir.toString(),
            "--zk-host", zkServer.getZkAddress(), "--collection", collection };
    args = prependInitialArgs(args);

    if (true) {
        tool = new MapReduceIndexerTool();
        res = ToolRunner.run(jobConf, tool, args);
        assertEquals(0, res);
        assertTrue(tool.job.isComplete());
        assertTrue(tool.job.isSuccessful());

        results = server.query(new SolrQuery("*:*"));
        assertEquals(2126, results.getResults().getNumFound());
    }

    server.shutdown();

    // try using zookeeper with replication
    String replicatedCollection = "replicated_collection";
    createCollection(replicatedCollection, 2, 3, 2);
    waitForRecoveriesToFinish(false);
    cloudClient.setDefaultCollection(replicatedCollection);
    fs.delete(inDir, true);
    fs.delete(outDir, true);
    fs.delete(dataDir, true);
    assertTrue(fs.mkdirs(dataDir));
    INPATH = upAvroFile(fs, inDir, DATADIR, dataDir, inputAvroFile3);

    args = new String[] { "--solr-home-dir=" + MINIMR_CONF_DIR.getAbsolutePath(),
            "--output-dir=" + outDir.toString(), "--mappers=3", "--reducers=6", "--verbose", "--go-live",
            "--zk-host", zkServer.getZkAddress(), "--collection", replicatedCollection, dataDir.toString() };
    args = prependInitialArgs(args);

    if (true) {
        tool = new MapReduceIndexerTool();
        res = ToolRunner.run(jobConf, tool, args);
        assertEquals(0, res);
        assertTrue(tool.job.isComplete());
        assertTrue(tool.job.isSuccessful());

        results = cloudClient.query(new SolrQuery("*:*"));
        assertEquals(2104, results.getResults().getNumFound());

        checkConsistency(replicatedCollection);
    }

    // try using solr_url with replication
    cloudClient.deleteByQuery("*:*");
    cloudClient.commit();
    fs.delete(inDir, true);
    fs.delete(dataDir, true);
    assertTrue(fs.mkdirs(dataDir));
    INPATH = upAvroFile(fs, inDir, DATADIR, dataDir, inputAvroFile3);

    args = new String[] { "--solr-home-dir=" + MINIMR_CONF_DIR.getAbsolutePath(),
            "--output-dir=" + outDir.toString(), "--shards", "2", "--mappers=3", "--verbose", "--go-live",
            "--go-live-threads", Integer.toString(random().nextInt(15) + 1), dataDir.toString() };
    args = prependInitialArgs(args);

    List<String> argList = new ArrayList<String>();
    getShardUrlArgs(argList, replicatedCollection);
    args = concat(args, argList.toArray(new String[0]));

    if (true) {
        tool = new MapReduceIndexerTool();
        res = ToolRunner.run(jobConf, tool, args);
        assertEquals(0, res);
        assertTrue(tool.job.isComplete());
        assertTrue(tool.job.isSuccessful());

        checkConsistency(replicatedCollection);

        results = cloudClient.query(new SolrQuery("*:*"));
        assertEquals(2104, results.getResults().getNumFound());
    }

}

From source file: org.cloudata.core.PerformanceTest.java

License: Apache License

private void runNIsMoreThanOne(final String cmd) throws IOException {
    checkTable();

    // Run a MapReduce job with as many map tasks as requested clients.
    // Before starting the job, write out an input file with one instruction
    // per client indicating which row that client should start on.
    Path inputDir = writeInputFile(this.conf);
    this.conf.set(EvaluationMapTask.CMD_KEY, cmd);
    JobConf job = new JobConf(this.conf, this.getClass());
    FileInputFormat.addInputPath(job, inputDir);
    job.setInputFormat(TextInputFormat.class);
    job.setJobName("Cloudata Performance Evaluation");
    job.setMapperClass(EvaluationMapTask.class);
    job.setMaxMapAttempts(1);
    job.setMaxReduceAttempts(1);
    job.setNumMapTasks(this.N * 10); // Ten maps per client.
    job.setNumReduceTasks(1);
    job.setOutputFormat(TextOutputFormat.class);
    FileOutputFormat.setOutputPath(job, new Path(inputDir, "outputs"));
    JobClient.runJob(job);
}

From source file: org.cloudata.examples.first.FirstMapReduce.java

License: Apache License

public static void main(String[] args) throws Exception {
    //Create the output table if it does not already exist
    CloudataConf conf = new CloudataConf();
    String outputTableName = "InvertedTable";
    TableSchema outputTableSchema = new TableSchema(outputTableName);
    outputTableSchema.addColumn("InvertedCloumn");
    if (!CTable.existsTable(conf, outputTableName)) {
        CTable.createTable(conf, outputTableSchema);
    }

    JobConf jobConf = new JobConf(FirstMapReduce.class);
    jobConf.setJobName("FirstMapReduce");
    String libDir = CloudataMapReduceUtil.initMapReduce(jobConf);

    //<Mapper>
    //Set the Mapper class
    jobConf.setMapperClass(FirstMapReduceMapper.class);
    //Use a TabletInputFormat subclass as the InputFormat
    jobConf.setInputFormat(FirstMapReduceInputFormat.class);
    jobConf.setMapOutputKeyClass(Text.class);
    jobConf.setMapOutputValueClass(Text.class);
    //</Mapper>

    //<Reducer>
    String outputPath = "temp/FirstMapReduce";
    FileOutputFormat.setOutputPath(jobConf, new Path(outputPath));
    //Set the Reducer class
    jobConf.setReducerClass(FirstMapReduceReducer.class);
    jobConf.setOutputKeyClass(Text.class);
    jobConf.setOutputValueClass(Text.class);
    //Set the number of reduce tasks to the number of tablets in the input table
    CTable ctable = CTable.openTable(conf, "SampleTable1");
    TabletInfo[] tabletInfos = ctable.listTabletInfos();
    jobConf.setNumReduceTasks(tabletInfos.length);
    //Reduce output is written directly to a Tablet, so the number of
    //task attempts is set to 0 and failed tasks are not retried.
    jobConf.setMaxReduceAttempts(0);
    //</Reducer>

    try {
        //Run the job
        JobClient.runJob(jobConf);
    } finally {
        //Delete the temporary output
        FileSystem fs = FileSystem.get(jobConf);
        fs.delete(new Path(outputPath), true);
        CloudataMapReduceUtil.clearMapReduce(libDir);
    }
}

From source file: org.cloudata.examples.upload.partitionjob.UploadJob.java

License: Apache License

public void runJob(String inputPath, String tableName) throws IOException {
    JobConf jobConf = new JobConf(UploadJob.class);
    String libDir = CloudataMapReduceUtil.initMapReduce(jobConf);

    jobConf.setJobName("UploadJob_" + tableName + "(" + new Date() + ")");

    //KeyRangePartitioner requires the target table name
    //to be set in AbstractTabletInputFormat.OUTPUT_TABLE
    jobConf.set(AbstractTabletInputFormat.OUTPUT_TABLE, tableName);

    CloudataConf conf = new CloudataConf();
    CTable ctable = CTable.openTable(conf, tableName);
    TabletInfo[] tabletInfos = ctable.listTabletInfos();

    //<Map>
    FileInputFormat.addInputPath(jobConf, new Path(inputPath));
    jobConf.setInputFormat(TextInputFormat.class);
    jobConf.setMapperClass(UploadMap.class);
    jobConf.setMapOutputKeyClass(Text.class);
    jobConf.setMapOutputValueClass(Text.class);
    jobConf.setMapSpeculativeExecution(false);
    jobConf.setMaxMapAttempts(0);
    jobConf.setPartitionerClass(KeyRangePartitioner.class);
    //</Map>

    //<Reduce>
    Path tempOutputPath = new Path("temp/uploadJob/" + tableName + "/reducer");
    FileOutputFormat.setOutputPath(jobConf, tempOutputPath);
    jobConf.setOutputKeyClass(Text.class);
    jobConf.setOutputValueClass(Text.class);
    jobConf.setReducerClass(UploadReducer.class);
    jobConf.setReduceSpeculativeExecution(false);
    jobConf.setMaxReduceAttempts(0);
    //One reduce task per Tablet
    jobConf.setNumReduceTasks(tabletInfos.length);
    //</Reduce>

    try {
        JobClient.runJob(jobConf);
    } finally {
        FileSystem fs = FileSystem.get(jobConf);
        FileUtil.delete(fs, tempOutputPath, true);
        CloudataMapReduceUtil.clearMapReduce(libDir);
    }
}

From source file: org.cloudata.examples.upload.SimpleUploaderMapReduce.java

License: Apache License

public void run(String[] args) throws IOException {
    if (args.length < 3) {
        System.out.println("Usage: java SimpleUploaderMapReduce <input path> <table name> <# reduce>");
        System.exit(0);
    }

    Path inputPath = new Path(args[0]);
    String tableName = args[1];

    CloudataConf nconf = new CloudataConf();
    if (!CTable.existsTable(nconf, tableName)) {
        TableSchema tableSchema = new TableSchema(tableName);
        tableSchema.addColumn("Col1");

        Row.Key[] rowKeys = new Row.Key[20];
        for (int i = 0; i < 10; i++) {
            rowKeys[i] = new Row.Key("-0" + i);
        }
        for (int i = 1; i < 10; i++) {
            rowKeys[9 + i] = new Row.Key("0" + i);
        }
        rowKeys[19] = Row.Key.MAX_KEY;

        CTable.createTable(nconf, tableSchema, rowKeys);
    }
    JobConf jobConf = new JobConf(HdfsToCloudataMapReduce.class);
    String libDir = CloudataMapReduceUtil.initMapReduce(jobConf);

    // <MAP>
    FileInputFormat.addInputPath(jobConf, inputPath);
    jobConf.setInputFormat(TextInputFormat.class);
    jobConf.setMapperClass(SimpleUploaderMapper.class);
    jobConf.setPartitionerClass(KeyRangePartitioner.class);
    jobConf.setMapOutputKeyClass(Text.class);
    jobConf.setMapOutputValueClass(Text.class);
    jobConf.set(AbstractTabletInputFormat.OUTPUT_TABLE, tableName);
    // </MAP>

    // <REDUCE>
    FileOutputFormat.setOutputPath(jobConf, new Path("SimpleUploaderMapReduce_" + System.currentTimeMillis()));
    jobConf.setReducerClass(SimpleUploaderReducer.class);
    jobConf.setNumReduceTasks(Integer.parseInt(args[2]));
    jobConf.setMaxReduceAttempts(0);
    // </REDUCE>

    try {
        JobClient.runJob(jobConf);
    } catch (Exception e) {
        e.printStackTrace();
    } finally {
        FileSystem fs = FileSystem.get(jobConf);
        fs.delete(FileOutputFormat.getOutputPath(jobConf), true);
        CloudataMapReduceUtil.clearMapReduce(libDir);
    }
}

From source file: org.cloudata.examples.web.TermGlobalJob.java

License: Apache License

public void exec() throws Exception {
    CloudataConf nconf = new CloudataConf();
    if (!CTable.existsTable(nconf, GLOBAL_TABLE)) {
        TableSchema globalTableInfo = new TableSchema(GLOBAL_TABLE, "Test", GLOBAL_TABLE_COLUMNS);
        CTable.createTable(nconf, globalTableInfo);
    }

    Path tempOutputPath = new Path("globalTableInfo" + System.currentTimeMillis());

    JobConf jobConf = new JobConf(WebTableJob.class);
    jobConf.setJobName("TermGlobalJob" + "(" + new Date() + ")");

    //<MAP>
    jobConf.setMapperClass(TermGlobalMap.class);
    jobConf.setInputFormat(DefaultTabletInputFormat.class);
    jobConf.set(DefaultTabletInputFormat.INPUT_TABLE, WebTableJob.WEB_TABLE);
    jobConf.set(DefaultTabletInputFormat.INPUT_COLUMN_LIST, WebTableJob.WEB_TABLE_COLUMNS[2]);
    jobConf.setMapOutputKeyClass(Text.class);
    jobConf.setMapOutputValueClass(Text.class);
    //</MAP>

    //<REDUCE>
    jobConf.setReducerClass(TermGlobalReduce.class);
    jobConf.setOutputKeyClass(Text.class);
    jobConf.setOutputValueClass(Text.class);
    jobConf.setNumReduceTasks(1);
    jobConf.setMaxReduceAttempts(0);
    FileOutputFormat.setOutputPath(jobConf, tempOutputPath);
    //</REDUCE>

    //Run Job
    JobClient.runJob(jobConf);

    //delete temp output path
    FileSystem fs = FileSystem.get(jobConf);
    fs.delete(tempOutputPath, true);
}

From source file: org.cloudata.examples.web.TermUploadJob.java

License: Apache License

public void exec(String[] options) throws Exception {
    if (options.length < 1) {
        System.out.println("Usage: java TermUploadJob <num of repeats> termUpload <inputPath> [#redcue]");
        System.exit(0);
    }
    JobConf jobConf = new JobConf(TermUploadJob.class);
    JobClient jobClient = new JobClient(jobConf);
    int maxReduce = jobClient.getClusterStatus().getMaxReduceTasks() * 2;
    if (options.length > 1) {
        maxReduce = Integer.parseInt(options[1]);
    }

    jobConf.setInt("mapred.task.timeout", 60 * 60 * 1000);

    FileSystem fs = FileSystem.get(jobConf);

    CloudataConf nconf = new CloudataConf();
    if (!CTable.existsTable(nconf, TERM_TABLE)) {
        //Create the term table, pre-split using the partition info below
        Path path = new Path("blogdata/tmp/weight");
        FileStatus[] paths = fs.listStatus(path);
        if (paths == null || paths.length == 0) {
            LOG.error("No Partition info:" + path);
            return;
        }
        SortedSet<Text> terms = new TreeSet<Text>();
        Text text = new Text();
        for (FileStatus eachPath : paths) {
            CloudataLineReader reader = new CloudataLineReader(fs.open(eachPath.getPath()));
            while (true) {
                int length = reader.readLine(text);
                if (length <= 0) {
                    break;
                }
                terms.add(new Text(text));
            }
        }

        int termsPerTablet = terms.size() / (maxReduce - 1);
        int count = 0;
        List<Row.Key> rowKeys = new ArrayList<Row.Key>();
        for (Text term : terms) {
            count++;
            if (count == termsPerTablet) {
                rowKeys.add(new Row.Key(term.getBytes()));
                count = 0;
            }
        }
        rowKeys.add(Row.Key.MAX_KEY);

        TableSchema termTableInfo = new TableSchema(TERM_TABLE, "Test", TERM_TABLE_COLUMNS);
        CTable.createTable(nconf, termTableInfo, rowKeys.toArray(new Row.Key[] {}));
    }
    CTable termTable = CTable.openTable(nconf, TERM_TABLE);
    TabletInfo[] tabletInfos = termTable.listTabletInfos();

    Path tempOutputPath = new Path("WebTableJob_" + System.currentTimeMillis());

    jobConf.setJobName("TermUploadJob" + "(" + new Date() + ")");
    FileInputFormat.addInputPath(jobConf, new Path(options[0]));

    //<MAP>
    jobConf.setMapperClass(TermUploadMap.class);
    jobConf.setMapOutputKeyClass(Text.class);
    jobConf.setMapOutputValueClass(Text.class);
    jobConf.setInputFormat(TextInputFormat.class);
    jobConf.set(AbstractTabletInputFormat.OUTPUT_TABLE, TERM_TABLE);
    jobConf.setPartitionerClass(WebKeyRangePartitioner.class);
    jobConf.setMaxMapAttempts(0);
    //</MAP>

    //<REDUCE>
    jobConf.setReducerClass(TermUploadReduce.class);
    jobConf.setOutputKeyClass(Text.class);
    jobConf.setOutputValueClass(Text.class);
    jobConf.setNumReduceTasks(tabletInfos.length);
    FileOutputFormat.setOutputPath(jobConf, tempOutputPath);
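    //note: the following call overrides the reducer count (tabletInfos.length) set above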
    jobConf.setNumReduceTasks(maxReduce);
    jobConf.setMaxReduceAttempts(0);
    //<REDUCE>

    //Run Job
    JobClient.runJob(jobConf);

    fs.delete(tempOutputPath, true);
}

From source file: org.cloudata.util.matrix.AbstractMatrix.java

License: Apache License

public void mutiply(AbstractMatrix targetMatrix, AbstractMatrix resultMatrix) throws IOException {
    Path tempOutputPath = new Path("temp/Matrix_" + System.currentTimeMillis());

    JobConf jobConf = new JobConf(AbstractMatrix.class);
    jobConf.setJobName("Matrix_Mutiply_Job" + "(" + new Date() + ")");

    //<MAP>
    jobConf.setMapperClass(MatrixMutiplyMap.class);
    jobConf.setInputFormat(MatrixInputFormat.class);
    jobConf.set(MatrixInputFormat.MATRIX_INPUT_TABLE, ctable.getTableName());
    jobConf.set(MatrixInputFormat.MATRIX_INPUT_COLUMN, columnName);
    jobConf.set(MatrixInputFormat.MATRIX_TARGET_TABLE, targetMatrix.ctable.getTableName());
    jobConf.set(MatrixInputFormat.MATRIX_TARGET_COLUMN, targetMatrix.columnName);
    jobConf.setBoolean(MatrixInputFormat.MATRIX_TARGET_SPARSE, targetMatrix.isSparse());
    jobConf.setMapOutputKeyClass(MatrixItem.class);
    jobConf.setMapOutputValueClass(Text.class);
    //</MAP>

    //<REDUCE>
    jobConf.setPartitionerClass(KeyRangePartitioner.class);
    jobConf.set(AbstractTabletInputFormat.OUTPUT_TABLE, resultMatrix.ctable.getTableName());
    jobConf.setReducerClass(MatrixMutiplyReduce.class);
    jobConf.set(MatrixInputFormat.MATRIX_RESULT_TABLE, resultMatrix.ctable.getTableName());
    jobConf.set(MatrixInputFormat.MATRIX_RESULT_COLUMN, resultMatrix.columnName);
    jobConf.setBoolean(MatrixInputFormat.MATRIX_RESULT_SPARSE, resultMatrix.isSparse());
    jobConf.setOutputKeyClass(Text.class);
    jobConf.setOutputValueClass(Text.class);

    TabletInfo[] tabletInfos = resultMatrix.ctable.listTabletInfos();

    jobConf.setNumReduceTasks(tabletInfos.length);
    jobConf.setMaxReduceAttempts(0);
    FileOutputFormat.setOutputPath(jobConf, tempOutputPath);
    //</REDUCE>

    //Run Job
    JobClient.runJob(jobConf);

    //delete temp output path
    FileSystem fs = FileSystem.get(jobConf);
    fs.delete(tempOutputPath, true);
}