Example usage for org.apache.hadoop.mapred JobConf setMaxMapAttempts

Introduction

On this page you can find example usage for org.apache.hadoop.mapred JobConf setMaxMapAttempts.

Prototype

public void setMaxMapAttempts(int n) 

Document

Expert: Set the number of maximum attempts that will be made to run a map task.
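
Before the project examples below, here is a minimal, self-contained sketch of the typical call pattern with the old mapred API. The driver class name and the use of IdentityMapper with caller-supplied input/output paths are illustrative assumptions, not taken from the examples on this page.

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.TextInputFormat;
import org.apache.hadoop.mapred.TextOutputFormat;
import org.apache.hadoop.mapred.lib.IdentityMapper;

// Hypothetical driver class, used only for illustration.
public class MaxMapAttemptsExample {
    public static void main(String[] args) throws Exception {
        JobConf jobConf = new JobConf(MaxMapAttemptsExample.class);
        jobConf.setJobName("setMaxMapAttempts example");
        jobConf.setMapperClass(IdentityMapper.class);
        jobConf.setInputFormat(TextInputFormat.class);
        jobConf.setOutputFormat(TextOutputFormat.class);
        jobConf.setNumReduceTasks(0); // map-only job
        // Fail fast: allow each map task a single attempt instead of the default (typically 4).
        jobConf.setMaxMapAttempts(1);
        FileInputFormat.addInputPath(jobConf, new Path(args[0]));   // input path supplied by the caller
        FileOutputFormat.setOutputPath(jobConf, new Path(args[1])); // output path supplied by the caller
        JobClient.runJob(jobConf);
    }
}

The test and benchmark jobs below pass 1 (or 0) so that a failing map task is not retried and problems surface immediately.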

Usage

From source file: org.apache.solr.hadoop.MorphlineBasicMiniMRTest.java

License: Apache License

@Test
public void mrRun() throws Exception {
    FileSystem fs = dfsCluster.getFileSystem();
    Path inDir = fs.makeQualified(new Path("/user/testing/testMapperReducer/input"));
    fs.delete(inDir, true);
    String DATADIR = "/user/testing/testMapperReducer/data";
    Path dataDir = fs.makeQualified(new Path(DATADIR));
    fs.delete(dataDir, true);
    Path outDir = fs.makeQualified(new Path("/user/testing/testMapperReducer/output"));
    fs.delete(outDir, true);

    assertTrue(fs.mkdirs(inDir));
    Path INPATH = new Path(inDir, "input.txt");
    OutputStream os = fs.create(INPATH);
    Writer wr = new OutputStreamWriter(os, "UTF-8");
    wr.write(DATADIR + "/" + inputAvroFile);
    wr.close();

    assertTrue(fs.mkdirs(dataDir));
    fs.copyFromLocalFile(new Path(DOCUMENTS_DIR, inputAvroFile), dataDir);

    JobConf jobConf = getJobConf();
    if (ENABLE_LOCAL_JOB_RUNNER) { // enable Hadoop LocalJobRunner; this allows running in a debugger and setting breakpoints
        jobConf.set("mapred.job.tracker", "local");
    }
    jobConf.setMaxMapAttempts(1);
    jobConf.setMaxReduceAttempts(1);
    jobConf.setJar(SEARCH_ARCHIVES_JAR);
    jobConf.setBoolean(ExtractingParams.IGNORE_TIKA_EXCEPTION, false);

    int shards = 2;
    int maxReducers = Integer.MAX_VALUE;
    if (ENABLE_LOCAL_JOB_RUNNER) {
        // local job runner has a couple of limitations: only one reducer is supported and the DistributedCache doesn't work.
        // see http://blog.cloudera.com/blog/2009/07/advice-on-qa-testing-your-mapreduce-jobs/
        maxReducers = 1;
        shards = 1;
    }

    String[] args = new String[] {
            "--morphline-file=" + RESOURCES_DIR + "/test-morphlines/solrCellDocumentTypes.conf",
            "--morphline-id=morphline1", "--solr-home-dir=" + MINIMR_CONF_DIR.getAbsolutePath(),
            "--output-dir=" + outDir.toString(), "--shards=" + shards, "--verbose",
            numRuns % 2 == 0 ? "--input-list=" + INPATH.toString() : dataDir.toString(),
            numRuns % 3 == 0 ? "--reducers=" + shards
                    : (numRuns % 3 == 1 ? "--reducers=-1" : "--reducers=" + Math.min(8, maxReducers)) };
    if (numRuns % 3 == 2) {
        args = concat(args, new String[] { "--fanout=2" });
    }
    if (numRuns == 0) {
        // force (slow) MapReduce based randomization to get coverage for that as well
        args = concat(new String[] { "-D", MapReduceIndexerTool.MAIN_MEMORY_RANDOMIZATION_THRESHOLD + "=-1" },
                args);
    }
    MapReduceIndexerTool tool = createTool();
    int res = ToolRunner.run(jobConf, tool, args);
    assertEquals(0, res);
    Job job = tool.job;
    assertTrue(job.isComplete());
    assertTrue(job.isSuccessful());

    if (numRuns % 3 != 2) {
        // Only run this check if mtree merge is disabled.
        // With mtree merge enabled the BatchWriter counters aren't available anymore because 
        // variable "job" now refers to the merge job rather than the indexing job
        assertEquals(
                "Invalid counter " + SolrRecordWriter.class.getName() + "." + SolrCounters.DOCUMENTS_WRITTEN,
                count,
                job.getCounters()
                        .findCounter(SolrCounters.class.getName(), SolrCounters.DOCUMENTS_WRITTEN.toString())
                        .getValue());
    }

    // Check the output is as expected
    outDir = new Path(outDir, MapReduceIndexerTool.RESULTS_DIR);
    Path[] outputFiles = FileUtil.stat2Paths(fs.listStatus(outDir));

    System.out.println("outputfiles:" + Arrays.toString(outputFiles));

    TestUtils.validateSolrServerDocumentCount(MINIMR_CONF_DIR, fs, outDir, count, shards);

    // run again in --dry-run mode:
    tool = createTool();
    args = concat(args, new String[] { "--dry-run" });
    res = ToolRunner.run(jobConf, tool, args);
    assertEquals(0, res);

    numRuns++;
}

From source file: org.apache.solr.hadoop.MorphlineGoLiveMiniMRTest.java

License: Apache License

@Override
public void doTest() throws Exception {

    waitForRecoveriesToFinish(false);

    FileSystem fs = dfsCluster.getFileSystem();
    Path inDir = fs.makeQualified(new Path("/user/testing/testMapperReducer/input"));
    fs.delete(inDir, true);
    String DATADIR = "/user/testing/testMapperReducer/data";
    Path dataDir = fs.makeQualified(new Path(DATADIR));
    fs.delete(dataDir, true);
    Path outDir = fs.makeQualified(new Path("/user/testing/testMapperReducer/output"));
    fs.delete(outDir, true);

    assertTrue(fs.mkdirs(inDir));
    Path INPATH = upAvroFile(fs, inDir, DATADIR, dataDir, inputAvroFile1);

    JobConf jobConf = getJobConf();
    // set mapred.job.tracker = local to run in a debugger and set breakpoints
    // jobConf.set("mapred.job.tracker", "local");
    jobConf.setMaxMapAttempts(1);
    jobConf.setMaxReduceAttempts(1);
    jobConf.setJar(SEARCH_ARCHIVES_JAR);
    jobConf.setBoolean(ExtractingParams.IGNORE_TIKA_EXCEPTION, false);

    MapReduceIndexerTool tool;
    int res;
    QueryResponse results;
    HttpSolrServer server = new HttpSolrServer(cloudJettys.get(0).url);

    String[] args = new String[] { "--solr-home-dir=" + MINIMR_CONF_DIR.getAbsolutePath(),
            "--output-dir=" + outDir.toString(), "--mappers=3",
            ++numRuns % 2 == 0 ? "--input-list=" + INPATH.toString() : dataDir.toString(), "--shard-url",
            cloudJettys.get(0).url, "--shard-url", cloudJettys.get(1).url, "--shard-url",
            cloudJettys.get(2).url, "--go-live-threads", Integer.toString(random().nextInt(15) + 1),
            "--verbose", "--go-live" };
    args = prependInitialArgs(args);

    if (true) {
        tool = new MapReduceIndexerTool();
        res = ToolRunner.run(jobConf, tool, args);
        assertEquals(0, res);
        assertTrue(tool.job.isComplete());
        assertTrue(tool.job.isSuccessful());
        results = server.query(new SolrQuery("*:*"));
        assertEquals(20, results.getResults().getNumFound());
    }

    fs.delete(inDir, true);
    fs.delete(outDir, true);
    fs.delete(dataDir, true);
    assertTrue(fs.mkdirs(inDir));
    INPATH = upAvroFile(fs, inDir, DATADIR, dataDir, inputAvroFile2);

    args = new String[] { "--solr-home-dir=" + MINIMR_CONF_DIR.getAbsolutePath(),
            "--output-dir=" + outDir.toString(), "--mappers=3", "--verbose", "--go-live",
            ++numRuns % 2 == 0 ? "--input-list=" + INPATH.toString() : dataDir.toString(), "--shard-url",
            cloudJettys.get(0).url, "--shard-url", cloudJettys.get(1).url, "--shard-url",
            cloudJettys.get(2).url, "--go-live-threads", Integer.toString(random().nextInt(15) + 1) };
    args = prependInitialArgs(args);

    if (true) {
        tool = new MapReduceIndexerTool();
        res = ToolRunner.run(jobConf, tool, args);
        assertEquals(0, res);
        assertTrue(tool.job.isComplete());
        assertTrue(tool.job.isSuccessful());
        results = server.query(new SolrQuery("*:*"));

        assertEquals(22, results.getResults().getNumFound());
    }

    // try using zookeeper
    String collection = "collection1";
    if (random().nextBoolean()) {
        // sometimes, use an alias
        createAlias("updatealias", "collection1");
        collection = "updatealias";
    }

    fs.delete(inDir, true);
    fs.delete(outDir, true);
    fs.delete(dataDir, true);
    INPATH = upAvroFile(fs, inDir, DATADIR, dataDir, inputAvroFile3);

    args = new String[] { "--output-dir=" + outDir.toString(), "--mappers=3", "--reducers=6", "--verbose",
            "--go-live", ++numRuns % 2 == 0 ? "--input-list=" + INPATH.toString() : dataDir.toString(),
            "--zk-host", zkServer.getZkAddress(), "--collection", collection };
    args = prependInitialArgs(args);

    if (true) {
        tool = new MapReduceIndexerTool();
        res = ToolRunner.run(jobConf, tool, args);
        assertEquals(0, res);
        assertTrue(tool.job.isComplete());
        assertTrue(tool.job.isSuccessful());

        results = server.query(new SolrQuery("*:*"));
        assertEquals(2126, results.getResults().getNumFound());
    }

    server.shutdown();

    // try using zookeeper with replication
    String replicatedCollection = "replicated_collection";
    createCollection(replicatedCollection, 2, 3, 2);
    waitForRecoveriesToFinish(false);
    cloudClient.setDefaultCollection(replicatedCollection);
    fs.delete(inDir, true);
    fs.delete(outDir, true);
    fs.delete(dataDir, true);
    assertTrue(fs.mkdirs(dataDir));
    INPATH = upAvroFile(fs, inDir, DATADIR, dataDir, inputAvroFile3);

    args = new String[] { "--solr-home-dir=" + MINIMR_CONF_DIR.getAbsolutePath(),
            "--output-dir=" + outDir.toString(), "--mappers=3", "--reducers=6", "--verbose", "--go-live",
            "--zk-host", zkServer.getZkAddress(), "--collection", replicatedCollection, dataDir.toString() };
    args = prependInitialArgs(args);

    if (true) {
        tool = new MapReduceIndexerTool();
        res = ToolRunner.run(jobConf, tool, args);
        assertEquals(0, res);
        assertTrue(tool.job.isComplete());
        assertTrue(tool.job.isSuccessful());

        results = cloudClient.query(new SolrQuery("*:*"));
        assertEquals(2104, results.getResults().getNumFound());

        checkConsistency(replicatedCollection);
    }

    // try using solr_url with replication
    cloudClient.deleteByQuery("*:*");
    cloudClient.commit();
    fs.delete(inDir, true);
    fs.delete(dataDir, true);
    assertTrue(fs.mkdirs(dataDir));
    INPATH = upAvroFile(fs, inDir, DATADIR, dataDir, inputAvroFile3);

    args = new String[] { "--solr-home-dir=" + MINIMR_CONF_DIR.getAbsolutePath(),
            "--output-dir=" + outDir.toString(), "--shards", "2", "--mappers=3", "--verbose", "--go-live",
            "--go-live-threads", Integer.toString(random().nextInt(15) + 1), dataDir.toString() };
    args = prependInitialArgs(args);

    List<String> argList = new ArrayList<String>();
    getShardUrlArgs(argList, replicatedCollection);
    args = concat(args, argList.toArray(new String[0]));

    if (true) {
        tool = new MapReduceIndexerTool();
        res = ToolRunner.run(jobConf, tool, args);
        assertEquals(0, res);
        assertTrue(tool.job.isComplete());
        assertTrue(tool.job.isSuccessful());

        checkConsistency(replicatedCollection);

        results = cloudClient.query(new SolrQuery("*:*"));
        assertEquals(2104, results.getResults().getNumFound());
    }

}

From source file: org.cloudata.core.PerformanceTest.java

License: Apache License

private void runNIsMoreThanOne(final String cmd) throws IOException {
    checkTable();

    // Run a mapreduce job.  Run as many maps as asked-for clients.
    // Before we start up the job, write out an input file with an instruction
    // per client regarding which row it should start on.
    Path inputDir = writeInputFile(this.conf);
    this.conf.set(EvaluationMapTask.CMD_KEY, cmd);
    JobConf job = new JobConf(this.conf, this.getClass());
    FileInputFormat.addInputPath(job, inputDir);
    job.setInputFormat(TextInputFormat.class);
    job.setJobName("Cloudata Performance Evaluation");
    job.setMapperClass(EvaluationMapTask.class);
    job.setMaxMapAttempts(1);
    job.setMaxReduceAttempts(1);
    job.setNumMapTasks(this.N * 10); // Ten maps per client.
    job.setNumReduceTasks(1);
    job.setOutputFormat(TextOutputFormat.class);
    FileOutputFormat.setOutputPath(job, new Path(inputDir, "outputs"));
    JobClient.runJob(job);
}

From source file: org.cloudata.core.tablet.backup.RestoreBinaryJob.java

License: Apache License

/**
 * @param tableName name of the table to restore
 * @param columnNames columns of the table to restore
 * @param numOfVersion number of cell versions to keep
 * @param inputPath path of the backup data to read from
 */
public void runRestore(String tableName, String[] columnNames, int numOfVersion, String inputPath)
        throws IOException {
    CloudataConf nconf = new CloudataConf();

    JobConf partitionJob = new JobConf(BackupJob.class);

    FileSystem fs = FileSystem.get(partitionJob);

    if (!fs.exists(new Path(inputPath))) {
        throw new IOException("input path not exists:" + inputPath);
    }

    if (CTable.existsTable(nconf, tableName)) {
        throw new IOException("table already exists" + tableName);
    }

    TableSchema tableSchema = new TableSchema(tableName, "", columnNames);
    tableSchema.setNumOfVersion(numOfVersion);
    CTable.createTable(nconf, tableSchema);

    String columns = "";
    for (String eachColumn : columnNames) {
        columns += eachColumn.trim() + ",";
    }
    columns = columns.substring(0, columns.length() - 1);

    String jobName = tableName + " restore";
    String tempDir = jobName + "_" + System.currentTimeMillis();

    partitionJob.setJobName(tableName + " restore");

    partitionJob.setMapperClass(RestoreBinaryPartitionMap.class);
    FileInputFormat.addInputPath(partitionJob, new Path(inputPath));
    partitionJob.setInputFormat(RestoreSequenceFileAsBinaryInputFormat.class);
    partitionJob.set(DefaultTabletInputFormat.OUTPUT_TABLE, tableName);
    FileOutputFormat.setOutputPath(partitionJob, new Path(tempDir));

    //map only
    partitionJob.setNumReduceTasks(0);

    JobClient.runJob(partitionJob);

    //delete temp output dir
    fs.delete(new Path(tempDir), true);

    ////////////////////////////////////////////////////////////////
    JobConf jobConf = new JobConf(BackupJob.class);
    jobConf.setJobName(tableName + " restore");

    jobConf.setMapperClass(RestoreBinaryMap.class);
    FileInputFormat.addInputPath(jobConf, new Path(inputPath));
    jobConf.setInputFormat(RestoreSequenceFileAsBinaryInputFormat.class);
    jobConf.set(DefaultTabletInputFormat.OUTPUT_TABLE, tableName);
    jobConf.set(DefaultTabletInputFormat.INPUT_COLUMN_LIST, columns);
    FileOutputFormat.setOutputPath(jobConf, new Path(tempDir));
    jobConf.setMaxMapAttempts(0);
    //map only
    jobConf.setNumReduceTasks(0);

    JobClient.runJob(jobConf);

    //delete temp output dir
    fs.delete(new Path(tempDir), true);
}

From source file: org.cloudata.core.tablet.backup.RestoreJob.java

License: Apache License

/**
 * @param tableName name of the table to restore
 * @param columnNames columns of the table to restore
 * @param numOfVersion number of cell versions to keep
 * @param inputPath path of the backup data to read from
 */
public void runRestore(String tableName, String[] columnNames, int numOfVersion, String inputPath)
        throws IOException {
    CloudataConf nconf = new CloudataConf();

    JobConf partitionJob = new JobConf(RestoreJob.class);

    FileSystem fs = FileSystem.get(partitionJob);

    if (!fs.exists(new Path(inputPath))) {
        throw new IOException("input path not exists:" + inputPath);
    }

    if (CTable.existsTable(nconf, tableName)) {
        throw new IOException("table already exists" + tableName);
    }

    TableSchema tableSchema = new TableSchema(tableName, "", columnNames);
    tableSchema.setNumOfVersion(numOfVersion);
    CTable.createTable(nconf, tableSchema);

    String columns = "";
    for (String eachColumn : columnNames) {
        columns += eachColumn.trim() + ",";
    }
    columns = columns.substring(0, columns.length() - 1);

    String jobName = tableName + " restore";
    String tempDir = jobName + "_" + System.currentTimeMillis();

    partitionJob.setJobName(jobName + "_partition");

    partitionJob.setMapperClass(RestorePartitionMap.class);
    FileInputFormat.addInputPath(partitionJob, new Path(inputPath));
    partitionJob.setInputFormat(RestoreTextInputFormat.class);
    partitionJob.set(DefaultTabletInputFormat.OUTPUT_TABLE, tableName);
    FileOutputFormat.setOutputPath(partitionJob, new Path(tempDir));

    //map only
    partitionJob.setNumReduceTasks(0);

    JobClient.runJob(partitionJob);

    fs.delete(new Path(tempDir), true);
    ////////////////////////////////////////////////////////////

    JobConf jobConf = new JobConf(BackupJob.class);
    jobConf.setJobName(jobName);

    jobConf.setMapperClass(RestoreMap.class);
    FileInputFormat.addInputPath(jobConf, new Path(inputPath));
    jobConf.setInputFormat(RestoreTextInputFormat.class);
    jobConf.set(DefaultTabletInputFormat.OUTPUT_TABLE, tableName);
    jobConf.set(DefaultTabletInputFormat.INPUT_COLUMN_LIST, columns);
    FileOutputFormat.setOutputPath(jobConf, new Path(tempDir));
    jobConf.setMaxMapAttempts(0);

    //map only
    jobConf.setNumReduceTasks(0);

    JobClient.runJob(jobConf);
    //delete temp output dir
    fs.delete(new Path(tempDir), true);
}

From source file: org.cloudata.core.testjob.performance.ManyTableJob.java

License: Apache License

public static void getData(CloudataConf conf, Path keyPath) throws IOException {
    JobConf jobConf = new JobConf(TeraReadJob.class);
    jobConf.set("user.name", conf.getUserId());
    String libDir = CloudataMapReduceUtil.initMapReduce(jobConf);

    Path tempOutputPath = new Path("ManyTableJob_Get_" + System.currentTimeMillis());

    jobConf.setJobName("ManyTableJob_Get_" + "(" + new Date() + ")");

    TextOutputFormat.setOutputPath(jobConf, tempOutputPath);
    //<MAP>
    jobConf.setMapperClass(ManyTableGetMap.class);
    jobConf.setInputFormat(TextInputFormat.class);
    TextInputFormat.addInputPath(jobConf, keyPath);
    jobConf.setMapSpeculativeExecution(false);
    jobConf.setMaxMapAttempts(0);
    //</MAP>

    //<REDUCE>
    jobConf.setNumReduceTasks(0);
    //</REDUCE>

    try {
        //Run Job
        JobClient.runJob(jobConf);
    } finally {
        //delete temp output path
        FileSystem fs = FileSystem.get(jobConf);
        FileUtil.delete(fs, tempOutputPath, true);
        CloudataMapReduceUtil.clearMapReduce(libDir);
    }
}

From source file: org.cloudata.core.testjob.performance.ManyTableJob.java

License: Apache License

public static Path putData() throws IOException {
    CloudataConf nconf = new CloudataConf();

    JobConf jobConf = new JobConf(ManyTableJob.class);
    jobConf.set("user.name", nconf.getUserId());
    String libDir = CloudataMapReduceUtil.initMapReduce(jobConf);

    jobConf.setJobName("ManyTableJob_Put" + "(" + new Date() + ")");

    jobConf.setLong("mapred.task.timeout", 30 * 60 * 1000);

    Path outputPath = new Path("ManyTableJob_KEY_" + System.currentTimeMillis());

    FileOutputFormat.setOutputPath(jobConf, outputPath);

    //<MAP>
    jobConf.setMapperClass(ManyTablePutMap.class);
    jobConf.setInputFormat(SimpleInputFormat.class);
    jobConf.setNumMapTasks(numOfTables);
    jobConf.setMapSpeculativeExecution(false);
    jobConf.setMaxMapAttempts(0);
    //</MAP>

    //<REDUCE>
    jobConf.setNumReduceTasks(0);
    //</REDUCE>

    try {
        //Run Job
        JobClient.runJob(jobConf);
        return outputPath;
    } finally {
        //delete temp output path
        FileSystem fs = FileSystem.get(jobConf);
        CloudataMapReduceUtil.clearMapReduce(libDir);
    }
}

From source file: org.cloudata.core.testjob.performance.TestMultiThreadCTable.java

License: Apache License

public static Path putData(String outputDir) throws IOException {
    CloudataConf nconf = new CloudataConf();

    JobConf jobConf = new JobConf(TestMultiThreadCTable.class);
    jobConf.set("user.name", nconf.getUserId());
    String libDir = CloudataMapReduceUtil.initMapReduce(jobConf);

    jobConf.setJobName("TestMultiThreadNTable_" + "(" + new Date() + ")");

    jobConf.setLong("mapred.task.timeout", 30 * 60 * 1000);

    Path outputPath = new Path(outputDir);

    FileOutputFormat.setOutputPath(jobConf, outputPath);

    JobClient jobClient = new JobClient();

    int numOfRowPerMap = 100000 / jobClient.getClusterStatus().getMaxMapTasks();
    jobConf.setInt("numOfRowPerMap", numOfRowPerMap);
    //<MAP>
    jobConf.setMapperClass(PutDataMap.class);
    jobConf.setInputFormat(SimpleInputFormat.class);
    jobConf.setNumMapTasks(jobClient.getClusterStatus().getMaxMapTasks());
    jobConf.setMapSpeculativeExecution(false);
    jobConf.setMaxMapAttempts(0);
    //</MAP>

    //<REDUCE>
    jobConf.setNumReduceTasks(0);
    //</REDUCE>

    try {
        //Run Job
        JobClient.runJob(jobConf);
        return outputPath;
    } finally {
        FileSystem fs = FileSystem.get(jobConf);
        CloudataMapReduceUtil.clearMapReduce(libDir);
    }
}

From source file: org.cloudata.core.testjob.tera.TeraJob.java

License: Apache License

public void runJob(String tableName, int numOfTablets, int dataLength, int totalGb, String keyOutputPath)
        throws IOException {
    CloudataConf nconf = new CloudataConf();

    JobConf jobConf = new JobConf(TeraJob.class);
    jobConf.set("user.name", nconf.getUserId());
    String libDir = CloudataMapReduceUtil.initMapReduce(jobConf);

    if (!CTable.existsTable(nconf, tableName)) {
        TableSchema tableInfo = new TableSchema(tableName, "Test");
        tableInfo.addColumn(new ColumnInfo("Col1"));
        tableInfo.addColumn(new ColumnInfo("Col2", TableSchema.CACHE_TYPE));
        tableInfo.addColumn(new ColumnInfo("Col3"));
        CTable.createTable(nconf, tableInfo);
    }
    jobConf.setJobName("TeraOnlineJob" + "(" + new Date() + ")");

    long rowsPerTask = ((((long) totalGb) * 1024L * 1024L * 1024L) / ((long) dataLength)) / (long) numOfTablets;

    jobConf.setInt("teraJob.dataLength", dataLength);
    jobConf.setLong("teraJob.rowsPerTask", rowsPerTask);

    jobConf.setLong("mapred.task.timeout", 30 * 60 * 1000);

    FileOutputFormat.setOutputPath(jobConf, new Path(keyOutputPath));

    //<MAP>
    jobConf.setMapperClass(TeraOnlineMap.class);
    jobConf.setInputFormat(SimpleInputFormat.class);
    jobConf.set(AbstractTabletInputFormat.OUTPUT_TABLE, tableName);
    jobConf.setNumMapTasks(numOfTablets);
    jobConf.setMapSpeculativeExecution(false);
    jobConf.setMaxMapAttempts(0);
    //</MAP>

    //<REDUCE>
    jobConf.setNumReduceTasks(0);
    //</REDUCE>

    try {
        //Run Job
        JobClient.runJob(jobConf);
    } finally {
        //delete temp output path
        FileSystem fs = FileSystem.get(jobConf);
        CloudataMapReduceUtil.clearMapReduce(libDir);
    }
}

From source file: org.cloudata.core.testjob.tera.TeraReadJob.java

License: Apache License

public static void main(String[] args) throws IOException {
    if (args.length < 2) {
        System.out.println("Usage: java TeraReadJob <table name> <keyOutputPath>");
        System.exit(0);
    }

    String tableName = args[0];
    String keyOutputPath = args[1];

    CloudataConf nconf = new CloudataConf();

    JobConf jobConf = new JobConf(TeraReadJob.class);
    jobConf.set("user.name", nconf.getUserId());
    String libDir = CloudataMapReduceUtil.initMapReduce(jobConf);

    if (!CTable.existsTable(nconf, tableName)) {
        System.out.println("Error: No table " + tableName);
        System.exit(0);
    }
    Path tempOutputPath = new Path("TeraReadJob" + System.currentTimeMillis());

    jobConf.setJobName("TeraReadJob" + "(" + new Date() + ")");
    jobConf.set("TeraReadJob.tableName", tableName);

    TextOutputFormat.setOutputPath(jobConf, tempOutputPath);
    //<MAP>
    jobConf.setMapperClass(TeraReadMap.class);
    jobConf.setInputFormat(TextInputFormat.class);
    TextInputFormat.addInputPath(jobConf, new Path(keyOutputPath));
    jobConf.setMapSpeculativeExecution(false);
    jobConf.setMaxMapAttempts(0);
    //</MAP>

    //<REDUCE>
    jobConf.setNumReduceTasks(0);
    //</REDUCE>

    try {
        //Run Job
        JobClient.runJob(jobConf);
    } finally {
        //delete temp output path
        FileSystem fs = FileSystem.get(jobConf);
        FileUtil.delete(fs, tempOutputPath, true);
        CloudataMapReduceUtil.clearMapReduce(libDir);
    }
}