List of usage examples for org.apache.hadoop.mapred.JobConf.setMaxMapAttempts
public void setMaxMapAttempts(int n)
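This setter caps how many times Hadoop will attempt each map task before failing the job; it writes the mapred.map.max.attempts property, which defaults to 4 attempts. A minimal sketch of the recurring pattern in the examples below (MyJob and MyMap are hypothetical placeholder classes, and the paths are illustrative):

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.TextInputFormat;

JobConf jobConf = new JobConf(MyJob.class);  // MyJob: hypothetical driver class
jobConf.setJobName("fail-fast example");
jobConf.setMapperClass(MyMap.class);         // MyMap: hypothetical Mapper implementation
jobConf.setInputFormat(TextInputFormat.class);
FileInputFormat.addInputPath(jobConf, new Path("/user/example/input"));
FileOutputFormat.setOutputPath(jobConf, new Path("/user/example/output"));
jobConf.setMaxMapAttempts(1);     // fail the job on the first map-task failure instead of retrying
jobConf.setMaxReduceAttempts(1);  // same policy for reduce tasks
jobConf.setNumReduceTasks(0);     // map-only, as in most examples below
JobClient.runJob(jobConf);        // throws IOException if the job fails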
From source file: org.apache.solr.hadoop.MorphlineBasicMiniMRTest.java
License: Apache License
@Test
public void mrRun() throws Exception {
    FileSystem fs = dfsCluster.getFileSystem();
    Path inDir = fs.makeQualified(new Path("/user/testing/testMapperReducer/input"));
    fs.delete(inDir, true);
    String DATADIR = "/user/testing/testMapperReducer/data";
    Path dataDir = fs.makeQualified(new Path(DATADIR));
    fs.delete(dataDir, true);
    Path outDir = fs.makeQualified(new Path("/user/testing/testMapperReducer/output"));
    fs.delete(outDir, true);

    assertTrue(fs.mkdirs(inDir));
    Path INPATH = new Path(inDir, "input.txt");
    OutputStream os = fs.create(INPATH);
    Writer wr = new OutputStreamWriter(os, "UTF-8");
    wr.write(DATADIR + "/" + inputAvroFile);
    wr.close();

    assertTrue(fs.mkdirs(dataDir));
    fs.copyFromLocalFile(new Path(DOCUMENTS_DIR, inputAvroFile), dataDir);

    JobConf jobConf = getJobConf();
    if (ENABLE_LOCAL_JOB_RUNNER) {
        // enable Hadoop LocalJobRunner; this makes it possible to run in a debugger and set breakpoints
        jobConf.set("mapred.job.tracker", "local");
    }
    jobConf.setMaxMapAttempts(1);
    jobConf.setMaxReduceAttempts(1);
    jobConf.setJar(SEARCH_ARCHIVES_JAR);
    jobConf.setBoolean(ExtractingParams.IGNORE_TIKA_EXCEPTION, false);

    int shards = 2;
    int maxReducers = Integer.MAX_VALUE;
    if (ENABLE_LOCAL_JOB_RUNNER) {
        // the local job runner has a couple of limitations: only one reducer is supported
        // and the DistributedCache doesn't work.
        // see http://blog.cloudera.com/blog/2009/07/advice-on-qa-testing-your-mapreduce-jobs/
        maxReducers = 1;
        shards = 1;
    }

    String[] args = new String[] {
        "--morphline-file=" + RESOURCES_DIR + "/test-morphlines/solrCellDocumentTypes.conf",
        "--morphline-id=morphline1",
        "--solr-home-dir=" + MINIMR_CONF_DIR.getAbsolutePath(),
        "--output-dir=" + outDir.toString(),
        "--shards=" + shards,
        "--verbose",
        numRuns % 2 == 0 ? "--input-list=" + INPATH.toString() : dataDir.toString(),
        numRuns % 3 == 0 ? "--reducers=" + shards
            : (numRuns % 3 == 1 ? "--reducers=-1" : "--reducers=" + Math.min(8, maxReducers))
    };
    if (numRuns % 3 == 2) {
        args = concat(args, new String[] { "--fanout=2" });
    }
    if (numRuns == 0) {
        // force (slow) MapReduce based randomization to get coverage for that as well
        args = concat(new String[] { "-D",
            MapReduceIndexerTool.MAIN_MEMORY_RANDOMIZATION_THRESHOLD + "=-1" }, args);
    }

    MapReduceIndexerTool tool = createTool();
    int res = ToolRunner.run(jobConf, tool, args);
    assertEquals(0, res);
    Job job = tool.job;
    assertTrue(job.isComplete());
    assertTrue(job.isSuccessful());

    if (numRuns % 3 != 2) {
        // Only run this check if mtree merge is disabled.
        // With mtree merge enabled the BatchWriter counters aren't available anymore because
        // variable "job" now refers to the merge job rather than the indexing job
        assertEquals(
            "Invalid counter " + SolrRecordWriter.class.getName() + "." + SolrCounters.DOCUMENTS_WRITTEN,
            count,
            job.getCounters()
                .findCounter(SolrCounters.class.getName(), SolrCounters.DOCUMENTS_WRITTEN.toString())
                .getValue());
    }

    // Check the output is as expected
    outDir = new Path(outDir, MapReduceIndexerTool.RESULTS_DIR);
    Path[] outputFiles = FileUtil.stat2Paths(fs.listStatus(outDir));
    System.out.println("outputfiles:" + Arrays.toString(outputFiles));
    TestUtils.validateSolrServerDocumentCount(MINIMR_CONF_DIR, fs, outDir, count, shards);

    // run again in --dry-run mode:
    tool = createTool();
    args = concat(args, new String[] { "--dry-run" });
    res = ToolRunner.run(jobConf, tool, args);
    assertEquals(0, res);

    numRuns++;
}
From source file: org.apache.solr.hadoop.MorphlineGoLiveMiniMRTest.java
License: Apache License
@Override
public void doTest() throws Exception {
    waitForRecoveriesToFinish(false);

    FileSystem fs = dfsCluster.getFileSystem();
    Path inDir = fs.makeQualified(new Path("/user/testing/testMapperReducer/input"));
    fs.delete(inDir, true);
    String DATADIR = "/user/testing/testMapperReducer/data";
    Path dataDir = fs.makeQualified(new Path(DATADIR));
    fs.delete(dataDir, true);
    Path outDir = fs.makeQualified(new Path("/user/testing/testMapperReducer/output"));
    fs.delete(outDir, true);

    assertTrue(fs.mkdirs(inDir));
    Path INPATH = upAvroFile(fs, inDir, DATADIR, dataDir, inputAvroFile1);

    JobConf jobConf = getJobConf();
    // enable mapred.job.tracker = local to run in debugger and set breakpoints
    // jobConf.set("mapred.job.tracker", "local");
    jobConf.setMaxMapAttempts(1);
    jobConf.setMaxReduceAttempts(1);
    jobConf.setJar(SEARCH_ARCHIVES_JAR);
    jobConf.setBoolean(ExtractingParams.IGNORE_TIKA_EXCEPTION, false);

    MapReduceIndexerTool tool;
    int res;
    QueryResponse results;
    HttpSolrServer server = new HttpSolrServer(cloudJettys.get(0).url);

    String[] args = new String[] {
        "--solr-home-dir=" + MINIMR_CONF_DIR.getAbsolutePath(),
        "--output-dir=" + outDir.toString(),
        "--mappers=3",
        ++numRuns % 2 == 0 ? "--input-list=" + INPATH.toString() : dataDir.toString(),
        "--shard-url", cloudJettys.get(0).url,
        "--shard-url", cloudJettys.get(1).url,
        "--shard-url", cloudJettys.get(2).url,
        "--go-live-threads", Integer.toString(random().nextInt(15) + 1),
        "--verbose",
        "--go-live"
    };
    args = prependInitialArgs(args);

    if (true) {
        tool = new MapReduceIndexerTool();
        res = ToolRunner.run(jobConf, tool, args);
        assertEquals(0, res);
        assertTrue(tool.job.isComplete());
        assertTrue(tool.job.isSuccessful());
        results = server.query(new SolrQuery("*:*"));
        assertEquals(20, results.getResults().getNumFound());
    }

    fs.delete(inDir, true);
    fs.delete(outDir, true);
    fs.delete(dataDir, true);
    assertTrue(fs.mkdirs(inDir));
    INPATH = upAvroFile(fs, inDir, DATADIR, dataDir, inputAvroFile2);

    args = new String[] {
        "--solr-home-dir=" + MINIMR_CONF_DIR.getAbsolutePath(),
        "--output-dir=" + outDir.toString(),
        "--mappers=3",
        "--verbose",
        "--go-live",
        ++numRuns % 2 == 0 ? "--input-list=" + INPATH.toString() : dataDir.toString(),
        "--shard-url", cloudJettys.get(0).url,
        "--shard-url", cloudJettys.get(1).url,
        "--shard-url", cloudJettys.get(2).url,
        "--go-live-threads", Integer.toString(random().nextInt(15) + 1)
    };
    args = prependInitialArgs(args);

    if (true) {
        tool = new MapReduceIndexerTool();
        res = ToolRunner.run(jobConf, tool, args);
        assertEquals(0, res);
        assertTrue(tool.job.isComplete());
        assertTrue(tool.job.isSuccessful());
        results = server.query(new SolrQuery("*:*"));
        assertEquals(22, results.getResults().getNumFound());
    }

    // try using zookeeper
    String collection = "collection1";
    if (random().nextBoolean()) {
        // sometimes, use an alias
        createAlias("updatealias", "collection1");
        collection = "updatealias";
    }
    fs.delete(inDir, true);
    fs.delete(outDir, true);
    fs.delete(dataDir, true);
    INPATH = upAvroFile(fs, inDir, DATADIR, dataDir, inputAvroFile3);

    args = new String[] {
        "--output-dir=" + outDir.toString(),
        "--mappers=3",
        "--reducers=6",
        "--verbose",
        "--go-live",
        ++numRuns % 2 == 0 ? "--input-list=" + INPATH.toString() : dataDir.toString(),
        "--zk-host", zkServer.getZkAddress(),
        "--collection", collection
    };
    args = prependInitialArgs(args);

    if (true) {
        tool = new MapReduceIndexerTool();
        res = ToolRunner.run(jobConf, tool, args);
        assertEquals(0, res);
        assertTrue(tool.job.isComplete());
        assertTrue(tool.job.isSuccessful());
        results = server.query(new SolrQuery("*:*"));
        assertEquals(2126, results.getResults().getNumFound());
    }
    server.shutdown();

    // try using zookeeper with replication
    String replicatedCollection = "replicated_collection";
    createCollection(replicatedCollection, 2, 3, 2);
    waitForRecoveriesToFinish(false);
    cloudClient.setDefaultCollection(replicatedCollection);
    fs.delete(inDir, true);
    fs.delete(outDir, true);
    fs.delete(dataDir, true);
    assertTrue(fs.mkdirs(dataDir));
    INPATH = upAvroFile(fs, inDir, DATADIR, dataDir, inputAvroFile3);

    args = new String[] {
        "--solr-home-dir=" + MINIMR_CONF_DIR.getAbsolutePath(),
        "--output-dir=" + outDir.toString(),
        "--mappers=3",
        "--reducers=6",
        "--verbose",
        "--go-live",
        "--zk-host", zkServer.getZkAddress(),
        "--collection", replicatedCollection,
        dataDir.toString()
    };
    args = prependInitialArgs(args);

    if (true) {
        tool = new MapReduceIndexerTool();
        res = ToolRunner.run(jobConf, tool, args);
        assertEquals(0, res);
        assertTrue(tool.job.isComplete());
        assertTrue(tool.job.isSuccessful());
        results = cloudClient.query(new SolrQuery("*:*"));
        assertEquals(2104, results.getResults().getNumFound());
        checkConsistency(replicatedCollection);
    }

    // try using solr_url with replication
    cloudClient.deleteByQuery("*:*");
    cloudClient.commit();
    fs.delete(inDir, true);
    fs.delete(dataDir, true);
    assertTrue(fs.mkdirs(dataDir));
    INPATH = upAvroFile(fs, inDir, DATADIR, dataDir, inputAvroFile3);

    args = new String[] {
        "--solr-home-dir=" + MINIMR_CONF_DIR.getAbsolutePath(),
        "--output-dir=" + outDir.toString(),
        "--shards", "2",
        "--mappers=3",
        "--verbose",
        "--go-live",
        "--go-live-threads", Integer.toString(random().nextInt(15) + 1),
        dataDir.toString()
    };
    args = prependInitialArgs(args);

    List<String> argList = new ArrayList<String>();
    getShardUrlArgs(argList, replicatedCollection);
    args = concat(args, argList.toArray(new String[0]));

    if (true) {
        tool = new MapReduceIndexerTool();
        res = ToolRunner.run(jobConf, tool, args);
        assertEquals(0, res);
        assertTrue(tool.job.isComplete());
        assertTrue(tool.job.isSuccessful());
        checkConsistency(replicatedCollection);
        results = cloudClient.query(new SolrQuery("*:*"));
        assertEquals(2104, results.getResults().getNumFound());
    }
}
From source file: org.cloudata.core.PerformanceTest.java
License: Apache License
private void runNIsMoreThanOne(final String cmd) throws IOException {
    checkTable();
    // Run a mapreduce job. Run as many maps as asked-for clients.
    // Before we start up the job, write out an input file with instruction
    // per client regards which row they are to start on.
    Path inputDir = writeInputFile(this.conf);
    this.conf.set(EvaluationMapTask.CMD_KEY, cmd);
    JobConf job = new JobConf(this.conf, this.getClass());
    FileInputFormat.addInputPath(job, inputDir);
    job.setInputFormat(TextInputFormat.class);
    job.setJobName("Cloudata Performance Evaluation");
    job.setMapperClass(EvaluationMapTask.class);
    job.setMaxMapAttempts(1);
    job.setMaxReduceAttempts(1);
    job.setNumMapTasks(this.N * 10); // Ten maps per client.
    job.setNumReduceTasks(1);
    job.setOutputFormat(TextOutputFormat.class);
    FileOutputFormat.setOutputPath(job, new Path(inputDir, "outputs"));
    JobClient.runJob(job);
}
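This performance evaluation likewise allows only one attempt per task, presumably so that transparent retries of slow or failed tasks cannot distort the measured results.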
From source file: org.cloudata.core.tablet.backup.RestoreBinaryJob.java
License: Apache License
/**
 * Restores a table from a binary backup.
 *
 * @param tableName    name of the table to restore
 * @param columnNames  columns of the table
 * @param numOfVersion number of versions to keep
 * @param inputPath    path of the backup data
 */
public void runRestore(String tableName, String[] columnNames, int numOfVersion, String inputPath)
        throws IOException {
    CloudataConf nconf = new CloudataConf();

    JobConf partitionJob = new JobConf(BackupJob.class);
    FileSystem fs = FileSystem.get(partitionJob);
    if (!fs.exists(new Path(inputPath))) {
        throw new IOException("input path does not exist: " + inputPath);
    }
    if (CTable.existsTable(nconf, tableName)) {
        throw new IOException("table already exists: " + tableName);
    }

    TableSchema tableSchema = new TableSchema(tableName, "", columnNames);
    tableSchema.setNumOfVersion(numOfVersion);
    CTable.createTable(nconf, tableSchema);

    String columns = "";
    for (String eachColumn : columnNames) {
        columns += eachColumn.trim() + ",";
    }
    columns = columns.substring(0, columns.length() - 1);

    String jobName = tableName + " restore";
    String tempDir = jobName + "_" + System.currentTimeMillis();

    partitionJob.setJobName(tableName + " restore");
    partitionJob.setMapperClass(RestoreBinaryPartitionMap.class);
    FileInputFormat.addInputPath(partitionJob, new Path(inputPath));
    partitionJob.setInputFormat(RestoreSequenceFileAsBinaryInputFormat.class);
    partitionJob.set(DefaultTabletInputFormat.OUTPUT_TABLE, tableName);
    FileOutputFormat.setOutputPath(partitionJob, new Path(tempDir));
    // map only
    partitionJob.setNumReduceTasks(0);

    JobClient.runJob(partitionJob);

    // delete temp output dir
    fs.delete(new Path(tempDir), true);

    ////////////////////////////////////////////////////////////////
    JobConf jobConf = new JobConf(BackupJob.class);
    jobConf.setJobName(tableName + " restore");
    jobConf.setMapperClass(RestoreBinaryMap.class);
    FileInputFormat.addInputPath(jobConf, new Path(inputPath));
    jobConf.setInputFormat(RestoreSequenceFileAsBinaryInputFormat.class);
    jobConf.set(DefaultTabletInputFormat.OUTPUT_TABLE, tableName);
    jobConf.set(DefaultTabletInputFormat.INPUT_COLUMN_LIST, columns);
    FileOutputFormat.setOutputPath(jobConf, new Path(tempDir));
    jobConf.setMaxMapAttempts(0);
    // map only
    jobConf.setNumReduceTasks(0);

    JobClient.runJob(jobConf);

    // delete temp output dir
    fs.delete(new Path(tempDir), true);
}
From source file: org.cloudata.core.tablet.backup.RestoreJob.java
License: Apache License
/**
 * Restores a table from a text backup.
 *
 * @param tableName    name of the table to restore
 * @param columnNames  columns of the table
 * @param numOfVersion number of versions to keep
 * @param inputPath    path of the backup data
 */
public void runRestore(String tableName, String[] columnNames, int numOfVersion, String inputPath)
        throws IOException {
    CloudataConf nconf = new CloudataConf();

    JobConf partitionJob = new JobConf(RestoreJob.class);
    FileSystem fs = FileSystem.get(partitionJob);
    if (!fs.exists(new Path(inputPath))) {
        throw new IOException("input path does not exist: " + inputPath);
    }
    if (CTable.existsTable(nconf, tableName)) {
        throw new IOException("table already exists: " + tableName);
    }

    TableSchema tableSchema = new TableSchema(tableName, "", columnNames);
    tableSchema.setNumOfVersion(numOfVersion);
    CTable.createTable(nconf, tableSchema);

    String columns = "";
    for (String eachColumn : columnNames) {
        columns += eachColumn.trim() + ",";
    }
    columns = columns.substring(0, columns.length() - 1);

    String jobName = tableName + " restore";
    String tempDir = jobName + "_" + System.currentTimeMillis();

    partitionJob.setJobName(jobName + "_partition");
    partitionJob.setMapperClass(RestorePartitionMap.class);
    FileInputFormat.addInputPath(partitionJob, new Path(inputPath));
    partitionJob.setInputFormat(RestoreTextInputFormat.class);
    partitionJob.set(DefaultTabletInputFormat.OUTPUT_TABLE, tableName);
    FileOutputFormat.setOutputPath(partitionJob, new Path(tempDir));
    // map only
    partitionJob.setNumReduceTasks(0);

    JobClient.runJob(partitionJob);
    fs.delete(new Path(tempDir), true);

    ////////////////////////////////////////////////////////////
    JobConf jobConf = new JobConf(BackupJob.class);
    jobConf.setJobName(jobName);
    jobConf.setMapperClass(RestoreMap.class);
    FileInputFormat.addInputPath(jobConf, new Path(inputPath));
    jobConf.setInputFormat(RestoreTextInputFormat.class);
    jobConf.set(DefaultTabletInputFormat.OUTPUT_TABLE, tableName);
    jobConf.set(DefaultTabletInputFormat.INPUT_COLUMN_LIST, columns);
    FileOutputFormat.setOutputPath(jobConf, new Path(tempDir));
    jobConf.setMaxMapAttempts(0);
    // map only
    jobConf.setNumReduceTasks(0);

    JobClient.runJob(jobConf);

    // delete temp output dir
    fs.delete(new Path(tempDir), true);
}
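Both restore jobs, like the Cloudata test jobs below, pass 0 to setMaxMapAttempts on the map-only job that writes into the live table, apparently to rule out any re-execution of a failed map: re-running a map that has already applied mutations to the table could apply the same writes twice.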
From source file: org.cloudata.core.testjob.performance.ManyTableJob.java
License: Apache License
public static void getData(CloudataConf conf, Path keyPath) throws IOException {
    JobConf jobConf = new JobConf(TeraReadJob.class);
    jobConf.set("user.name", conf.getUserId());
    String libDir = CloudataMapReduceUtil.initMapReduce(jobConf);

    Path tempOutputPath = new Path("ManyTableJob_Get_" + System.currentTimeMillis());

    jobConf.setJobName("ManyTableJob_Get_" + "(" + new Date() + ")");
    TextOutputFormat.setOutputPath(jobConf, tempOutputPath);

    // <MAP>
    jobConf.setMapperClass(ManyTableGetMap.class);
    jobConf.setInputFormat(TextInputFormat.class);
    TextInputFormat.addInputPath(jobConf, keyPath);
    jobConf.setMapSpeculativeExecution(false);
    jobConf.setMaxMapAttempts(0);
    // </MAP>

    // <REDUCE>
    jobConf.setNumReduceTasks(0);
    // </REDUCE>

    try {
        // Run Job
        JobClient.runJob(jobConf);
    } finally {
        // delete temp output path
        FileSystem fs = FileSystem.get(jobConf);
        FileUtil.delete(fs, tempOutputPath, true);
        CloudataMapReduceUtil.clearMapReduce(libDir);
    }
}
From source file: org.cloudata.core.testjob.performance.ManyTableJob.java
License: Apache License
public static Path putData() throws IOException {
    CloudataConf nconf = new CloudataConf();

    JobConf jobConf = new JobConf(ManyTableJob.class);
    jobConf.set("user.name", nconf.getUserId());
    String libDir = CloudataMapReduceUtil.initMapReduce(jobConf);

    jobConf.setJobName("ManyTableJob_Put" + "(" + new Date() + ")");
    jobConf.setLong("mapred.task.timeout", 30 * 60 * 1000);

    Path outputPath = new Path("ManyTableJob_KEY_" + System.currentTimeMillis());
    FileOutputFormat.setOutputPath(jobConf, outputPath);

    // <MAP>
    jobConf.setMapperClass(ManyTablePutMap.class);
    jobConf.setInputFormat(SimpleInputFormat.class);
    jobConf.setNumMapTasks(numOfTables);
    jobConf.setMapSpeculativeExecution(false);
    jobConf.setMaxMapAttempts(0);
    // </MAP>

    // <REDUCE>
    jobConf.setNumReduceTasks(0);
    // </REDUCE>

    try {
        // Run Job
        JobClient.runJob(jobConf);
        return outputPath;
    } finally {
        // delete temp output path
        FileSystem fs = FileSystem.get(jobConf);
        CloudataMapReduceUtil.clearMapReduce(libDir);
    }
}
From source file: org.cloudata.core.testjob.performance.TestMultiThreadCTable.java
License: Apache License
public static Path putData(String outputDir) throws IOException {
    CloudataConf nconf = new CloudataConf();

    JobConf jobConf = new JobConf(TestMultiThreadCTable.class);
    jobConf.set("user.name", nconf.getUserId());
    String libDir = CloudataMapReduceUtil.initMapReduce(jobConf);

    jobConf.setJobName("TestMultiThreadNTable_" + "(" + new Date() + ")");
    jobConf.setLong("mapred.task.timeout", 30 * 60 * 1000);

    Path outputPath = new Path(outputDir);
    FileOutputFormat.setOutputPath(jobConf, outputPath);

    JobClient jobClient = new JobClient();
    int numOfRowPerMap = 100000 / jobClient.getClusterStatus().getMaxMapTasks();
    jobConf.setInt("numOfRowPerMap", numOfRowPerMap);

    // <MAP>
    jobConf.setMapperClass(PutDataMap.class);
    jobConf.setInputFormat(SimpleInputFormat.class);
    jobConf.setNumMapTasks(jobClient.getClusterStatus().getMaxMapTasks());
    jobConf.setMapSpeculativeExecution(false);
    jobConf.setMaxMapAttempts(0);
    // </MAP>

    // <REDUCE>
    jobConf.setNumReduceTasks(0);
    // </REDUCE>

    try {
        // Run Job
        JobClient.runJob(jobConf);
        return outputPath;
    } finally {
        FileSystem fs = FileSystem.get(jobConf);
        CloudataMapReduceUtil.clearMapReduce(libDir);
    }
}
From source file: org.cloudata.core.testjob.tera.TeraJob.java
License: Apache License
public void runJob(String tableName, int numOfTablets, int dataLength, int totalGb, String keyOutputPath)
        throws IOException {
    CloudataConf nconf = new CloudataConf();

    JobConf jobConf = new JobConf(TeraJob.class);
    jobConf.set("user.name", nconf.getUserId());
    String libDir = CloudataMapReduceUtil.initMapReduce(jobConf);

    if (!CTable.existsTable(nconf, tableName)) {
        TableSchema tableInfo = new TableSchema(tableName, "Test");
        tableInfo.addColumn(new ColumnInfo("Col1"));
        tableInfo.addColumn(new ColumnInfo("Col2", TableSchema.CACHE_TYPE));
        tableInfo.addColumn(new ColumnInfo("Col3"));
        CTable.createTable(nconf, tableInfo);
    }

    jobConf.setJobName("TeraOnlineJob" + "(" + new Date() + ")");

    long rowsPerTask = ((((long) totalGb) * 1024L * 1024L * 1024L) / ((long) dataLength)) / (long) numOfTablets;

    jobConf.setInt("teraJob.dataLength", dataLength);
    jobConf.setLong("teraJob.rowsPerTask", rowsPerTask);
    jobConf.setLong("mapred.task.timeout", 30 * 60 * 1000);

    FileOutputFormat.setOutputPath(jobConf, new Path(keyOutputPath));

    // <MAP>
    jobConf.setMapperClass(TeraOnlineMap.class);
    jobConf.setInputFormat(SimpleInputFormat.class);
    jobConf.set(AbstractTabletInputFormat.OUTPUT_TABLE, tableName);
    jobConf.setNumMapTasks(numOfTablets);
    jobConf.setMapSpeculativeExecution(false);
    jobConf.setMaxMapAttempts(0);
    // </MAP>

    // <REDUCE>
    jobConf.setNumReduceTasks(0);
    // </REDUCE>

    try {
        // Run Job
        JobClient.runJob(jobConf);
    } finally {
        // delete temp output path
        FileSystem fs = FileSystem.get(jobConf);
        CloudataMapReduceUtil.clearMapReduce(libDir);
    }
}
From source file: org.cloudata.core.testjob.tera.TeraReadJob.java
License: Apache License
public static void main(String[] args) throws IOException {
    if (args.length < 2) {
        System.out.println("Usage: java TeraReadJob <table name> <keyOutputPath>");
        System.exit(0);
    }

    String tableName = args[0];
    String keyOutputPath = args[1];

    CloudataConf nconf = new CloudataConf();

    JobConf jobConf = new JobConf(TeraReadJob.class);
    jobConf.set("user.name", nconf.getUserId());
    String libDir = CloudataMapReduceUtil.initMapReduce(jobConf);

    if (!CTable.existsTable(nconf, tableName)) {
        System.out.println("Error: No table " + tableName);
        System.exit(0);
    }

    Path tempOutputPath = new Path("TeraReadJob" + System.currentTimeMillis());

    jobConf.setJobName("TeraReadJob" + "(" + new Date() + ")");
    jobConf.set("TeraReadJob.tableName", tableName);
    TextOutputFormat.setOutputPath(jobConf, tempOutputPath);

    // <MAP>
    jobConf.setMapperClass(TeraReadMap.class);
    jobConf.setInputFormat(TextInputFormat.class);
    TextInputFormat.addInputPath(jobConf, new Path(keyOutputPath));
    jobConf.setMapSpeculativeExecution(false);
    jobConf.setMaxMapAttempts(0);
    // </MAP>

    // <REDUCE>
    jobConf.setNumReduceTasks(0);
    // </REDUCE>

    try {
        // Run Job
        JobClient.runJob(jobConf);
    } finally {
        // delete temp output path
        FileSystem fs = FileSystem.get(jobConf);
        FileUtil.delete(fs, tempOutputPath, true);
        CloudataMapReduceUtil.clearMapReduce(libDir);
    }
}