Example usage for org.apache.hadoop.mapred RunningJob waitForCompletion

Introduction

This page collects example usages of the waitForCompletion method of org.apache.hadoop.mapred.RunningJob.

Prototype

public void waitForCompletion() throws IOException;

Source Link

Document

Blocks until the job is complete.

Usage

From source file:org.apache.hcatalog.hbase.TestHBaseBulkOutputFormat.java

License:Apache License

/**
 * Runs a map-only old-API (mapred) job that writes three rows through
 * {@code HBaseBulkOutputFormat} and verifies the result by scanning the table.
 *
 * <p>The test writes the input lines to a file, configures the job with
 * {@code MapWriteOldMapper}, {@code HBaseBulkOutputFormat} and
 * {@code HBaseBulkOutputCommitter}, opens a write transaction by hand, runs the job,
 * and then checks every expected qualifier/value pair, the number of rows, and that
 * the intermediate output directory no longer exists.
 */
@Test
public void hbaseBulkOutputFormatTest() throws IOException, ClassNotFoundException, InterruptedException {
    String testName = "hbaseBulkOutputFormatTest";
    Path methodTestDir = new Path(getTestDir(), testName);
    LOG.info("starting: " + testName);

    String tableName = newTableName(testName).toLowerCase();
    String familyName = "my_family";
    byte[] familyNameBytes = Bytes.toBytes(familyName);

    //include hbase config in conf file
    Configuration conf = new Configuration(allConf);

    //create table
    conf.set(HBaseConstants.PROPERTY_OUTPUT_TABLE_NAME_KEY, tableName);
    conf.set("yarn.scheduler.capacity.root.queues", "default");
    conf.set("yarn.scheduler.capacity.root.default.capacity", "100");
    createTable(tableName, new String[] { familyName });

    // each line is "<row key>,<qualifier>:<value>,<qualifier>:<value>"
    String[] data = { "1,english:one,spanish:uno", "2,english:two,spanish:dos",
            "3,english:three,spanish:tres" };

    // input/output settings
    Path inputPath = new Path(methodTestDir, "mr_input");
    FSDataOutputStream os = getFileSystem().create(new Path(inputPath, "inputFile.txt"));
    try {
        for (String line : data) {
            os.write(Bytes.toBytes(line + "\n"));
        }
    } finally {
        // close even when a write fails so the stream is not leaked
        os.close();
    }
    Path interPath = new Path(methodTestDir, "inter");

    //create job
    JobConf job = new JobConf(conf);
    job.setWorkingDirectory(new Path(methodTestDir, "mr_work"));
    job.setJarByClass(this.getClass());
    job.setMapperClass(MapWriteOldMapper.class);

    job.setInputFormat(org.apache.hadoop.mapred.TextInputFormat.class);
    org.apache.hadoop.mapred.TextInputFormat.setInputPaths(job, inputPath);

    job.setOutputFormat(HBaseBulkOutputFormat.class);
    org.apache.hadoop.mapred.SequenceFileOutputFormat.setOutputPath(job, interPath);
    job.setOutputCommitter(HBaseBulkOutputCommitter.class);

    //manually create transaction
    RevisionManager rm = HBaseRevisionManagerUtil.getOpenedRevisionManager(conf);
    try {
        OutputJobInfo outputJobInfo = OutputJobInfo.create("default", tableName, null);
        Transaction txn = rm.beginWriteTransaction(tableName, Arrays.asList(familyName));
        outputJobInfo.getProperties().setProperty(HBaseConstants.PROPERTY_WRITE_TXN_KEY,
                HCatUtil.serialize(txn));
        job.set(HCatConstants.HCAT_KEY_OUTPUT_INFO, HCatUtil.serialize(outputJobInfo));
    } finally {
        rm.close();
    }

    job.setMapOutputKeyClass(ImmutableBytesWritable.class);
    job.setMapOutputValueClass(HCatRecord.class);

    job.setOutputKeyClass(ImmutableBytesWritable.class);
    job.setOutputValueClass(HCatRecord.class);

    job.setNumReduceTasks(0);

    RunningJob runJob = JobClient.runJob(job);
    runJob.waitForCompletion();
    assertTrue(runJob.isSuccessful());

    //verify
    int index = 0;
    HTable table = new HTable(conf, tableName);
    try {
        Scan scan = new Scan();
        scan.addFamily(familyNameBytes);
        ResultScanner scanner = table.getScanner(scan);
        try {
            // NOTE(review): relies on the scan returning rows in the same order as data[] - confirm
            for (Result result : scanner) {
                // fail with a clear message instead of ArrayIndexOutOfBoundsException
                assertTrue("table contains more rows than were written", index < data.length);
                String[] vals = data[index].split(",");
                // vals[0] is the row key; the remaining entries are qualifier:value pairs
                for (int i = 1; i < vals.length; i++) {
                    String[] pair = vals[i].split(":");
                    assertTrue(result.containsColumn(familyNameBytes, Bytes.toBytes(pair[0])));
                    assertEquals(pair[1],
                            Bytes.toString(result.getValue(familyNameBytes, Bytes.toBytes(pair[0]))));
                }
                index++;
            }
        } finally {
            scanner.close();
        }
    } finally {
        table.close();
    }
    //test if load count is the same
    assertEquals(data.length, index);
    //test if scratch directory was erased
    assertFalse(FileSystem.get(job).exists(interPath));
}

From source file:org.apache.hcatalog.hbase.TestHBaseDirectOutputFormat.java

License:Apache License

/**
 * Runs a map-only old-API (mapred) job that writes three rows through
 * {@code HBaseDirectOutputFormat} and verifies the result by scanning the table.
 *
 * <p>The test writes the input lines to a file, configures the job with
 * {@code MapWrite} and {@code HBaseDirectOutputFormat}, opens a write transaction by
 * hand, runs the job, and then checks every expected qualifier/value pair and the
 * number of rows returned by the scan.
 */
@Test
public void directOutputFormatTest() throws IOException, ClassNotFoundException, InterruptedException {
    String testName = "directOutputFormatTest";
    Path methodTestDir = new Path(getTestDir(), testName);

    String tableName = newTableName(testName).toLowerCase();
    String familyName = "my_family";
    byte[] familyNameBytes = Bytes.toBytes(familyName);

    //include hbase config in conf file
    Configuration conf = new Configuration(allConf);
    conf.set(HCatConstants.HCAT_KEY_HIVE_CONF, HCatUtil.serialize(allConf.getAllProperties()));

    //create table
    createTable(tableName, new String[] { familyName });

    // each line is "<row key>,<qualifier>:<value>,<qualifier>:<value>"
    String[] data = { "1,english:ONE,spanish:UNO", "2,english:ONE,spanish:DOS", "3,english:ONE,spanish:TRES" };

    // input/output settings
    Path inputPath = new Path(methodTestDir, "mr_input");
    getFileSystem().mkdirs(inputPath);
    FSDataOutputStream os = getFileSystem().create(new Path(inputPath, "inputFile.txt"));
    try {
        for (String line : data) {
            os.write(Bytes.toBytes(line + "\n"));
        }
    } finally {
        // close even when a write fails so the stream is not leaked
        os.close();
    }

    //create job
    JobConf job = new JobConf(conf);
    job.setJobName(testName);
    job.setWorkingDirectory(new Path(methodTestDir, "mr_work"));
    job.setJarByClass(this.getClass());
    job.setMapperClass(MapWrite.class);

    job.setInputFormat(org.apache.hadoop.mapred.TextInputFormat.class);
    org.apache.hadoop.mapred.TextInputFormat.setInputPaths(job, inputPath);

    job.setOutputFormat(HBaseDirectOutputFormat.class);
    job.set(TableOutputFormat.OUTPUT_TABLE, tableName);
    job.set(HBaseConstants.PROPERTY_OUTPUT_TABLE_NAME_KEY, tableName);

    //manually create transaction
    RevisionManager rm = HBaseRevisionManagerUtil.getOpenedRevisionManager(conf);
    try {
        OutputJobInfo outputJobInfo = OutputJobInfo.create("default", tableName, null);
        Transaction txn = rm.beginWriteTransaction(tableName, Arrays.asList(familyName));
        outputJobInfo.getProperties().setProperty(HBaseConstants.PROPERTY_WRITE_TXN_KEY,
                HCatUtil.serialize(txn));
        job.set(HCatConstants.HCAT_KEY_OUTPUT_INFO, HCatUtil.serialize(outputJobInfo));
    } finally {
        rm.close();
    }

    job.setMapOutputKeyClass(BytesWritable.class);
    job.setMapOutputValueClass(HCatRecord.class);
    job.setOutputKeyClass(BytesWritable.class);
    job.setOutputValueClass(HCatRecord.class);
    job.setNumReduceTasks(0);

    RunningJob runJob = JobClient.runJob(job);
    runJob.waitForCompletion();
    assertTrue(runJob.isSuccessful());

    //verify
    int index = 0;
    HTable table = new HTable(conf, tableName);
    try {
        Scan scan = new Scan();
        scan.addFamily(familyNameBytes);
        ResultScanner scanner = table.getScanner(scan);
        try {
            // NOTE(review): relies on the scan returning rows in the same order as data[] - confirm
            for (Result result : scanner) {
                // fail with a clear message instead of ArrayIndexOutOfBoundsException
                assertTrue("table contains more rows than were written", index < data.length);
                String[] vals = data[index].split(",");
                // vals[0] is the row key; the remaining entries are qualifier:value pairs
                for (int i = 1; i < vals.length; i++) {
                    String[] pair = vals[i].split(":");
                    assertTrue(result.containsColumn(familyNameBytes, Bytes.toBytes(pair[0])));
                    assertEquals(pair[1],
                            Bytes.toString(result.getValue(familyNameBytes, Bytes.toBytes(pair[0]))));
                }
                index++;
            }
        } finally {
            scanner.close();
        }
    } finally {
        table.close();
    }
    assertEquals(data.length, index);
}

From source file:org.apache.hcatalog.hbase.TestHBaseInputFormat.java

License:Apache License

/**
 * Reads an HCatalog-managed HBase table through the old-API {@code HBaseInputFormat}
 * with a projection schema and checks the outcome of the map-only job.
 *
 * <p>The test creates the table through the HCat driver, populates it, copies the
 * input job info and per-partition job properties produced by
 * {@code HCatInputFormat.setInput} into the {@code JobConf}, asserts that the scan is
 * restricted to {@code testFamily:testQualifier1}, runs the job, and finally drops the
 * table and checks that it is gone from HBase.
 */
@Test
public void TestHBaseInputFormatProjectionReadMR() throws Exception {

    String tableName = newTableName("mytable");
    String tableQuery = "CREATE TABLE " + tableName
            + "(key string, testqualifier1 string, testqualifier2 string) STORED BY "
            + "'org.apache.hcatalog.hbase.HBaseHCatStorageHandler'"
            + "TBLPROPERTIES ('hbase.columns.mapping'=':key,"
            + "testFamily:testQualifier1,testFamily:testQualifier2')";

    CommandProcessorResponse responseTwo = hcatDriver.run(tableQuery);
    assertEquals(0, responseTwo.getResponseCode());

    HBaseAdmin hAdmin = new HBaseAdmin(getHbaseConf());
    boolean doesTableExist = hAdmin.tableExists(tableName);
    assertTrue(doesTableExist);

    populateHBaseTable(tableName, 5);

    Configuration conf = new Configuration(hcatConf);
    conf.set(HCatConstants.HCAT_KEY_HIVE_CONF, HCatUtil.serialize(getHiveConf().getAllProperties()));

    // output settings
    Path outputDir = new Path(getTestDir(), "mapred/testHBaseTableProjectionReadMR");
    FileSystem fs = getFileSystem();
    if (fs.exists(outputDir)) {
        fs.delete(outputDir, true);
    }
    // create job
    JobConf job = new JobConf(conf);
    job.setJobName("hbase-scan-column");
    job.setJarByClass(this.getClass());
    job.setMapperClass(MapReadProjectionHTable.class);
    job.setInputFormat(HBaseInputFormat.class);

    //Configure projection schema
    job.set(HCatConstants.HCAT_KEY_OUTPUT_SCHEMA, HCatUtil.serialize(getProjectionSchema()));
    // A new-API Job is used only to let HCatInputFormat compute the input job info,
    // which is then copied back into the old-API JobConf.
    Job newJob = new Job(job);
    HCatInputFormat.setInput(newJob, MetaStoreUtils.DEFAULT_DATABASE_NAME, tableName);
    String inputJobString = newJob.getConfiguration().get(HCatConstants.HCAT_KEY_JOB_INFO);
    InputJobInfo info = (InputJobInfo) HCatUtil.deserialize(inputJobString);
    job.set(HCatConstants.HCAT_KEY_JOB_INFO, inputJobString);
    for (PartInfo partinfo : info.getPartitions()) {
        for (Entry<String, String> entry : partinfo.getJobProperties().entrySet()) {
            job.set(entry.getKey(), entry.getValue());
        }
    }
    assertEquals("testFamily:testQualifier1", job.get(TableInputFormat.SCAN_COLUMNS));

    job.setOutputFormat(org.apache.hadoop.mapred.TextOutputFormat.class);
    org.apache.hadoop.mapred.TextOutputFormat.setOutputPath(job, outputDir);
    job.setMapOutputKeyClass(BytesWritable.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputKeyClass(BytesWritable.class);
    job.setOutputValueClass(Text.class);
    job.setNumReduceTasks(0);

    RunningJob runJob = JobClient.runJob(job);
    runJob.waitForCompletion();
    assertTrue(runJob.isSuccessful());
    // The job ran MapReadProjectionHTable, so that mapper's state is what must be
    // checked (the original asserted on the unrelated MapReadProjHTable).
    // NOTE(review): assumes MapReadProjectionHTable exposes static error/count - confirm
    assertFalse(MapReadProjectionHTable.error);
    assertEquals(1, MapReadProjectionHTable.count);

    String dropTableQuery = "DROP TABLE " + tableName;
    CommandProcessorResponse responseThree = hcatDriver.run(dropTableQuery);
    assertEquals(0, responseThree.getResponseCode());

    boolean isHbaseTableThere = hAdmin.tableExists(tableName);
    assertFalse(isHbaseTableThere);
}

From source file:org.apache.hcatalog.hbase.TestHCatHBaseInputFormat.java

License:Apache License

/**
 * Exercises a projected read of an HCatalog-managed HBase table via the old-API
 * {@code HBaseInputFormat}.
 *
 * <p>Steps: create the table through the HCat driver, populate it, build a map-only
 * {@code JobConf} whose input job info and partition job properties come from
 * {@code HCatInputFormat.setInput}, assert the scan column restriction, run the job,
 * check the mapper's static {@code error}/{@code count} state, then drop the table
 * and confirm HBase no longer reports it.
 */
@Test
public void TestHBaseInputFormatProjectionReadMR() throws Exception {
    final String hbaseTable = newTableName("mytable");

    // DDL for an HBase-backed table with one family and two qualifiers.
    final String createDdl = "CREATE TABLE " + hbaseTable
            + "(key string, testqualifier1 string, testqualifier2 string) STORED BY "
            + "'org.apache.hcatalog.hbase.HBaseHCatStorageHandler'"
            + "TBLPROPERTIES ('hbase.columns.mapping'=':key,"
            + "testFamily:testQualifier1,testFamily:testQualifier2')";
    assertEquals(0, hcatDriver.run(createDdl).getResponseCode());

    final HBaseAdmin admin = new HBaseAdmin(getHbaseConf());
    assertTrue(admin.tableExists(hbaseTable));

    populateHBaseTable(hbaseTable, 5);

    final Configuration baseConf = new Configuration(hcatConf);
    baseConf.set(HCatConstants.HCAT_KEY_HIVE_CONF, HCatUtil.serialize(getHiveConf().getAllProperties()));

    // Start from a clean output directory.
    final Path resultDir = new Path(getTestDir(), "mapred/testHBaseInputFormatProjectionReadMR");
    final FileSystem fileSystem = getFileSystem();
    if (fileSystem.exists(resultDir)) {
        fileSystem.delete(resultDir, true);
    }

    // Old-API job definition.
    final JobConf job = new JobConf(baseConf);
    job.setJobName("hbase-scan-column");
    job.setJarByClass(this.getClass());
    job.setMapperClass(MapReadProjectionHTable.class);
    job.setInputFormat(HBaseInputFormat.class);

    // Projection schema, then let HCatInputFormat compute the input job info on a
    // throwaway new-API Job and copy the result back into the JobConf.
    job.set(HCatConstants.HCAT_KEY_OUTPUT_SCHEMA, HCatUtil.serialize(getProjectionSchema()));
    final Job helperJob = new Job(job);
    HCatInputFormat.setInput(helperJob, MetaStoreUtils.DEFAULT_DATABASE_NAME, hbaseTable);
    final String serializedJobInfo = helperJob.getConfiguration().get(HCatConstants.HCAT_KEY_JOB_INFO);
    final InputJobInfo jobInfo = (InputJobInfo) HCatUtil.deserialize(serializedJobInfo);
    job.set(HCatConstants.HCAT_KEY_JOB_INFO, serializedJobInfo);
    for (PartInfo partition : jobInfo.getPartitions()) {
        for (Entry<String, String> property : partition.getJobProperties().entrySet()) {
            job.set(property.getKey(), property.getValue());
        }
    }
    assertEquals("testFamily:testQualifier1", job.get(TableInputFormat.SCAN_COLUMNS));

    // Map-only text output.
    job.setOutputFormat(org.apache.hadoop.mapred.TextOutputFormat.class);
    org.apache.hadoop.mapred.TextOutputFormat.setOutputPath(job, resultDir);
    job.setMapOutputKeyClass(BytesWritable.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputKeyClass(BytesWritable.class);
    job.setOutputValueClass(Text.class);
    job.setNumReduceTasks(0);

    final RunningJob submitted = JobClient.runJob(job);
    submitted.waitForCompletion();
    assertTrue(submitted.isSuccessful());
    assertFalse(MapReadProjectionHTable.error);
    assertEquals(1, MapReadProjectionHTable.count);

    // Dropping the HCat table must also remove the HBase table.
    assertEquals(0, hcatDriver.run("DROP TABLE " + hbaseTable).getResponseCode());
    assertFalse(admin.tableExists(hbaseTable));
}

From source file:org.apache.hive.hcatalog.hbase.TestHiveHBaseTableOutputFormat.java

License:Apache License

/**
 * Runs a map-only old-API (mapred) job that writes three rows through
 * {@code HiveHBaseTableOutputFormat} and verifies the result by scanning the table.
 *
 * <p>The test writes the input lines to a file, configures the job with
 * {@code MapWrite} and {@code HiveHBaseTableOutputFormat}, stores a serialized
 * {@code OutputJobInfo} in the job, runs the job, and then checks every expected
 * qualifier/value pair and the number of rows returned by the scan.
 */
@Test
public void directOutputFormatTest() throws IOException, ClassNotFoundException, InterruptedException {
    String testName = "directOutputFormatTest";
    Path methodTestDir = new Path(getTestDir(), testName);

    String tableName = newTableName(testName).toLowerCase();
    String familyName = "my_family";
    byte[] familyNameBytes = Bytes.toBytes(familyName);

    //include hbase config in conf file
    Configuration conf = new Configuration(allConf);
    conf.set(HCatConstants.HCAT_KEY_HIVE_CONF, HCatUtil.serialize(allConf.getAllProperties()));

    //create table
    createTable(tableName, new String[] { familyName });

    // each line is "<row key>,<qualifier>:<value>,<qualifier>:<value>"
    String[] data = { "1,english:ONE,spanish:UNO", "2,english:TWO,spanish:DOS",
            "3,english:THREE,spanish:TRES" };

    // input/output settings
    Path inputPath = new Path(methodTestDir, "mr_input");
    getFileSystem().mkdirs(inputPath);
    FSDataOutputStream os = getFileSystem().create(new Path(inputPath, "inputFile.txt"));
    try {
        for (String line : data) {
            os.write(Bytes.toBytes(line + "\n"));
        }
    } finally {
        // close even when a write fails so the stream is not leaked
        os.close();
    }

    //create job
    JobConf job = new JobConf(conf);
    job.setJobName(testName);
    job.setWorkingDirectory(new Path(methodTestDir, "mr_work"));
    job.setJarByClass(this.getClass());
    job.setMapperClass(MapWrite.class);

    job.setInputFormat(org.apache.hadoop.mapred.TextInputFormat.class);
    org.apache.hadoop.mapred.TextInputFormat.setInputPaths(job, inputPath);
    // NOTE(review): the table name is set under three different keys; it is unclear
    // from this method which of them is actually required.
    job.setOutputFormat(HiveHBaseTableOutputFormat.class);
    job.set(HBaseSerDe.HBASE_TABLE_NAME, tableName);
    job.set(TableOutputFormat.OUTPUT_TABLE, tableName);
    job.set(HCatConstants.HCAT_DEFAULT_TOPIC_PREFIX + ".hbase.mapreduce.outputTableName", tableName);

    try {
        OutputJobInfo outputJobInfo = OutputJobInfo.create("default", tableName, null);
        job.set(HCatConstants.HCAT_KEY_OUTPUT_INFO, HCatUtil.serialize(outputJobInfo));
    } catch (Exception ex) {
        throw new IOException("Serialization error " + ex.getMessage(), ex);
    }

    job.setMapOutputKeyClass(BytesWritable.class);
    job.setMapOutputValueClass(HCatRecord.class);
    job.setOutputKeyClass(BytesWritable.class);
    job.setOutputValueClass(HCatRecord.class);
    job.setNumReduceTasks(0);

    RunningJob runJob = JobClient.runJob(job);
    runJob.waitForCompletion();
    assertTrue(runJob.isSuccessful());

    //verify
    int index = 0;
    HTable table = new HTable(conf, tableName);
    try {
        Scan scan = new Scan();
        scan.addFamily(familyNameBytes);
        ResultScanner scanner = table.getScanner(scan);
        try {
            // NOTE(review): relies on the scan returning rows in the same order as data[] - confirm
            for (Result result : scanner) {
                // fail with a clear message instead of ArrayIndexOutOfBoundsException
                assertTrue("table contains more rows than were written", index < data.length);
                String[] vals = data[index].split(",");
                // vals[0] is the row key; the remaining entries are qualifier:value pairs
                for (int i = 1; i < vals.length; i++) {
                    String[] pair = vals[i].split(":");
                    assertTrue(result.containsColumn(familyNameBytes, Bytes.toBytes(pair[0])));
                    assertEquals(pair[1],
                            Bytes.toString(result.getValue(familyNameBytes, Bytes.toBytes(pair[0]))));
                }
                index++;
            }
        } finally {
            scanner.close();
        }
    } finally {
        table.close();
    }
    assertEquals(data.length, index);
}

From source file:org.apache.mahout.avro.text.mapred.AvroDocumentProcessor.java

License:Apache License

/**
 * Configures and runs the Avro document processing job.
 *
 * <p>Registers the Writable and Avro serializations, installs {@code AvroComparator}
 * as the output key comparator, wires {@code ProcessorMapper} and
 * {@code IdentityReducer} with Avro input/output formats for {@code AvroDocument},
 * deletes any existing output path, and runs the job to completion.
 *
 * @param args exactly two entries: the input path and the output path
 * @return 0 if the job reports success, 1 if it does not, 2 if the arguments are invalid
 * @throws Exception if the job cannot be configured or submitted
 */
@Override
public int run(String[] args) throws Exception {
    if (args.length != 2) {
        System.err.println("Usage: wordcount <in> <out>");
        // A usage error must not look like success to the caller.
        return 2;
    }

    JobConf conf = new JobConf();
    conf.setStrings("io.serializations",
            new String[] { WritableSerialization.class.getName(), AvroSpecificSerialization.class.getName(),
                    AvroReflectSerialization.class.getName(), AvroGenericSerialization.class.getName() });

    AvroComparator.setSchema(AvroDocument._SCHEMA); //TODO: must be done in mapper, reducer configure method.

    conf.setClass("mapred.output.key.comparator.class", AvroComparator.class, RawComparator.class);

    conf.setJarByClass(AvroDocumentProcessor.class);
    conf.setMapperClass(ProcessorMapper.class);
    conf.setReducerClass(IdentityReducer.class);
    conf.setOutputKeyClass(AvroDocument.class);
    conf.setOutputValueClass(NullWritable.class);

    conf.setInputFormat(AvroInputFormat.class);
    conf.setOutputFormat(AvroOutputFormat.class);

    AvroInputFormat.setAvroInputClass(conf, AvroDocument.class);
    AvroOutputFormat.setAvroOutputClass(conf, AvroDocument.class);

    Path input = new Path(args[0]);
    Path output = new Path(args[1]);

    // Remove any previous output so the job can write to the same location.
    FileSystem fs = FileSystem.get(conf);
    fs.delete(output, true);

    FileInputFormat.addInputPath(conf, input);
    FileOutputFormat.setOutputPath(conf, output);

    RunningJob job = JobClient.runJob(conf);
    job.waitForCompletion();

    // isComplete() is always true after waitForCompletion(); the exit code has to
    // reflect whether the job actually succeeded.
    return job.isSuccessful() ? 0 : 1;
}

From source file:org.apache.mahout.avro.text.mapred.AvroDocumentsWordCount.java

License:Apache License

/**
 * Configures and runs the word count job over Avro documents.
 *
 * <p>Registers the Writable and Avro serializations, wires {@code TokenizerMapper}
 * with {@code IntSumReducer} as both combiner and reducer, reads {@code AvroDocument}
 * records through {@code AvroInputFormat}, writes text output, deletes any existing
 * output path, and runs the job to completion.
 *
 * @param args exactly two entries: the input path and the output path
 * @return 0 if the job reports success, 1 if it does not, 2 if the arguments are invalid
 * @throws Exception if the job cannot be configured or submitted
 */
@Override
public int run(String[] args) throws Exception {
    if (args.length != 2) {
        System.err.println("Usage: wordcount <in> <out>");
        // A usage error must not look like success to the caller.
        return 2;
    }

    JobConf conf = new JobConf();
    conf.setStrings("io.serializations",
            new String[] { WritableSerialization.class.getName(), AvroSpecificSerialization.class.getName(),
                    AvroReflectSerialization.class.getName(), AvroGenericSerialization.class.getName() });

    conf.setJarByClass(AvroDocumentsWordCount.class);
    conf.setMapperClass(TokenizerMapper.class);
    conf.setCombinerClass(IntSumReducer.class);
    conf.setReducerClass(IntSumReducer.class);
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(IntWritable.class);

    conf.setInputFormat(AvroInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);

    Path input = new Path(args[0]);
    Path output = new Path(args[1]);

    // Remove any previous output so the job can write to the same location.
    FileSystem fs = FileSystem.get(conf);
    fs.delete(output, true);

    AvroInputFormat.setAvroInputClass(conf, AvroDocument.class);
    FileInputFormat.addInputPath(conf, input);
    FileOutputFormat.setOutputPath(conf, output);

    RunningJob job = JobClient.runJob(conf);
    job.waitForCompletion();

    // Exit-code convention: 0 means success (the original returned 1 on success).
    return job.isSuccessful() ? 0 : 1;
}

From source file:org.apache.mahout.avro.text.mapred.WikipediaToAvroDocuments.java

License:Apache License

/**
 * Run the job/*  w  w  w .  j a  v a 2s  . c  om*/
 * 
 * @param input
 *          the input pathname String
 * @param output
 *          the output pathname String
 * @param catFile
 *          the file containing the Wikipedia categories
 * @param exactMatchOnly
 *          if true, then the Wikipedia category must match exactly instead of
 *          simply containing the category string
 * @param all
 *          if true select all categories
 */
public static int runJob(String input, String output, String catFile, boolean exactMatchOnly, boolean all)
        throws IOException {
    JobClient client = new JobClient();
    JobConf conf = new JobConf(WikipediaToAvroDocuments.class);
    if (log.isInfoEnabled()) {
        log.info("Input: " + input + " Out: " + output + " Categories: " + catFile + " All Files: " + all);
    }

    Path inPath = new Path(input);
    Path outPath = new Path(output);

    FileInputFormat.setInputPaths(conf, inPath);
    FileOutputFormat.setOutputPath(conf, outPath);
    //AvroOutputFormat.setClass(conf, AvroDocument.class);
    //AvroOutputFormat.setSchema(conf, AvroDocument._SCHEMA);

    conf.set("xmlinput.start", "<page>");
    conf.set("xmlinput.end", "</page>");
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(AvroDocument.class);
    conf.setBoolean("exact.match.only", exactMatchOnly);
    conf.setBoolean("all.files", all);
    conf.setMapperClass(WikipediaAvroDocumentMapper.class);
    conf.setInputFormat(XmlInputFormat.class);
    conf.setReducerClass(IdentityReducer.class);
    conf.setOutputFormat(AvroOutputFormat.class);

    AvroOutputFormat.setAvroOutputClass(conf, AvroDocument.class);

    FileSystem dfs = FileSystem.get(outPath.toUri(), conf);
    if (dfs.exists(outPath)) {
        dfs.delete(outPath, true);
    }

    Set<String> categories = new HashSet<String>();
    if (catFile.equals("") == false) {
        for (String line : new FileLineIterable(new File(catFile))) {
            categories.add(line.trim().toLowerCase());
        }
    }

    DefaultStringifier<Set<String>> setStringifier = new DefaultStringifier<Set<String>>(conf,
            GenericsUtil.getClass(categories));

    String categoriesStr = setStringifier.toString(categories);

    conf.set("wikipedia.categories", categoriesStr);

    client.setConf(conf);
    RunningJob job = JobClient.runJob(conf);
    job.waitForCompletion();
    return job.isSuccessful() ? 1 : 0;
}

From source file:org.apache.oozie.action.hadoop.LauncherMainTester.java

License:Apache License

/**
 * Submits the sleep mapper/reducer job and waits for it to finish.
 *
 * <p>Writes a dummy input file, submits the job, records the job id in
 * {@code JOB_ID_FILE_NAME} under the input path, waits for completion, and exits
 * the JVM with status -1 if the final job state is not {@code JobStatus.SUCCEEDED}.
 *
 * @param args {@code args[1]} is the input path and {@code args[2]} the output path
 * @throws IOException if file system access or job submission fails
 * @throws InterruptedException if monitoring the job is interrupted
 */
private static void executeJavaMapReduce(String[] args) throws IOException, InterruptedException {
    JobConf jConf = createSleepMapperReducerJobConf();
    final Path input = new Path(args[1]);
    FileInputFormat.setInputPaths(jConf, input);
    FileOutputFormat.setOutputPath(jConf, new Path(args[2]));
    writeToFile(input, jConf, "dummy\n", "data.txt");
    JobClient jc = new JobClient(jConf);
    try {
        System.out.println("Submitting MR job");
        RunningJob job = jc.submitJob(jConf);
        System.out.println("Submitted job " + job.getID().toString());
        // Persist the job id so it can be read back from the input directory.
        writeToFile(input, jConf, job.getID().toString(), JOB_ID_FILE_NAME);
        job.waitForCompletion();
        jc.monitorAndPrintJob(jConf, job);
        if (job.getJobState() != JobStatus.SUCCEEDED) {
            System.err.println(job.getJobState() + " job state instead of " + JobStatus.SUCCEEDED);
            System.exit(-1);
        }
    } finally {
        // Release the client's connection once the job has been fully observed.
        jc.close();
    }
}

From source file:org.dkpro.bigdata.hadoop.DkproHadoopDriver.java

License:Apache License

/**
 * Runs the UIMA pipeline.
 *
 * <p>Builds a {@code JobConf} from the tool configuration, registers the input
 * path(s) and output directory, optionally imports data through a collection reader
 * into a sequence file at the input location, deletes any previous output, applies
 * the output/input format and key/value class settings, lets {@code configure}
 * adjust the job, and finally runs the map-only job and waits for it.
 *
 * @param args {@code args[0]} is the input path or a comma-separated list of input
 *          paths, {@code args[1]} is the output directory; fewer than two arguments
 *          prints a usage line and exits the JVM with status 1
 * @return 0 if Hadoop job succeeded, 1 if job failed, 2 if it was killed, otherwise 3
 *
 * @see org.apache.hadoop.util.Tool#run(java.lang.String[])
 */
@Override
public int run(String[] args) throws Exception {
    if (args.length < 2) {
        System.out.println(
                "Usage: " + this.getClass().getSimpleName() + " [hadoop-params] input output [job-params]");
        System.exit(1);
    }

    this.job = new JobConf(getConf(), DkproHadoopDriver.class);
    final FileSystem fileSystem = FileSystem.get(this.job);

    // Record which factory class the tasks should use.
    this.job.set("dkpro.uima.factory", this.getClass().getName());

    // Input: either a single path, or a comma-separated list whose first entry
    // doubles as the import target for the collection reader below.
    final Path primaryInput;
    if (args[0].contains(",")) {
        final String[] inputSpecs = args[0].split(",");
        primaryInput = new Path(inputSpecs[0]);
        for (String spec : inputSpecs) {
            FileInputFormat.addInputPath(this.job, new Path(spec));
        }
    } else {
        primaryInput = new Path(args[0]);
        FileInputFormat.setInputPaths(this.job, primaryInput);
    }

    // Output: use a unique directory name unless overwriting is allowed (the default).
    final boolean overwrite = getConf().getBoolean("dkpro.output.overwrite", true);
    final String outputDirName = overwrite ? args[1] : getUniqueDirectoryName(args[1], fileSystem);
    final Path outputPath = new Path(outputDirName);

    // If a collection reader was defined, import its data to the input location first.
    final CollectionReader collectionReader = buildCollectionReader();
    if (collectionReader != null) {
        final AnalysisEngine sequenceFileWriter = AnalysisEngineFactory.createEngine(
                CASWritableSequenceFileWriter.class,
                CASWritableSequenceFileWriter.PARAM_PATH, primaryInput.toString(),
                CASWritableSequenceFileWriter.PARAM_COMPRESS, true,
                CASWritableSequenceFileWriter.PARAM_FS, this.job.get(("fs.default.name"), "file:/"));
        runPipeline(collectionReader, sequenceFileWriter);
    }

    // Clean up previous output before registering the output path.
    fileSystem.delete(outputPath, true);
    FileOutputFormat.setOutputPath(this.job, outputPath);

    // Compress output unless the configuration already states a preference.
    if (this.job.get("mapred.output.compress") == null) {
        this.job.setBoolean("mapred.output.compress", true);
    }
    // Just in case compression is on
    this.job.set("mapred.output.compression.type", "BLOCK");

    // Output format: nothing when CAS writing is disabled, otherwise plain text or
    // sequence files depending on dkpro.output.plaintext.
    if (!this.job.getBoolean("dkpro.output.writecas", true)) {
        this.job.setOutputFormat(NullOutputFormat.class);
    } else if (this.job.getBoolean("dkpro.output.plaintext", false)) {
        this.job.setOutputFormat(TextOutputFormat.class);
    } else {
        this.job.setOutputFormat(SequenceFileOutputFormat.class);
    }

    // Mapper/reducer and input format, with sequence files as the fallback input.
    this.job.setMapperClass(this.mapperClass);
    this.job.setReducerClass(this.reducerClass);
    if (getInputFormatClass() == null) {
        this.job.setInputFormat(SequenceFileInputFormat.class);
    } else {
        this.job.setInputFormat(getInputFormatClass());
    }

    this.job.setMapOutputKeyClass(Text.class);
    this.job.setMapOutputValueClass(BinCasWithTypeSystemWritable.class);
    this.job.setOutputKeyClass(Text.class);
    this.job.setOutputValueClass(BinCasWithTypeSystemWritable.class);
    this.job.setJobName(this.getClass().getSimpleName());
    this.job.setNumReduceTasks(0);

    // Give the concrete driver a chance to adjust the job after the defaults are set.
    configure(this.job);

    // create symlinks for distributed resources
    DistributedCache.createSymlink(this.job);

    final RunningJob submitted = JobClient.runJob(this.job);
    submitted.waitForCompletion();

    // Translate the final job state into the documented return codes.
    final int state = submitted.getJobState();
    if (state == JobStatus.SUCCEEDED) {
        return 0;
    }
    if (state == JobStatus.FAILED) {
        return 1;
    }
    if (state == JobStatus.KILLED) {
        return 2;
    }
    return 3;
}