Example usage for org.apache.hadoop.mapred RunningJob waitForCompletion

List of usage examples for org.apache.hadoop.mapred RunningJob waitForCompletion

Introduction

In this page you can find the example usage for org.apache.hadoop.mapred RunningJob waitForCompletion.

Prototype

public void waitForCompletion() throws IOException;

Source Link

Document

Blocks until the job is complete.

Usage

From source file:org.apache.hcatalog.hbase.TestHBaseBulkOutputFormat.java

License:Apache License

@Test
public void hbaseBulkOutputFormatTest() throws IOException, ClassNotFoundException, InterruptedException {
    String testName = "hbaseBulkOutputFormatTest";
    Path methodTestDir = new Path(getTestDir(), testName);
    LOG.info("starting: " + testName);

    String tableName = newTableName(testName).toLowerCase();
    String familyName = "my_family";
    byte[] familyNameBytes = Bytes.toBytes(familyName);

    //include hbase config in conf file
    Configuration conf = new Configuration(allConf);

    //create table
    conf.set(HBaseConstants.PROPERTY_OUTPUT_TABLE_NAME_KEY, tableName);
    conf.set("yarn.scheduler.capacity.root.queues", "default");
    conf.set("yarn.scheduler.capacity.root.default.capacity", "100");
    createTable(tableName, new String[] { familyName });

    String data[] = { "1,english:one,spanish:uno", "2,english:two,spanish:dos",
            "3,english:three,spanish:tres" };

    // input/output settings
    Path inputPath = new Path(methodTestDir, "mr_input");
    FSDataOutputStream os = getFileSystem().create(new Path(inputPath, "inputFile.txt"));
    for (String line : data)
        os.write(Bytes.toBytes(line + "\n"));
    os.close();/* ww  w.jav a 2 s.co  m*/
    Path interPath = new Path(methodTestDir, "inter");
    //create job
    JobConf job = new JobConf(conf);
    job.setWorkingDirectory(new Path(methodTestDir, "mr_work"));
    job.setJarByClass(this.getClass());
    job.setMapperClass(MapWriteOldMapper.class);

    job.setInputFormat(org.apache.hadoop.mapred.TextInputFormat.class);
    org.apache.hadoop.mapred.TextInputFormat.setInputPaths(job, inputPath);

    job.setOutputFormat(HBaseBulkOutputFormat.class);
    org.apache.hadoop.mapred.SequenceFileOutputFormat.setOutputPath(job, interPath);
    job.setOutputCommitter(HBaseBulkOutputCommitter.class);

    //manually create transaction
    RevisionManager rm = HBaseRevisionManagerUtil.getOpenedRevisionManager(conf);
    try {
        OutputJobInfo outputJobInfo = OutputJobInfo.create("default", tableName, null);
        Transaction txn = rm.beginWriteTransaction(tableName, Arrays.asList(familyName));
        outputJobInfo.getProperties().setProperty(HBaseConstants.PROPERTY_WRITE_TXN_KEY,
                HCatUtil.serialize(txn));
        job.set(HCatConstants.HCAT_KEY_OUTPUT_INFO, HCatUtil.serialize(outputJobInfo));
    } finally {
        rm.close();
    }

    job.setMapOutputKeyClass(ImmutableBytesWritable.class);
    job.setMapOutputValueClass(HCatRecord.class);

    job.setOutputKeyClass(ImmutableBytesWritable.class);
    job.setOutputValueClass(HCatRecord.class);

    job.setNumReduceTasks(0);

    RunningJob runJob = JobClient.runJob(job);
    runJob.waitForCompletion();
    assertTrue(runJob.isSuccessful());

    //verify
    HTable table = new HTable(conf, tableName);
    Scan scan = new Scan();
    scan.addFamily(familyNameBytes);
    ResultScanner scanner = table.getScanner(scan);
    int index = 0;
    for (Result result : scanner) {
        String vals[] = data[index].toString().split(",");
        for (int i = 1; i < vals.length; i++) {
            String pair[] = vals[i].split(":");
            assertTrue(result.containsColumn(familyNameBytes, Bytes.toBytes(pair[0])));
            assertEquals(pair[1], Bytes.toString(result.getValue(familyNameBytes, Bytes.toBytes(pair[0]))));
        }
        index++;
    }
    //test if load count is the same
    assertEquals(data.length, index);
    //test if scratch directory was erased
    assertFalse(FileSystem.get(job).exists(interPath));
}

From source file:org.apache.hcatalog.hbase.TestHBaseDirectOutputFormat.java

License:Apache License

@Test
public void directOutputFormatTest() throws IOException, ClassNotFoundException, InterruptedException {
    String testName = "directOutputFormatTest";
    Path methodTestDir = new Path(getTestDir(), testName);

    String tableName = newTableName(testName).toLowerCase();
    String familyName = "my_family";
    byte[] familyNameBytes = Bytes.toBytes(familyName);

    //include hbase config in conf file
    Configuration conf = new Configuration(allConf);
    conf.set(HCatConstants.HCAT_KEY_HIVE_CONF, HCatUtil.serialize(allConf.getAllProperties()));

    //create table
    createTable(tableName, new String[] { familyName });

    String data[] = { "1,english:ONE,spanish:UNO", "2,english:ONE,spanish:DOS", "3,english:ONE,spanish:TRES" };

    // input/output settings
    Path inputPath = new Path(methodTestDir, "mr_input");
    getFileSystem().mkdirs(inputPath);/* ww  w. j a va2 s . c  om*/
    FSDataOutputStream os = getFileSystem().create(new Path(inputPath, "inputFile.txt"));
    for (String line : data)
        os.write(Bytes.toBytes(line + "\n"));
    os.close();

    //create job
    JobConf job = new JobConf(conf);
    job.setJobName(testName);
    job.setWorkingDirectory(new Path(methodTestDir, "mr_work"));
    job.setJarByClass(this.getClass());
    job.setMapperClass(MapWrite.class);

    job.setInputFormat(org.apache.hadoop.mapred.TextInputFormat.class);
    org.apache.hadoop.mapred.TextInputFormat.setInputPaths(job, inputPath);

    job.setOutputFormat(HBaseDirectOutputFormat.class);
    job.set(TableOutputFormat.OUTPUT_TABLE, tableName);
    job.set(HBaseConstants.PROPERTY_OUTPUT_TABLE_NAME_KEY, tableName);

    //manually create transaction
    RevisionManager rm = HBaseRevisionManagerUtil.getOpenedRevisionManager(conf);
    try {
        OutputJobInfo outputJobInfo = OutputJobInfo.create("default", tableName, null);
        Transaction txn = rm.beginWriteTransaction(tableName, Arrays.asList(familyName));
        outputJobInfo.getProperties().setProperty(HBaseConstants.PROPERTY_WRITE_TXN_KEY,
                HCatUtil.serialize(txn));
        job.set(HCatConstants.HCAT_KEY_OUTPUT_INFO, HCatUtil.serialize(outputJobInfo));
    } finally {
        rm.close();
    }

    job.setMapOutputKeyClass(BytesWritable.class);
    job.setMapOutputValueClass(HCatRecord.class);
    job.setOutputKeyClass(BytesWritable.class);
    job.setOutputValueClass(HCatRecord.class);
    job.setNumReduceTasks(0);

    RunningJob runJob = JobClient.runJob(job);
    runJob.waitForCompletion();
    assertTrue(runJob.isSuccessful());

    //verify
    HTable table = new HTable(conf, tableName);
    Scan scan = new Scan();
    scan.addFamily(familyNameBytes);
    ResultScanner scanner = table.getScanner(scan);
    int index = 0;
    for (Result result : scanner) {
        String vals[] = data[index].toString().split(",");
        for (int i = 1; i < vals.length; i++) {
            String pair[] = vals[i].split(":");
            assertTrue(result.containsColumn(familyNameBytes, Bytes.toBytes(pair[0])));
            assertEquals(pair[1], Bytes.toString(result.getValue(familyNameBytes, Bytes.toBytes(pair[0]))));
        }
        index++;
    }
    assertEquals(data.length, index);
}

From source file:org.apache.hcatalog.hbase.TestHBaseInputFormat.java

License:Apache License

@Test
public void TestHBaseInputFormatProjectionReadMR() throws Exception {

    String tableName = newTableName("mytable");
    String tableQuery = "CREATE TABLE " + tableName
            + "(key string, testqualifier1 string, testqualifier2 string) STORED BY "
            + "'org.apache.hcatalog.hbase.HBaseHCatStorageHandler'"
            + "TBLPROPERTIES ('hbase.columns.mapping'=':key,"
            + "testFamily:testQualifier1,testFamily:testQualifier2')";

    CommandProcessorResponse responseTwo = hcatDriver.run(tableQuery);
    assertEquals(0, responseTwo.getResponseCode());

    HBaseAdmin hAdmin = new HBaseAdmin(getHbaseConf());
    boolean doesTableExist = hAdmin.tableExists(tableName);
    assertTrue(doesTableExist);// w w w.  ja  va  2  s  . co m

    populateHBaseTable(tableName, 5);

    Configuration conf = new Configuration(hcatConf);
    conf.set(HCatConstants.HCAT_KEY_HIVE_CONF, HCatUtil.serialize(getHiveConf().getAllProperties()));

    // output settings
    Path outputDir = new Path(getTestDir(), "mapred/testHBaseTableProjectionReadMR");
    FileSystem fs = getFileSystem();
    if (fs.exists(outputDir)) {
        fs.delete(outputDir, true);
    }
    // create job
    JobConf job = new JobConf(conf);
    job.setJobName("hbase-scan-column");
    job.setJarByClass(this.getClass());
    job.setMapperClass(MapReadProjectionHTable.class);
    job.setInputFormat(HBaseInputFormat.class);

    //Configure projection schema
    job.set(HCatConstants.HCAT_KEY_OUTPUT_SCHEMA, HCatUtil.serialize(getProjectionSchema()));
    Job newJob = new Job(job);
    HCatInputFormat.setInput(newJob, MetaStoreUtils.DEFAULT_DATABASE_NAME, tableName);
    String inputJobString = newJob.getConfiguration().get(HCatConstants.HCAT_KEY_JOB_INFO);
    InputJobInfo info = (InputJobInfo) HCatUtil.deserialize(inputJobString);
    job.set(HCatConstants.HCAT_KEY_JOB_INFO, inputJobString);
    for (PartInfo partinfo : info.getPartitions()) {
        for (Entry<String, String> entry : partinfo.getJobProperties().entrySet())
            job.set(entry.getKey(), entry.getValue());
    }
    assertEquals("testFamily:testQualifier1", job.get(TableInputFormat.SCAN_COLUMNS));

    job.setOutputFormat(org.apache.hadoop.mapred.TextOutputFormat.class);
    org.apache.hadoop.mapred.TextOutputFormat.setOutputPath(job, outputDir);
    job.setMapOutputKeyClass(BytesWritable.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputKeyClass(BytesWritable.class);
    job.setOutputValueClass(Text.class);
    job.setNumReduceTasks(0);

    RunningJob runJob = JobClient.runJob(job);
    runJob.waitForCompletion();
    assertTrue(runJob.isSuccessful());
    assertFalse(MapReadProjHTable.error);
    assertEquals(MapReadProjHTable.count, 1);

    String dropTableQuery = "DROP TABLE " + tableName;
    CommandProcessorResponse responseThree = hcatDriver.run(dropTableQuery);
    assertEquals(0, responseThree.getResponseCode());

    boolean isHbaseTableThere = hAdmin.tableExists(tableName);
    assertFalse(isHbaseTableThere);
}

From source file:org.apache.hcatalog.hbase.TestHCatHBaseInputFormat.java

License:Apache License

@Test
public void TestHBaseInputFormatProjectionReadMR() throws Exception {

    String tableName = newTableName("mytable");
    String tableQuery = "CREATE TABLE " + tableName
            + "(key string, testqualifier1 string, testqualifier2 string) STORED BY "
            + "'org.apache.hcatalog.hbase.HBaseHCatStorageHandler'"
            + "TBLPROPERTIES ('hbase.columns.mapping'=':key,"
            + "testFamily:testQualifier1,testFamily:testQualifier2')";

    CommandProcessorResponse responseTwo = hcatDriver.run(tableQuery);
    assertEquals(0, responseTwo.getResponseCode());

    HBaseAdmin hAdmin = new HBaseAdmin(getHbaseConf());
    boolean doesTableExist = hAdmin.tableExists(tableName);
    assertTrue(doesTableExist);/*w w  w  . j ava2s . co m*/

    populateHBaseTable(tableName, 5);

    Configuration conf = new Configuration(hcatConf);
    conf.set(HCatConstants.HCAT_KEY_HIVE_CONF, HCatUtil.serialize(getHiveConf().getAllProperties()));

    // output settings
    Path outputDir = new Path(getTestDir(), "mapred/testHBaseInputFormatProjectionReadMR");
    FileSystem fs = getFileSystem();
    if (fs.exists(outputDir)) {
        fs.delete(outputDir, true);
    }
    // create job
    JobConf job = new JobConf(conf);
    job.setJobName("hbase-scan-column");
    job.setJarByClass(this.getClass());
    job.setMapperClass(MapReadProjectionHTable.class);
    job.setInputFormat(HBaseInputFormat.class);

    //Configure projection schema
    job.set(HCatConstants.HCAT_KEY_OUTPUT_SCHEMA, HCatUtil.serialize(getProjectionSchema()));
    Job newJob = new Job(job);
    HCatInputFormat.setInput(newJob, MetaStoreUtils.DEFAULT_DATABASE_NAME, tableName);
    String inputJobString = newJob.getConfiguration().get(HCatConstants.HCAT_KEY_JOB_INFO);
    InputJobInfo info = (InputJobInfo) HCatUtil.deserialize(inputJobString);
    job.set(HCatConstants.HCAT_KEY_JOB_INFO, inputJobString);
    for (PartInfo partinfo : info.getPartitions()) {
        for (Entry<String, String> entry : partinfo.getJobProperties().entrySet())
            job.set(entry.getKey(), entry.getValue());
    }
    assertEquals("testFamily:testQualifier1", job.get(TableInputFormat.SCAN_COLUMNS));

    job.setOutputFormat(org.apache.hadoop.mapred.TextOutputFormat.class);
    org.apache.hadoop.mapred.TextOutputFormat.setOutputPath(job, outputDir);
    job.setMapOutputKeyClass(BytesWritable.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputKeyClass(BytesWritable.class);
    job.setOutputValueClass(Text.class);
    job.setNumReduceTasks(0);

    RunningJob runJob = JobClient.runJob(job);
    runJob.waitForCompletion();
    assertTrue(runJob.isSuccessful());
    assertFalse(MapReadProjectionHTable.error);
    assertEquals(1, MapReadProjectionHTable.count);

    String dropTableQuery = "DROP TABLE " + tableName;
    CommandProcessorResponse responseThree = hcatDriver.run(dropTableQuery);
    assertEquals(0, responseThree.getResponseCode());

    boolean isHbaseTableThere = hAdmin.tableExists(tableName);
    assertFalse(isHbaseTableThere);
}

From source file:org.apache.hive.hcatalog.hbase.TestHiveHBaseTableOutputFormat.java

License:Apache License

@Test
public void directOutputFormatTest() throws IOException, ClassNotFoundException, InterruptedException {
    String testName = "directOutputFormatTest";
    Path methodTestDir = new Path(getTestDir(), testName);

    String tableName = newTableName(testName).toLowerCase();
    String familyName = "my_family";
    byte[] familyNameBytes = Bytes.toBytes(familyName);

    //include hbase config in conf file
    Configuration conf = new Configuration(allConf);
    conf.set(HCatConstants.HCAT_KEY_HIVE_CONF, HCatUtil.serialize(allConf.getAllProperties()));

    //create table
    createTable(tableName, new String[] { familyName });

    String data[] = { "1,english:ONE,spanish:UNO", "2,english:TWO,spanish:DOS",
            "3,english:THREE,spanish:TRES" };

    // input/output settings
    Path inputPath = new Path(methodTestDir, "mr_input");
    getFileSystem().mkdirs(inputPath);//from   ww  w.j  a  v a  2s .  c om
    FSDataOutputStream os = getFileSystem().create(new Path(inputPath, "inputFile.txt"));
    for (String line : data)
        os.write(Bytes.toBytes(line + "\n"));
    os.close();

    //create job
    JobConf job = new JobConf(conf);
    job.setJobName(testName);
    job.setWorkingDirectory(new Path(methodTestDir, "mr_work"));
    job.setJarByClass(this.getClass());
    job.setMapperClass(MapWrite.class);

    job.setInputFormat(org.apache.hadoop.mapred.TextInputFormat.class);
    org.apache.hadoop.mapred.TextInputFormat.setInputPaths(job, inputPath);
    // why we need to set all the 3 properties??
    job.setOutputFormat(HiveHBaseTableOutputFormat.class);
    job.set(HBaseSerDe.HBASE_TABLE_NAME, tableName);
    job.set(TableOutputFormat.OUTPUT_TABLE, tableName);
    job.set(HCatConstants.HCAT_DEFAULT_TOPIC_PREFIX + ".hbase.mapreduce.outputTableName", tableName);

    try {
        OutputJobInfo outputJobInfo = OutputJobInfo.create("default", tableName, null);
        job.set(HCatConstants.HCAT_KEY_OUTPUT_INFO, HCatUtil.serialize(outputJobInfo));
    } catch (Exception ex) {
        throw new IOException("Serialization error " + ex.getMessage(), ex);
    }

    job.setMapOutputKeyClass(BytesWritable.class);
    job.setMapOutputValueClass(HCatRecord.class);
    job.setOutputKeyClass(BytesWritable.class);
    job.setOutputValueClass(HCatRecord.class);
    job.setNumReduceTasks(0);
    System.getProperty("java.classpath");
    RunningJob runJob = JobClient.runJob(job);
    runJob.waitForCompletion();
    assertTrue(runJob.isSuccessful());

    //verify
    HTable table = new HTable(conf, tableName);
    Scan scan = new Scan();
    scan.addFamily(familyNameBytes);
    ResultScanner scanner = table.getScanner(scan);
    int index = 0;
    for (Result result : scanner) {
        String vals[] = data[index].toString().split(",");
        for (int i = 1; i < vals.length; i++) {
            String pair[] = vals[i].split(":");
            assertTrue(result.containsColumn(familyNameBytes, Bytes.toBytes(pair[0])));
            assertEquals(pair[1], Bytes.toString(result.getValue(familyNameBytes, Bytes.toBytes(pair[0]))));
        }
        index++;
    }
    assertEquals(data.length, index);
}

From source file:org.apache.mahout.avro.text.mapred.AvroDocumentProcessor.java

License:Apache License

@Override
public int run(String[] args) throws Exception {
    JobConf conf = new JobConf();
    if (args.length != 2) {
        System.err.println("Usage: wordcount <in> <out>");
        return 0;
    }/*  ww  w.  j  a v a  2  s . com*/

    conf.setStrings("io.serializations",
            new String[] { WritableSerialization.class.getName(), AvroSpecificSerialization.class.getName(),
                    AvroReflectSerialization.class.getName(), AvroGenericSerialization.class.getName() });

    AvroComparator.setSchema(AvroDocument._SCHEMA); //TODO: must be done in mapper, reducer configure method.

    conf.setClass("mapred.output.key.comparator.class", AvroComparator.class, RawComparator.class);

    conf.setJarByClass(AvroDocumentProcessor.class);
    conf.setMapperClass(ProcessorMapper.class);
    conf.setReducerClass(IdentityReducer.class);
    conf.setOutputKeyClass(AvroDocument.class);
    conf.setOutputValueClass(NullWritable.class);

    conf.setInputFormat(AvroInputFormat.class);
    conf.setOutputFormat(AvroOutputFormat.class);

    AvroInputFormat.setAvroInputClass(conf, AvroDocument.class);
    AvroOutputFormat.setAvroOutputClass(conf, AvroDocument.class);

    Path input = new Path(args[0]);
    Path output = new Path(args[1]);

    FileSystem fs = FileSystem.get(conf);
    fs.delete(output, true);

    FileInputFormat.addInputPath(conf, input);
    FileOutputFormat.setOutputPath(conf, output);

    RunningJob job = JobClient.runJob(conf);
    job.waitForCompletion();

    return job.isComplete() ? 0 : 1;
}

From source file:org.apache.mahout.avro.text.mapred.AvroDocumentsWordCount.java

License:Apache License

@Override
public int run(String[] args) throws Exception {
    JobConf conf = new JobConf();
    if (args.length != 2) {
        System.err.println("Usage: wordcount <in> <out>");
        return 0;
    }/*from   w ww .  j  a v a 2s  . c  o  m*/

    conf.setStrings("io.serializations",
            new String[] { WritableSerialization.class.getName(), AvroSpecificSerialization.class.getName(),
                    AvroReflectSerialization.class.getName(), AvroGenericSerialization.class.getName() });

    conf.setJarByClass(AvroDocumentsWordCount.class);
    conf.setMapperClass(TokenizerMapper.class);
    conf.setCombinerClass(IntSumReducer.class);
    conf.setReducerClass(IntSumReducer.class);
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(IntWritable.class);

    conf.setInputFormat(AvroInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);

    Path input = new Path(args[0]);
    Path output = new Path(args[1]);

    FileSystem fs = FileSystem.get(conf);
    fs.delete(output, true);

    AvroInputFormat.setAvroInputClass(conf, AvroDocument.class);
    FileInputFormat.addInputPath(conf, input);
    FileOutputFormat.setOutputPath(conf, output);

    RunningJob job = JobClient.runJob(conf);
    job.waitForCompletion();

    return job.isSuccessful() ? 1 : 0;
}

From source file:org.apache.mahout.avro.text.mapred.WikipediaToAvroDocuments.java

License:Apache License

/**
 * Run the job/*  w  w  w .  j a  v a 2s  . c  om*/
 * 
 * @param input
 *          the input pathname String
 * @param output
 *          the output pathname String
 * @param catFile
 *          the file containing the Wikipedia categories
 * @param exactMatchOnly
 *          if true, then the Wikipedia category must match exactly instead of
 *          simply containing the category string
 * @param all
 *          if true select all categories
 */
public static int runJob(String input, String output, String catFile, boolean exactMatchOnly, boolean all)
        throws IOException {
    JobClient client = new JobClient();
    JobConf conf = new JobConf(WikipediaToAvroDocuments.class);
    if (log.isInfoEnabled()) {
        log.info("Input: " + input + " Out: " + output + " Categories: " + catFile + " All Files: " + all);
    }

    Path inPath = new Path(input);
    Path outPath = new Path(output);

    FileInputFormat.setInputPaths(conf, inPath);
    FileOutputFormat.setOutputPath(conf, outPath);
    //AvroOutputFormat.setClass(conf, AvroDocument.class);
    //AvroOutputFormat.setSchema(conf, AvroDocument._SCHEMA);

    conf.set("xmlinput.start", "<page>");
    conf.set("xmlinput.end", "</page>");
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(AvroDocument.class);
    conf.setBoolean("exact.match.only", exactMatchOnly);
    conf.setBoolean("all.files", all);
    conf.setMapperClass(WikipediaAvroDocumentMapper.class);
    conf.setInputFormat(XmlInputFormat.class);
    conf.setReducerClass(IdentityReducer.class);
    conf.setOutputFormat(AvroOutputFormat.class);

    AvroOutputFormat.setAvroOutputClass(conf, AvroDocument.class);

    FileSystem dfs = FileSystem.get(outPath.toUri(), conf);
    if (dfs.exists(outPath)) {
        dfs.delete(outPath, true);
    }

    Set<String> categories = new HashSet<String>();
    if (catFile.equals("") == false) {
        for (String line : new FileLineIterable(new File(catFile))) {
            categories.add(line.trim().toLowerCase());
        }
    }

    DefaultStringifier<Set<String>> setStringifier = new DefaultStringifier<Set<String>>(conf,
            GenericsUtil.getClass(categories));

    String categoriesStr = setStringifier.toString(categories);

    conf.set("wikipedia.categories", categoriesStr);

    client.setConf(conf);
    RunningJob job = JobClient.runJob(conf);
    job.waitForCompletion();
    return job.isSuccessful() ? 1 : 0;
}

From source file:org.apache.oozie.action.hadoop.LauncherMainTester.java

License:Apache License

private static void executeJavaMapReduce(String[] args) throws IOException, InterruptedException {
    JobConf jConf = createSleepMapperReducerJobConf();
    final Path input = new Path(args[1]);
    FileInputFormat.setInputPaths(jConf, input);
    FileOutputFormat.setOutputPath(jConf, new Path(args[2]));
    writeToFile(input, jConf, "dummy\n", "data.txt");
    JobClient jc = new JobClient(jConf);
    System.out.println("Submitting MR job");
    RunningJob job = jc.submitJob(jConf);
    System.out.println("Submitted job " + job.getID().toString());
    writeToFile(input, jConf, job.getID().toString(), JOB_ID_FILE_NAME);
    job.waitForCompletion();
    jc.monitorAndPrintJob(jConf, job);/*from   w  w w  .  j a  v  a  2 s  .  com*/
    if (job.getJobState() != JobStatus.SUCCEEDED) {
        System.err.println(job.getJobState() + " job state instead of" + JobStatus.SUCCEEDED);
        System.exit(-1);
    }
}

From source file:org.dkpro.bigdata.hadoop.DkproHadoopDriver.java

License:Apache License

/**
 * Runs the UIMA pipeline.// ww  w.j a v a  2 s . co  m
 * 
 * @return 0 if Hadoop job succeeded, 1 if job failed, 2 if it was killed, otherwise 3
 * 
 * @see org.apache.hadoop.util.Tool#run(java.lang.String[])
 */
@Override
public int run(String[] args) throws Exception {
    if (args.length < 2) {
        System.out.println(
                "Usage: " + this.getClass().getSimpleName() + " [hadoop-params] input output [job-params]");
        System.exit(1);
    }
    this.job = new JobConf(getConf(), DkproHadoopDriver.class);
    final FileSystem fs = FileSystem.get(this.job);
    // set the factory class name
    this.job.set("dkpro.uima.factory", this.getClass().getName());
    Path inputPath;
    if (args[0].contains(",")) {
        String[] inputPaths = args[0].split(",");
        inputPath = new Path(inputPaths[0]);
        for (String path : inputPaths) {
            FileInputFormat.addInputPath(job, new Path(path));
        }
    } else {
        inputPath = new Path(args[0]); // input
        FileInputFormat.setInputPaths(this.job, inputPath);

    }
    String outDir = args[1];
    if (!getConf().getBoolean("dkpro.output.overwrite", true)) {
        outDir = getUniqueDirectoryName(outDir, fs);
    }
    final Path outputPath = new Path(outDir);// output
    final CollectionReader reader = buildCollectionReader();
    // if a collection reader was defined, import data into hdfs
    // try {
    // final Class<?> c = Class.forName("org.apache.hadoop.io.compress.SnappyCodec");
    // FileOutputFormat.setOutputCompressorClass(this.job,
    // (Class<? extends CompressionCodec>) c);
    // }
    // catch (final Exception e) {
    //
    // }
    if (reader != null) {
        final AnalysisEngine xcasWriter = AnalysisEngineFactory.createEngine(
                CASWritableSequenceFileWriter.class, // createTypeSystemDescription(),
                CASWritableSequenceFileWriter.PARAM_PATH, inputPath.toString(),
                CASWritableSequenceFileWriter.PARAM_COMPRESS, true, CASWritableSequenceFileWriter.PARAM_FS,
                job.get(("fs.default.name"), "file:/"));
        runPipeline(reader, xcasWriter);
    }
    // cleanup previous output
    fs.delete(outputPath, true);
    // this is a sensible default for the UKP cluster
    //        int numMappers = 256;
    // if (args.length > 2) {
    // numMappers = Integer.parseInt(args[2]);
    // }

    FileOutputFormat.setOutputPath(this.job, outputPath);
    // SequenceFileOutputFormat.setCompressOutput(this.job, true);

    if (this.job.get("mapred.output.compress") == null) {
        this.job.setBoolean("mapred.output.compress", true);
    }
    // Just in case compression is on
    this.job.set("mapred.output.compression.type", "BLOCK");

    if (this.job.getBoolean("dkpro.output.writecas", true)) {
        if (this.job.getBoolean("dkpro.output.plaintext", false)) {
            this.job.setOutputFormat(TextOutputFormat.class);
        } else {
            this.job.setOutputFormat(SequenceFileOutputFormat.class);
        }
    } else {
        job.setOutputFormat(NullOutputFormat.class);
    }
    // this.job.set("mapred.output.compression.codec",
    // "org.apache.hadoop.io.compress.GzipCodec");
    // use compression
    // setup some sensible defaults
    this.job.setMapperClass(this.mapperClass);
    this.job.setReducerClass(this.reducerClass);
    if (getInputFormatClass() != null) {
        this.job.setInputFormat(getInputFormatClass());
    } else {
        this.job.setInputFormat(SequenceFileInputFormat.class);
    }
    // this.job.setOutputFormat(TextOutputFormat.class);
    this.job.setMapOutputKeyClass(Text.class);
    this.job.setMapOutputValueClass(BinCasWithTypeSystemWritable.class);
    this.job.setOutputKeyClass(Text.class);
    this.job.setOutputValueClass(BinCasWithTypeSystemWritable.class);
    this.job.setJobName(this.getClass().getSimpleName());
    // this.job.set("mapred.child.java.opts", "-Xmx1g");
    //        this.job.setInt("mapred.job.map.memory.mb", 1280);
    //        this.job.setInt("mapred.job.reduce.memory.mb", 1280);
    //        this.job.setNumMapTasks(numMappers);
    this.job.setNumReduceTasks(0);
    configure(this.job);

    // create symlinks for distributed resources
    DistributedCache.createSymlink(this.job);
    // sLogger.info("Running job "+job.getJobName());

    RunningJob runningJob = JobClient.runJob(this.job);
    runningJob.waitForCompletion();
    int status = runningJob.getJobState();
    if (status == JobStatus.SUCCEEDED) {
        return 0;
    } else if (status == JobStatus.FAILED) {
        return 1;
    } else if (status == JobStatus.KILLED) {
        return 2;
    } else {
        return 3;
    }

}