List of usage examples for org.apache.hadoop.mapred RunningJob waitForCompletion
public void waitForCompletion() throws IOException;
From source file:org.apache.hcatalog.hbase.TestHBaseBulkOutputFormat.java
License:Apache License
@Test public void hbaseBulkOutputFormatTest() throws IOException, ClassNotFoundException, InterruptedException { String testName = "hbaseBulkOutputFormatTest"; Path methodTestDir = new Path(getTestDir(), testName); LOG.info("starting: " + testName); String tableName = newTableName(testName).toLowerCase(); String familyName = "my_family"; byte[] familyNameBytes = Bytes.toBytes(familyName); //include hbase config in conf file Configuration conf = new Configuration(allConf); //create table conf.set(HBaseConstants.PROPERTY_OUTPUT_TABLE_NAME_KEY, tableName); conf.set("yarn.scheduler.capacity.root.queues", "default"); conf.set("yarn.scheduler.capacity.root.default.capacity", "100"); createTable(tableName, new String[] { familyName }); String data[] = { "1,english:one,spanish:uno", "2,english:two,spanish:dos", "3,english:three,spanish:tres" }; // input/output settings Path inputPath = new Path(methodTestDir, "mr_input"); FSDataOutputStream os = getFileSystem().create(new Path(inputPath, "inputFile.txt")); for (String line : data) os.write(Bytes.toBytes(line + "\n")); os.close();/* ww w.jav a 2 s.co m*/ Path interPath = new Path(methodTestDir, "inter"); //create job JobConf job = new JobConf(conf); job.setWorkingDirectory(new Path(methodTestDir, "mr_work")); job.setJarByClass(this.getClass()); job.setMapperClass(MapWriteOldMapper.class); job.setInputFormat(org.apache.hadoop.mapred.TextInputFormat.class); org.apache.hadoop.mapred.TextInputFormat.setInputPaths(job, inputPath); job.setOutputFormat(HBaseBulkOutputFormat.class); org.apache.hadoop.mapred.SequenceFileOutputFormat.setOutputPath(job, interPath); job.setOutputCommitter(HBaseBulkOutputCommitter.class); //manually create transaction RevisionManager rm = HBaseRevisionManagerUtil.getOpenedRevisionManager(conf); try { OutputJobInfo outputJobInfo = OutputJobInfo.create("default", tableName, null); Transaction txn = rm.beginWriteTransaction(tableName, Arrays.asList(familyName)); outputJobInfo.getProperties().setProperty(HBaseConstants.PROPERTY_WRITE_TXN_KEY, HCatUtil.serialize(txn)); job.set(HCatConstants.HCAT_KEY_OUTPUT_INFO, HCatUtil.serialize(outputJobInfo)); } finally { rm.close(); } job.setMapOutputKeyClass(ImmutableBytesWritable.class); job.setMapOutputValueClass(HCatRecord.class); job.setOutputKeyClass(ImmutableBytesWritable.class); job.setOutputValueClass(HCatRecord.class); job.setNumReduceTasks(0); RunningJob runJob = JobClient.runJob(job); runJob.waitForCompletion(); assertTrue(runJob.isSuccessful()); //verify HTable table = new HTable(conf, tableName); Scan scan = new Scan(); scan.addFamily(familyNameBytes); ResultScanner scanner = table.getScanner(scan); int index = 0; for (Result result : scanner) { String vals[] = data[index].toString().split(","); for (int i = 1; i < vals.length; i++) { String pair[] = vals[i].split(":"); assertTrue(result.containsColumn(familyNameBytes, Bytes.toBytes(pair[0]))); assertEquals(pair[1], Bytes.toString(result.getValue(familyNameBytes, Bytes.toBytes(pair[0])))); } index++; } //test if load count is the same assertEquals(data.length, index); //test if scratch directory was erased assertFalse(FileSystem.get(job).exists(interPath)); }
From source file:org.apache.hcatalog.hbase.TestHBaseDirectOutputFormat.java
License:Apache License
@Test public void directOutputFormatTest() throws IOException, ClassNotFoundException, InterruptedException { String testName = "directOutputFormatTest"; Path methodTestDir = new Path(getTestDir(), testName); String tableName = newTableName(testName).toLowerCase(); String familyName = "my_family"; byte[] familyNameBytes = Bytes.toBytes(familyName); //include hbase config in conf file Configuration conf = new Configuration(allConf); conf.set(HCatConstants.HCAT_KEY_HIVE_CONF, HCatUtil.serialize(allConf.getAllProperties())); //create table createTable(tableName, new String[] { familyName }); String data[] = { "1,english:ONE,spanish:UNO", "2,english:ONE,spanish:DOS", "3,english:ONE,spanish:TRES" }; // input/output settings Path inputPath = new Path(methodTestDir, "mr_input"); getFileSystem().mkdirs(inputPath);/* ww w. j a va2 s . c om*/ FSDataOutputStream os = getFileSystem().create(new Path(inputPath, "inputFile.txt")); for (String line : data) os.write(Bytes.toBytes(line + "\n")); os.close(); //create job JobConf job = new JobConf(conf); job.setJobName(testName); job.setWorkingDirectory(new Path(methodTestDir, "mr_work")); job.setJarByClass(this.getClass()); job.setMapperClass(MapWrite.class); job.setInputFormat(org.apache.hadoop.mapred.TextInputFormat.class); org.apache.hadoop.mapred.TextInputFormat.setInputPaths(job, inputPath); job.setOutputFormat(HBaseDirectOutputFormat.class); job.set(TableOutputFormat.OUTPUT_TABLE, tableName); job.set(HBaseConstants.PROPERTY_OUTPUT_TABLE_NAME_KEY, tableName); //manually create transaction RevisionManager rm = HBaseRevisionManagerUtil.getOpenedRevisionManager(conf); try { OutputJobInfo outputJobInfo = OutputJobInfo.create("default", tableName, null); Transaction txn = rm.beginWriteTransaction(tableName, Arrays.asList(familyName)); outputJobInfo.getProperties().setProperty(HBaseConstants.PROPERTY_WRITE_TXN_KEY, HCatUtil.serialize(txn)); job.set(HCatConstants.HCAT_KEY_OUTPUT_INFO, HCatUtil.serialize(outputJobInfo)); } finally { rm.close(); } job.setMapOutputKeyClass(BytesWritable.class); job.setMapOutputValueClass(HCatRecord.class); job.setOutputKeyClass(BytesWritable.class); job.setOutputValueClass(HCatRecord.class); job.setNumReduceTasks(0); RunningJob runJob = JobClient.runJob(job); runJob.waitForCompletion(); assertTrue(runJob.isSuccessful()); //verify HTable table = new HTable(conf, tableName); Scan scan = new Scan(); scan.addFamily(familyNameBytes); ResultScanner scanner = table.getScanner(scan); int index = 0; for (Result result : scanner) { String vals[] = data[index].toString().split(","); for (int i = 1; i < vals.length; i++) { String pair[] = vals[i].split(":"); assertTrue(result.containsColumn(familyNameBytes, Bytes.toBytes(pair[0]))); assertEquals(pair[1], Bytes.toString(result.getValue(familyNameBytes, Bytes.toBytes(pair[0])))); } index++; } assertEquals(data.length, index); }
From source file:org.apache.hcatalog.hbase.TestHBaseInputFormat.java
License:Apache License
@Test public void TestHBaseInputFormatProjectionReadMR() throws Exception { String tableName = newTableName("mytable"); String tableQuery = "CREATE TABLE " + tableName + "(key string, testqualifier1 string, testqualifier2 string) STORED BY " + "'org.apache.hcatalog.hbase.HBaseHCatStorageHandler'" + "TBLPROPERTIES ('hbase.columns.mapping'=':key," + "testFamily:testQualifier1,testFamily:testQualifier2')"; CommandProcessorResponse responseTwo = hcatDriver.run(tableQuery); assertEquals(0, responseTwo.getResponseCode()); HBaseAdmin hAdmin = new HBaseAdmin(getHbaseConf()); boolean doesTableExist = hAdmin.tableExists(tableName); assertTrue(doesTableExist);// w w w. ja va 2 s . co m populateHBaseTable(tableName, 5); Configuration conf = new Configuration(hcatConf); conf.set(HCatConstants.HCAT_KEY_HIVE_CONF, HCatUtil.serialize(getHiveConf().getAllProperties())); // output settings Path outputDir = new Path(getTestDir(), "mapred/testHBaseTableProjectionReadMR"); FileSystem fs = getFileSystem(); if (fs.exists(outputDir)) { fs.delete(outputDir, true); } // create job JobConf job = new JobConf(conf); job.setJobName("hbase-scan-column"); job.setJarByClass(this.getClass()); job.setMapperClass(MapReadProjectionHTable.class); job.setInputFormat(HBaseInputFormat.class); //Configure projection schema job.set(HCatConstants.HCAT_KEY_OUTPUT_SCHEMA, HCatUtil.serialize(getProjectionSchema())); Job newJob = new Job(job); HCatInputFormat.setInput(newJob, MetaStoreUtils.DEFAULT_DATABASE_NAME, tableName); String inputJobString = newJob.getConfiguration().get(HCatConstants.HCAT_KEY_JOB_INFO); InputJobInfo info = (InputJobInfo) HCatUtil.deserialize(inputJobString); job.set(HCatConstants.HCAT_KEY_JOB_INFO, inputJobString); for (PartInfo partinfo : info.getPartitions()) { for (Entry<String, String> entry : partinfo.getJobProperties().entrySet()) job.set(entry.getKey(), entry.getValue()); } assertEquals("testFamily:testQualifier1", job.get(TableInputFormat.SCAN_COLUMNS)); job.setOutputFormat(org.apache.hadoop.mapred.TextOutputFormat.class); org.apache.hadoop.mapred.TextOutputFormat.setOutputPath(job, outputDir); job.setMapOutputKeyClass(BytesWritable.class); job.setMapOutputValueClass(Text.class); job.setOutputKeyClass(BytesWritable.class); job.setOutputValueClass(Text.class); job.setNumReduceTasks(0); RunningJob runJob = JobClient.runJob(job); runJob.waitForCompletion(); assertTrue(runJob.isSuccessful()); assertFalse(MapReadProjHTable.error); assertEquals(MapReadProjHTable.count, 1); String dropTableQuery = "DROP TABLE " + tableName; CommandProcessorResponse responseThree = hcatDriver.run(dropTableQuery); assertEquals(0, responseThree.getResponseCode()); boolean isHbaseTableThere = hAdmin.tableExists(tableName); assertFalse(isHbaseTableThere); }
From source file:org.apache.hcatalog.hbase.TestHCatHBaseInputFormat.java
License:Apache License
@Test public void TestHBaseInputFormatProjectionReadMR() throws Exception { String tableName = newTableName("mytable"); String tableQuery = "CREATE TABLE " + tableName + "(key string, testqualifier1 string, testqualifier2 string) STORED BY " + "'org.apache.hcatalog.hbase.HBaseHCatStorageHandler'" + "TBLPROPERTIES ('hbase.columns.mapping'=':key," + "testFamily:testQualifier1,testFamily:testQualifier2')"; CommandProcessorResponse responseTwo = hcatDriver.run(tableQuery); assertEquals(0, responseTwo.getResponseCode()); HBaseAdmin hAdmin = new HBaseAdmin(getHbaseConf()); boolean doesTableExist = hAdmin.tableExists(tableName); assertTrue(doesTableExist);/*w w w . j ava2s . co m*/ populateHBaseTable(tableName, 5); Configuration conf = new Configuration(hcatConf); conf.set(HCatConstants.HCAT_KEY_HIVE_CONF, HCatUtil.serialize(getHiveConf().getAllProperties())); // output settings Path outputDir = new Path(getTestDir(), "mapred/testHBaseInputFormatProjectionReadMR"); FileSystem fs = getFileSystem(); if (fs.exists(outputDir)) { fs.delete(outputDir, true); } // create job JobConf job = new JobConf(conf); job.setJobName("hbase-scan-column"); job.setJarByClass(this.getClass()); job.setMapperClass(MapReadProjectionHTable.class); job.setInputFormat(HBaseInputFormat.class); //Configure projection schema job.set(HCatConstants.HCAT_KEY_OUTPUT_SCHEMA, HCatUtil.serialize(getProjectionSchema())); Job newJob = new Job(job); HCatInputFormat.setInput(newJob, MetaStoreUtils.DEFAULT_DATABASE_NAME, tableName); String inputJobString = newJob.getConfiguration().get(HCatConstants.HCAT_KEY_JOB_INFO); InputJobInfo info = (InputJobInfo) HCatUtil.deserialize(inputJobString); job.set(HCatConstants.HCAT_KEY_JOB_INFO, inputJobString); for (PartInfo partinfo : info.getPartitions()) { for (Entry<String, String> entry : partinfo.getJobProperties().entrySet()) job.set(entry.getKey(), entry.getValue()); } assertEquals("testFamily:testQualifier1", job.get(TableInputFormat.SCAN_COLUMNS)); job.setOutputFormat(org.apache.hadoop.mapred.TextOutputFormat.class); org.apache.hadoop.mapred.TextOutputFormat.setOutputPath(job, outputDir); job.setMapOutputKeyClass(BytesWritable.class); job.setMapOutputValueClass(Text.class); job.setOutputKeyClass(BytesWritable.class); job.setOutputValueClass(Text.class); job.setNumReduceTasks(0); RunningJob runJob = JobClient.runJob(job); runJob.waitForCompletion(); assertTrue(runJob.isSuccessful()); assertFalse(MapReadProjectionHTable.error); assertEquals(1, MapReadProjectionHTable.count); String dropTableQuery = "DROP TABLE " + tableName; CommandProcessorResponse responseThree = hcatDriver.run(dropTableQuery); assertEquals(0, responseThree.getResponseCode()); boolean isHbaseTableThere = hAdmin.tableExists(tableName); assertFalse(isHbaseTableThere); }
From source file:org.apache.hive.hcatalog.hbase.TestHiveHBaseTableOutputFormat.java
License:Apache License
@Test public void directOutputFormatTest() throws IOException, ClassNotFoundException, InterruptedException { String testName = "directOutputFormatTest"; Path methodTestDir = new Path(getTestDir(), testName); String tableName = newTableName(testName).toLowerCase(); String familyName = "my_family"; byte[] familyNameBytes = Bytes.toBytes(familyName); //include hbase config in conf file Configuration conf = new Configuration(allConf); conf.set(HCatConstants.HCAT_KEY_HIVE_CONF, HCatUtil.serialize(allConf.getAllProperties())); //create table createTable(tableName, new String[] { familyName }); String data[] = { "1,english:ONE,spanish:UNO", "2,english:TWO,spanish:DOS", "3,english:THREE,spanish:TRES" }; // input/output settings Path inputPath = new Path(methodTestDir, "mr_input"); getFileSystem().mkdirs(inputPath);//from ww w.j a v a 2s . c om FSDataOutputStream os = getFileSystem().create(new Path(inputPath, "inputFile.txt")); for (String line : data) os.write(Bytes.toBytes(line + "\n")); os.close(); //create job JobConf job = new JobConf(conf); job.setJobName(testName); job.setWorkingDirectory(new Path(methodTestDir, "mr_work")); job.setJarByClass(this.getClass()); job.setMapperClass(MapWrite.class); job.setInputFormat(org.apache.hadoop.mapred.TextInputFormat.class); org.apache.hadoop.mapred.TextInputFormat.setInputPaths(job, inputPath); // why we need to set all the 3 properties?? job.setOutputFormat(HiveHBaseTableOutputFormat.class); job.set(HBaseSerDe.HBASE_TABLE_NAME, tableName); job.set(TableOutputFormat.OUTPUT_TABLE, tableName); job.set(HCatConstants.HCAT_DEFAULT_TOPIC_PREFIX + ".hbase.mapreduce.outputTableName", tableName); try { OutputJobInfo outputJobInfo = OutputJobInfo.create("default", tableName, null); job.set(HCatConstants.HCAT_KEY_OUTPUT_INFO, HCatUtil.serialize(outputJobInfo)); } catch (Exception ex) { throw new IOException("Serialization error " + ex.getMessage(), ex); } job.setMapOutputKeyClass(BytesWritable.class); job.setMapOutputValueClass(HCatRecord.class); job.setOutputKeyClass(BytesWritable.class); job.setOutputValueClass(HCatRecord.class); job.setNumReduceTasks(0); System.getProperty("java.classpath"); RunningJob runJob = JobClient.runJob(job); runJob.waitForCompletion(); assertTrue(runJob.isSuccessful()); //verify HTable table = new HTable(conf, tableName); Scan scan = new Scan(); scan.addFamily(familyNameBytes); ResultScanner scanner = table.getScanner(scan); int index = 0; for (Result result : scanner) { String vals[] = data[index].toString().split(","); for (int i = 1; i < vals.length; i++) { String pair[] = vals[i].split(":"); assertTrue(result.containsColumn(familyNameBytes, Bytes.toBytes(pair[0]))); assertEquals(pair[1], Bytes.toString(result.getValue(familyNameBytes, Bytes.toBytes(pair[0])))); } index++; } assertEquals(data.length, index); }
From source file:org.apache.mahout.avro.text.mapred.AvroDocumentProcessor.java
License:Apache License
@Override public int run(String[] args) throws Exception { JobConf conf = new JobConf(); if (args.length != 2) { System.err.println("Usage: wordcount <in> <out>"); return 0; }/* ww w. j a v a 2 s . com*/ conf.setStrings("io.serializations", new String[] { WritableSerialization.class.getName(), AvroSpecificSerialization.class.getName(), AvroReflectSerialization.class.getName(), AvroGenericSerialization.class.getName() }); AvroComparator.setSchema(AvroDocument._SCHEMA); //TODO: must be done in mapper, reducer configure method. conf.setClass("mapred.output.key.comparator.class", AvroComparator.class, RawComparator.class); conf.setJarByClass(AvroDocumentProcessor.class); conf.setMapperClass(ProcessorMapper.class); conf.setReducerClass(IdentityReducer.class); conf.setOutputKeyClass(AvroDocument.class); conf.setOutputValueClass(NullWritable.class); conf.setInputFormat(AvroInputFormat.class); conf.setOutputFormat(AvroOutputFormat.class); AvroInputFormat.setAvroInputClass(conf, AvroDocument.class); AvroOutputFormat.setAvroOutputClass(conf, AvroDocument.class); Path input = new Path(args[0]); Path output = new Path(args[1]); FileSystem fs = FileSystem.get(conf); fs.delete(output, true); FileInputFormat.addInputPath(conf, input); FileOutputFormat.setOutputPath(conf, output); RunningJob job = JobClient.runJob(conf); job.waitForCompletion(); return job.isComplete() ? 0 : 1; }
From source file:org.apache.mahout.avro.text.mapred.AvroDocumentsWordCount.java
License:Apache License
@Override public int run(String[] args) throws Exception { JobConf conf = new JobConf(); if (args.length != 2) { System.err.println("Usage: wordcount <in> <out>"); return 0; }/*from w ww . j a v a 2s . c o m*/ conf.setStrings("io.serializations", new String[] { WritableSerialization.class.getName(), AvroSpecificSerialization.class.getName(), AvroReflectSerialization.class.getName(), AvroGenericSerialization.class.getName() }); conf.setJarByClass(AvroDocumentsWordCount.class); conf.setMapperClass(TokenizerMapper.class); conf.setCombinerClass(IntSumReducer.class); conf.setReducerClass(IntSumReducer.class); conf.setOutputKeyClass(Text.class); conf.setOutputValueClass(IntWritable.class); conf.setInputFormat(AvroInputFormat.class); conf.setOutputFormat(TextOutputFormat.class); Path input = new Path(args[0]); Path output = new Path(args[1]); FileSystem fs = FileSystem.get(conf); fs.delete(output, true); AvroInputFormat.setAvroInputClass(conf, AvroDocument.class); FileInputFormat.addInputPath(conf, input); FileOutputFormat.setOutputPath(conf, output); RunningJob job = JobClient.runJob(conf); job.waitForCompletion(); return job.isSuccessful() ? 1 : 0; }
From source file:org.apache.mahout.avro.text.mapred.WikipediaToAvroDocuments.java
License:Apache License
/** * Run the job/* w w w . j a v a 2s . c om*/ * * @param input * the input pathname String * @param output * the output pathname String * @param catFile * the file containing the Wikipedia categories * @param exactMatchOnly * if true, then the Wikipedia category must match exactly instead of * simply containing the category string * @param all * if true select all categories */ public static int runJob(String input, String output, String catFile, boolean exactMatchOnly, boolean all) throws IOException { JobClient client = new JobClient(); JobConf conf = new JobConf(WikipediaToAvroDocuments.class); if (log.isInfoEnabled()) { log.info("Input: " + input + " Out: " + output + " Categories: " + catFile + " All Files: " + all); } Path inPath = new Path(input); Path outPath = new Path(output); FileInputFormat.setInputPaths(conf, inPath); FileOutputFormat.setOutputPath(conf, outPath); //AvroOutputFormat.setClass(conf, AvroDocument.class); //AvroOutputFormat.setSchema(conf, AvroDocument._SCHEMA); conf.set("xmlinput.start", "<page>"); conf.set("xmlinput.end", "</page>"); conf.setOutputKeyClass(Text.class); conf.setOutputValueClass(AvroDocument.class); conf.setBoolean("exact.match.only", exactMatchOnly); conf.setBoolean("all.files", all); conf.setMapperClass(WikipediaAvroDocumentMapper.class); conf.setInputFormat(XmlInputFormat.class); conf.setReducerClass(IdentityReducer.class); conf.setOutputFormat(AvroOutputFormat.class); AvroOutputFormat.setAvroOutputClass(conf, AvroDocument.class); FileSystem dfs = FileSystem.get(outPath.toUri(), conf); if (dfs.exists(outPath)) { dfs.delete(outPath, true); } Set<String> categories = new HashSet<String>(); if (catFile.equals("") == false) { for (String line : new FileLineIterable(new File(catFile))) { categories.add(line.trim().toLowerCase()); } } DefaultStringifier<Set<String>> setStringifier = new DefaultStringifier<Set<String>>(conf, GenericsUtil.getClass(categories)); String categoriesStr = setStringifier.toString(categories); conf.set("wikipedia.categories", categoriesStr); client.setConf(conf); RunningJob job = JobClient.runJob(conf); job.waitForCompletion(); return job.isSuccessful() ? 1 : 0; }
From source file:org.apache.oozie.action.hadoop.LauncherMainTester.java
License:Apache License
private static void executeJavaMapReduce(String[] args) throws IOException, InterruptedException { JobConf jConf = createSleepMapperReducerJobConf(); final Path input = new Path(args[1]); FileInputFormat.setInputPaths(jConf, input); FileOutputFormat.setOutputPath(jConf, new Path(args[2])); writeToFile(input, jConf, "dummy\n", "data.txt"); JobClient jc = new JobClient(jConf); System.out.println("Submitting MR job"); RunningJob job = jc.submitJob(jConf); System.out.println("Submitted job " + job.getID().toString()); writeToFile(input, jConf, job.getID().toString(), JOB_ID_FILE_NAME); job.waitForCompletion(); jc.monitorAndPrintJob(jConf, job);/*from w w w . j a v a 2 s . com*/ if (job.getJobState() != JobStatus.SUCCEEDED) { System.err.println(job.getJobState() + " job state instead of" + JobStatus.SUCCEEDED); System.exit(-1); } }
From source file:org.dkpro.bigdata.hadoop.DkproHadoopDriver.java
License:Apache License
/** * Runs the UIMA pipeline.// ww w.j a v a 2 s . co m * * @return 0 if Hadoop job succeeded, 1 if job failed, 2 if it was killed, otherwise 3 * * @see org.apache.hadoop.util.Tool#run(java.lang.String[]) */ @Override public int run(String[] args) throws Exception { if (args.length < 2) { System.out.println( "Usage: " + this.getClass().getSimpleName() + " [hadoop-params] input output [job-params]"); System.exit(1); } this.job = new JobConf(getConf(), DkproHadoopDriver.class); final FileSystem fs = FileSystem.get(this.job); // set the factory class name this.job.set("dkpro.uima.factory", this.getClass().getName()); Path inputPath; if (args[0].contains(",")) { String[] inputPaths = args[0].split(","); inputPath = new Path(inputPaths[0]); for (String path : inputPaths) { FileInputFormat.addInputPath(job, new Path(path)); } } else { inputPath = new Path(args[0]); // input FileInputFormat.setInputPaths(this.job, inputPath); } String outDir = args[1]; if (!getConf().getBoolean("dkpro.output.overwrite", true)) { outDir = getUniqueDirectoryName(outDir, fs); } final Path outputPath = new Path(outDir);// output final CollectionReader reader = buildCollectionReader(); // if a collection reader was defined, import data into hdfs // try { // final Class<?> c = Class.forName("org.apache.hadoop.io.compress.SnappyCodec"); // FileOutputFormat.setOutputCompressorClass(this.job, // (Class<? extends CompressionCodec>) c); // } // catch (final Exception e) { // // } if (reader != null) { final AnalysisEngine xcasWriter = AnalysisEngineFactory.createEngine( CASWritableSequenceFileWriter.class, // createTypeSystemDescription(), CASWritableSequenceFileWriter.PARAM_PATH, inputPath.toString(), CASWritableSequenceFileWriter.PARAM_COMPRESS, true, CASWritableSequenceFileWriter.PARAM_FS, job.get(("fs.default.name"), "file:/")); runPipeline(reader, xcasWriter); } // cleanup previous output fs.delete(outputPath, true); // this is a sensible default for the UKP cluster // int numMappers = 256; // if (args.length > 2) { // numMappers = Integer.parseInt(args[2]); // } FileOutputFormat.setOutputPath(this.job, outputPath); // SequenceFileOutputFormat.setCompressOutput(this.job, true); if (this.job.get("mapred.output.compress") == null) { this.job.setBoolean("mapred.output.compress", true); } // Just in case compression is on this.job.set("mapred.output.compression.type", "BLOCK"); if (this.job.getBoolean("dkpro.output.writecas", true)) { if (this.job.getBoolean("dkpro.output.plaintext", false)) { this.job.setOutputFormat(TextOutputFormat.class); } else { this.job.setOutputFormat(SequenceFileOutputFormat.class); } } else { job.setOutputFormat(NullOutputFormat.class); } // this.job.set("mapred.output.compression.codec", // "org.apache.hadoop.io.compress.GzipCodec"); // use compression // setup some sensible defaults this.job.setMapperClass(this.mapperClass); this.job.setReducerClass(this.reducerClass); if (getInputFormatClass() != null) { this.job.setInputFormat(getInputFormatClass()); } else { this.job.setInputFormat(SequenceFileInputFormat.class); } // this.job.setOutputFormat(TextOutputFormat.class); this.job.setMapOutputKeyClass(Text.class); this.job.setMapOutputValueClass(BinCasWithTypeSystemWritable.class); this.job.setOutputKeyClass(Text.class); this.job.setOutputValueClass(BinCasWithTypeSystemWritable.class); this.job.setJobName(this.getClass().getSimpleName()); // this.job.set("mapred.child.java.opts", "-Xmx1g"); // this.job.setInt("mapred.job.map.memory.mb", 1280); // this.job.setInt("mapred.job.reduce.memory.mb", 1280); // this.job.setNumMapTasks(numMappers); this.job.setNumReduceTasks(0); configure(this.job); // create symlinks for distributed resources DistributedCache.createSymlink(this.job); // sLogger.info("Running job "+job.getJobName()); RunningJob runningJob = JobClient.runJob(this.job); runningJob.waitForCompletion(); int status = runningJob.getJobState(); if (status == JobStatus.SUCCEEDED) { return 0; } else if (status == JobStatus.FAILED) { return 1; } else if (status == JobStatus.KILLED) { return 2; } else { return 3; } }