List of usage examples for org.apache.hadoop.mapreduce.Job.isSuccessful()
public boolean isSuccessful() throws IOException
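isSuccessful() reports whether a completed job finished in the SUCCEEDED state, and throws IOException if the job's status cannot be retrieved. Every example below follows the same basic pattern: configure and submit the job, wait for it to finish, then branch on isSuccessful(). A minimal, self-contained sketch of that pattern (the driver class, job name, and argument layout here are illustrative, not taken from any of the source files below):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class MinimalDriver { // hypothetical driver class, for illustration only
    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration(), "minimal-issuccessful-example");
        job.setJarByClass(MinimalDriver.class);
        FileInputFormat.addInputPath(job, new Path(args[0]));   // input dir
        FileOutputFormat.setOutputPath(job, new Path(args[1])); // output dir

        job.waitForCompletion(true);             // block until the job finishes
        System.exit(job.isSuccessful() ? 0 : 1); // exit code mirrors the job outcome
    }
}

Note that waitForCompletion() already returns the same boolean, so an explicit isSuccessful() call is mainly useful when the result is checked later, from another method, or when asserting that a job did not succeed (as in the kill-job test below).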
From source file: org.apache.accumulo.utils.metanalysis.FilterMeta.java
License: Apache License
@Override
public int run(String[] args) throws Exception {
    String jobName = this.getClass().getSimpleName() + "_" + System.currentTimeMillis();
    @SuppressWarnings("deprecation")
    Job job = new Job(getConf(), jobName);
    job.setJarByClass(this.getClass());

    Path paths[] = new Path[args.length - 1];
    for (int i = 0; i < paths.length; i++) {
        paths[i] = new Path(args[i]);
    }

    job.setInputFormatClass(LogFileInputFormat.class);
    LogFileInputFormat.setInputPaths(job, paths);

    job.setOutputFormatClass(LogFileOutputFormat.class);
    LogFileOutputFormat.setOutputPath(job, new Path(args[args.length - 1]));

    job.setMapperClass(FilterMapper.class);
    job.setNumReduceTasks(0);

    job.waitForCompletion(true);
    return job.isSuccessful() ? 0 : 1;
}
From source file: org.apache.accumulo.utils.metanalysis.IndexMeta.java
License: Apache License
@Override
public int run(String[] args) throws Exception {
    Opts opts = new Opts();
    opts.parseArgs(IndexMeta.class.getName(), args);

    String jobName = this.getClass().getSimpleName() + "_" + System.currentTimeMillis();
    @SuppressWarnings("deprecation")
    Job job = new Job(getConf(), jobName);
    job.setJarByClass(this.getClass());

    List<String> logFiles = Arrays.asList(args).subList(4, args.length);
    Path paths[] = new Path[logFiles.size()];
    int count = 0;
    for (String logFile : logFiles) {
        paths[count++] = new Path(logFile);
    }

    job.setInputFormatClass(LogFileInputFormat.class);
    LogFileInputFormat.setInputPaths(job, paths);

    job.setNumReduceTasks(0);

    job.setOutputFormatClass(AccumuloOutputFormat.class);
    AccumuloOutputFormat.setZooKeeperInstance(job, opts.getConf());
    AccumuloOutputFormat.setConnectorInfo(job, opts.principal, opts.getToken());
    AccumuloOutputFormat.setCreateTables(job, false);

    job.setMapperClass(IndexMapper.class);

    Connector conn = opts.getConnector();
    try {
        conn.tableOperations().create("createEvents");
    } catch (TableExistsException tee) {
        Logger.getLogger(IndexMeta.class).warn("Table createEvents exists");
    }
    try {
        conn.tableOperations().create("tabletEvents");
    } catch (TableExistsException tee) {
        Logger.getLogger(IndexMeta.class).warn("Table tabletEvents exists");
    }

    job.waitForCompletion(true);
    return job.isSuccessful() ? 0 : 1;
}
From source file: org.apache.blur.mapreduce.lib.BlurOutputFormatTest.java
License: Apache License
public void testBlurOutputFormatCleanupDuringJobKillTest()
        throws IOException, InterruptedException, ClassNotFoundException {
    Path input = getInDir();
    Path output = getOutDir();
    _fileSystem.delete(input, true);
    _fileSystem.delete(output, true);
    // 1500 * 50 = 75,000
    writeRecordsFile(new Path(input, "part1"), 1, 50, 1, 1500, "cf1");
    // 100 * 5000 = 500,000
    writeRecordsFile(new Path(input, "part2"), 1, 5000, 2000, 100, "cf1");

    Job job = Job.getInstance(_conf, "blur index");
    job.setJarByClass(BlurOutputFormatTest.class);
    job.setMapperClass(CsvBlurMapper.class);
    job.setInputFormatClass(TextInputFormat.class);

    FileInputFormat.addInputPath(job, input);
    CsvBlurMapper.addColumns(job, "cf1", "col");

    Path tablePath = new Path(new Path(_root, "table"), "test");

    TableDescriptor tableDescriptor = new TableDescriptor();
    tableDescriptor.setShardCount(2);
    tableDescriptor.setTableUri(tablePath.toString());
    tableDescriptor.setName("test");

    createShardDirectories(getOutDir(), 2);

    BlurOutputFormat.setupJob(job, tableDescriptor);
    BlurOutputFormat.setOutputPath(job, output);
    BlurOutputFormat.setIndexLocally(job, false);

    job.submit();
    boolean killCalled = false;
    while (!job.isComplete()) {
        Thread.sleep(1000);
        System.out.printf("Killed [" + killCalled + "] Map [%f] Reduce [%f]%n",
                job.mapProgress() * 100, job.reduceProgress() * 100);
        if (job.reduceProgress() > 0.7 && !killCalled) {
            job.killJob();
            killCalled = true;
        }
    }

    assertFalse(job.isSuccessful());

    for (int i = 0; i < tableDescriptor.getShardCount(); i++) {
        Path path = new Path(output, ShardUtil.getShardName(i));
        FileSystem fileSystem = path.getFileSystem(job.getConfiguration());
        FileStatus[] listStatus = fileSystem.listStatus(path);
        assertEquals(toString(listStatus), 0, listStatus.length);
    }
}
From source file: org.apache.giraph.io.hbase.TestHBaseRootMarkerVertextFormat.java
License: Apache License
@Test
public void testHBaseInputOutput() throws Exception {
    if (System.getProperty("prop.mapred.job.tracker") != null) {
        if (log.isInfoEnabled())
            log.info("testHBaseInputOutput: Ignore this test if not local mode.");
        return;
    }

    File jarTest = new File(System.getProperty("prop.jarLocation"));
    if (!jarTest.exists()) {
        fail("Could not find Giraph jar at " + "location specified by 'prop.jarLocation'. "
                + "Make sure you built the main Giraph artifact?.");
    }

    FileSystem fs = null;
    Path hbaseRootdir = null;
    try {
        MiniHBaseCluster cluster = testUtil.startMiniCluster(1);
        cluster.waitForActiveAndReadyMaster();
        testUtil.startMiniMapReduceCluster();

        // Let's set up the hbase root directory.
        Configuration conf = testUtil.getConfiguration();
        try {
            fs = testUtil.getTestFileSystem();
            String randomStr = UUID.randomUUID().toString();
            String tmpdir = System.getProperty("java.io.tmpdir") + "/" + randomStr + "/";
            hbaseRootdir = fs.makeQualified(new Path(tmpdir));
            conf.set(HConstants.HBASE_DIR, hbaseRootdir.toString());
            fs.mkdirs(hbaseRootdir);
        } catch (IOException ioe) {
            fail("Could not create hbase root directory.");
        }

        // First let's load some data using ImportTsv into our mock table.
        String INPUT_FILE = hbaseRootdir.toString() + "/graph.csv";
        String[] args = new String[] { "-Dimporttsv.columns=HBASE_ROW_KEY,cf:" + QUALIFER,
                "-Dimporttsv.separator=" + "\u002c", TABLE_NAME, INPUT_FILE };

        GenericOptionsParser opts = new GenericOptionsParser(testUtil.getConfiguration(), args);
        args = opts.getRemainingArgs();

        fs = FileSystem.get(conf);
        fs.setConf(conf);
        Path inputPath = fs.makeQualified(new Path(hbaseRootdir, "graph.csv"));
        FSDataOutputStream op = fs.create(inputPath, true);
        String line1 = "0001,0002\n";
        String line2 = "0002,0004\n";
        String line3 = "0003,0005\n";
        String line4 = "0004,-1\n";
        String line5 = "0005,-1\n";
        op.write(line1.getBytes());
        op.write(line2.getBytes());
        op.write(line3.getBytes());
        op.write(line4.getBytes());
        op.write(line5.getBytes());
        op.close();

        final byte[] FAM = Bytes.toBytes(FAMILY);
        final byte[] TAB = Bytes.toBytes(TABLE_NAME);

        HTableDescriptor desc = new HTableDescriptor(TAB);
        desc.addFamily(new HColumnDescriptor(FAM));
        HBaseAdmin hbaseAdmin = new HBaseAdmin(conf);
        if (hbaseAdmin.isTableAvailable(TABLE_NAME)) {
            hbaseAdmin.disableTable(TABLE_NAME);
            hbaseAdmin.deleteTable(TABLE_NAME);
        }
        hbaseAdmin.createTable(desc);

        // Do the import
        Job job = ImportTsv.createSubmittableJob(conf, args);
        job.waitForCompletion(false);
        assertTrue(job.isSuccessful());
        if (log.isInfoEnabled())
            log.info("ImportTsv successful. Running HBase Giraph job.");

        // Now operate over HBase using Vertex I/O formats
        conf.set(TableInputFormat.INPUT_TABLE, TABLE_NAME);
        conf.set(TableOutputFormat.OUTPUT_TABLE, TABLE_NAME);

        GiraphJob giraphJob = new GiraphJob(conf, BspCase.getCallingMethodName());
        GiraphConfiguration giraphConf = giraphJob.getConfiguration();
        setupConfiguration(giraphJob);
        giraphConf.setComputationClass(EdgeNotification.class);
        giraphConf.setVertexInputFormatClass(TableEdgeInputFormat.class);
        giraphConf.setVertexOutputFormatClass(TableEdgeOutputFormat.class);
        assertTrue(giraphJob.run(true));
        if (log.isInfoEnabled())
            log.info("Giraph job successful. Checking output qualifier.");

        // Do a get on row 0002, it should have a parent of 0001
        // if the outputFormat worked.
        HTable table = new HTable(conf, TABLE_NAME);
        Result result = table.get(new Get("0002".getBytes()));
        byte[] parentBytes = result.getValue(FAMILY.getBytes(), OUTPUT_FIELD.getBytes());
        assertNotNull(parentBytes);
        assertTrue(parentBytes.length > 0);
        assertEquals("0001", Bytes.toString(parentBytes));
    } finally {
        testUtil.shutdownMiniMapReduceCluster();
        testUtil.shutdownMiniCluster();
    }
}
From source file: org.apache.gobblin.runtime.mapreduce.MRTask.java
License: Apache License
@Override
public void run() {
    try {
        Job job = createJob();

        if (job == null) {
            log.info("No MR job created. Skipping.");
            this.workingState = WorkUnitState.WorkingState.SUCCESSFUL;
            this.eventSubmitter.submit(Events.MR_JOB_SKIPPED);
            onSkippedMRJob();
            return;
        }

        job.submit();

        log.info("MR tracking URL {} for job {}", job.getTrackingURL(), job.getJobName());

        this.eventSubmitter.submit(Events.MR_JOB_STARTED_EVENT, Events.JOB_URL, job.getTrackingURL());
        job.waitForCompletion(false);
        this.mrJob = job;

        if (job.isSuccessful()) {
            this.eventSubmitter.submit(Events.MR_JOB_SUCCESSFUL, Events.JOB_URL, job.getTrackingURL());
            this.onMRTaskComplete(true, null);
        } else {
            this.eventSubmitter.submit(Events.MR_JOB_FAILED, Events.JOB_URL, job.getTrackingURL());
            this.onMRTaskComplete(false,
                    new IOException(String.format("MR Job:%s is not successful", job.getTrackingURL())));
        }
    } catch (Throwable t) {
        log.error("Failed to run MR job.", t);
        this.eventSubmitter.submit(Events.MR_JOB_FAILED, Events.FAILURE_CONTEXT, t.getMessage());
        this.onMRTaskComplete(false, t);
    }
}
From source file: org.apache.hadoop.examples.QuasiMonteCarlo.java
License: Apache License
/**
 * Run a map/reduce job for estimating Pi.
 *
 * @return the estimated value of Pi
 */
public static BigDecimal estimatePi(int numMaps, long numPoints, Path tmpDir, Configuration conf)
        throws IOException, ClassNotFoundException, InterruptedException {
    Job job = Job.getInstance(conf);
    // setup job conf
    job.setJobName(QuasiMonteCarlo.class.getSimpleName());
    job.setJarByClass(QuasiMonteCarlo.class);

    job.setInputFormatClass(SequenceFileInputFormat.class);

    job.setOutputKeyClass(BooleanWritable.class);
    job.setOutputValueClass(LongWritable.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    job.setMapperClass(QmcMapper.class);

    job.setReducerClass(QmcReducer.class);
    job.setNumReduceTasks(1);

    // turn off speculative execution, because DFS doesn't handle
    // multiple writers to the same file.
    job.setSpeculativeExecution(false);

    // setup input/output directories
    final Path inDir = new Path(tmpDir, "in");
    final Path outDir = new Path(tmpDir, "out");
    FileInputFormat.setInputPaths(job, inDir);
    FileOutputFormat.setOutputPath(job, outDir);

    final FileSystem fs = FileSystem.get(conf);
    if (fs.exists(tmpDir)) {
        throw new IOException(
                "Tmp directory " + fs.makeQualified(tmpDir) + " already exists. Please remove it first.");
    }
    if (!fs.mkdirs(inDir)) {
        throw new IOException("Cannot create input directory " + inDir);
    }

    try {
        // generate an input file for each map task
        for (int i = 0; i < numMaps; ++i) {
            final Path file = new Path(inDir, "part" + i);
            final LongWritable offset = new LongWritable(i * numPoints);
            final LongWritable size = new LongWritable(numPoints);
            final SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, file,
                    LongWritable.class, LongWritable.class, CompressionType.NONE);
            try {
                writer.append(offset, size);
            } finally {
                writer.close();
            }
            System.out.println("Wrote input for Map #" + i);
        }

        // start a map/reduce job
        System.out.println("Starting Job");
        final long startTime = Time.monotonicNow();
        job.waitForCompletion(true);
        if (!job.isSuccessful()) {
            System.out.println("Job " + job.getJobID() + " failed!");
            System.exit(1);
        }
        final double duration = (Time.monotonicNow() - startTime) / 1000.0;
        System.out.println("Job Finished in " + duration + " seconds");

        // read outputs
        Path inFile = new Path(outDir, "reduce-out");
        LongWritable numInside = new LongWritable();
        LongWritable numOutside = new LongWritable();
        SequenceFile.Reader reader = new SequenceFile.Reader(fs, inFile, conf);
        try {
            reader.next(numInside, numOutside);
        } finally {
            reader.close();
        }

        // compute estimated value
        final BigDecimal numTotal = BigDecimal.valueOf(numMaps).multiply(BigDecimal.valueOf(numPoints));
        return BigDecimal.valueOf(4).setScale(20).multiply(BigDecimal.valueOf(numInside.get()))
                .divide(numTotal, RoundingMode.HALF_UP);
    } finally {
        fs.delete(tmpDir, true);
    }
}
From source file: org.apache.hcatalog.mapreduce.TestHCatDynamicPartitioned.java
License: Apache License
protected void runHCatDynamicPartitionedTable(boolean asSingleMapTask) throws Exception {
    generateWriteRecords(NUM_RECORDS, NUM_PARTITIONS, 0);
    runMRCreate(null, dataColumns, writeRecords, NUM_RECORDS, true, asSingleMapTask);

    runMRRead(NUM_RECORDS);

    // Read with partition filter
    runMRRead(4, "p1 = \"0\"");
    runMRRead(8, "p1 = \"1\" or p1 = \"3\"");
    runMRRead(4, "p1 = \"4\"");

    // read from hive to test
    String query = "select * from " + tableName;
    int retCode = driver.run(query).getResponseCode();

    if (retCode != 0) {
        throw new Exception("Error " + retCode + " running query " + query);
    }

    ArrayList<String> res = new ArrayList<String>();
    driver.getResults(res);
    assertEquals(NUM_RECORDS, res.size());

    // Test for duplicate publish
    IOException exc = null;
    try {
        generateWriteRecords(NUM_RECORDS, NUM_PARTITIONS, 0);
        Job job = runMRCreate(null, dataColumns, writeRecords, NUM_RECORDS, false);

        if (HCatUtil.isHadoop23()) {
            Assert.assertTrue(job.isSuccessful() == false);
        }
    } catch (IOException e) {
        exc = e;
    }

    if (!HCatUtil.isHadoop23()) {
        assertTrue(exc != null);
        assertTrue(exc instanceof HCatException);
        assertTrue(
                "Got exception of type [" + ((HCatException) exc).getErrorType().toString()
                        + "] Expected ERROR_PUBLISHING_PARTITION or ERROR_MOVE_FAILED",
                (ErrorType.ERROR_PUBLISHING_PARTITION == ((HCatException) exc).getErrorType())
                        || (ErrorType.ERROR_MOVE_FAILED == ((HCatException) exc).getErrorType()));
    }

    query = "show partitions " + tableName;
    retCode = driver.run(query).getResponseCode();
    if (retCode != 0) {
        throw new Exception("Error " + retCode + " running query " + query);
    }
    res = new ArrayList<String>();
    driver.getResults(res);
    assertEquals(NUM_PARTITIONS, res.size());

    query = "select * from " + tableName;
    retCode = driver.run(query).getResponseCode();
    if (retCode != 0) {
        throw new Exception("Error " + retCode + " running query " + query);
    }
    res = new ArrayList<String>();
    driver.getResults(res);
    assertEquals(NUM_RECORDS, res.size());
}
From source file: org.apache.hcatalog.mapreduce.TestSequenceFileReadWrite.java
License: Apache License
@Test
public void testSequenceTableWriteReadMR() throws Exception {
    Initialize();
    String createTable = "CREATE TABLE demo_table_2(a0 int, a1 String, a2 String) STORED AS SEQUENCEFILE";
    driver.run("drop table demo_table_2");
    int retCode1 = driver.run(createTable).getResponseCode();
    assertTrue(retCode1 == 0);

    Configuration conf = new Configuration();
    conf.set(HCatConstants.HCAT_KEY_HIVE_CONF, HCatUtil.serialize(hiveConf.getAllProperties()));
    Job job = new Job(conf, "Write-hcat-seq-table");
    job.setJarByClass(TestSequenceFileReadWrite.class);

    job.setMapperClass(Map.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(DefaultHCatRecord.class);

    job.setInputFormatClass(TextInputFormat.class);
    TextInputFormat.setInputPaths(job, INPUT_FILE_NAME);

    HCatOutputFormat.setOutput(job,
            OutputJobInfo.create(MetaStoreUtils.DEFAULT_DATABASE_NAME, "demo_table_2", null));
    job.setOutputFormatClass(HCatOutputFormat.class);
    HCatOutputFormat.setSchema(job, getSchema());
    job.setNumReduceTasks(0);
    assertTrue(job.waitForCompletion(true));
    if (!HCatUtil.isHadoop23()) {
        new FileOutputCommitterContainer(job, null).commitJob(job);
    }
    assertTrue(job.isSuccessful());

    server.setBatchOn();
    server.registerQuery("C = load 'default.demo_table_2' using org.apache.hcatalog.pig.HCatLoader();");
    server.executeBatch();
    Iterator<Tuple> XIter = server.openIterator("C");
    int numTuplesRead = 0;
    while (XIter.hasNext()) {
        Tuple t = XIter.next();
        assertEquals(3, t.size());
        assertEquals(t.get(0).toString(), "" + numTuplesRead);
        assertEquals(t.get(1).toString(), "a" + numTuplesRead);
        assertEquals(t.get(2).toString(), "b" + numTuplesRead);
        numTuplesRead++;
    }
    assertEquals(input.length, numTuplesRead);
}
From source file: org.apache.hcatalog.mapreduce.TestSequenceFileReadWrite.java
License: Apache License
@Test
public void testTextTableWriteReadMR() throws Exception {
    Initialize();
    String createTable = "CREATE TABLE demo_table_3(a0 int, a1 String, a2 String) STORED AS TEXTFILE";
    driver.run("drop table demo_table_3");
    int retCode1 = driver.run(createTable).getResponseCode();
    assertTrue(retCode1 == 0);

    Configuration conf = new Configuration();
    conf.set(HCatConstants.HCAT_KEY_HIVE_CONF, HCatUtil.serialize(hiveConf.getAllProperties()));
    Job job = new Job(conf, "Write-hcat-text-table");
    job.setJarByClass(TestSequenceFileReadWrite.class);

    job.setMapperClass(Map.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(DefaultHCatRecord.class);

    job.setInputFormatClass(TextInputFormat.class);
    job.setNumReduceTasks(0);
    TextInputFormat.setInputPaths(job, INPUT_FILE_NAME);

    HCatOutputFormat.setOutput(job,
            OutputJobInfo.create(MetaStoreUtils.DEFAULT_DATABASE_NAME, "demo_table_3", null));
    job.setOutputFormatClass(HCatOutputFormat.class);
    HCatOutputFormat.setSchema(job, getSchema());

    assertTrue(job.waitForCompletion(true));
    if (!HCatUtil.isHadoop23()) {
        new FileOutputCommitterContainer(job, null).commitJob(job);
    }
    assertTrue(job.isSuccessful());

    server.setBatchOn();
    server.registerQuery("D = load 'default.demo_table_3' using org.apache.hcatalog.pig.HCatLoader();");
    server.executeBatch();
    Iterator<Tuple> XIter = server.openIterator("D");
    int numTuplesRead = 0;
    while (XIter.hasNext()) {
        Tuple t = XIter.next();
        assertEquals(3, t.size());
        assertEquals(t.get(0).toString(), "" + numTuplesRead);
        assertEquals(t.get(1).toString(), "a" + numTuplesRead);
        assertEquals(t.get(2).toString(), "b" + numTuplesRead);
        numTuplesRead++;
    }
    assertEquals(input.length, numTuplesRead);
}
From source file: org.apache.hive.hcatalog.mapreduce.TestHCatDynamicPartitioned.java
License: Apache License
protected void runHCatDynamicPartitionedTable(boolean asSingleMapTask, String customDynamicPathPattern)
        throws Exception {
    generateWriteRecords(NUM_RECORDS, NUM_PARTITIONS, 0);
    runMRCreate(null, dataColumns, writeRecords, NUM_RECORDS, true, asSingleMapTask, customDynamicPathPattern);

    runMRRead(NUM_RECORDS);

    // Read with partition filter
    runMRRead(4, "p1 = \"0\"");
    runMRRead(8, "p1 = \"1\" or p1 = \"3\"");
    runMRRead(4, "p1 = \"4\"");

    // read from hive to test
    String query = "select * from " + tableName;
    int retCode = driver.run(query).getResponseCode();

    if (retCode != 0) {
        throw new Exception("Error " + retCode + " running query " + query);
    }

    ArrayList<String> res = new ArrayList<String>();
    driver.getResults(res);
    assertEquals(NUM_RECORDS, res.size());

    // Test for duplicate publish
    IOException exc = null;
    try {
        generateWriteRecords(NUM_RECORDS, NUM_PARTITIONS, 0);
        Job job = runMRCreate(null, dataColumns, writeRecords, NUM_RECORDS, false, true,
                customDynamicPathPattern);

        if (HCatUtil.isHadoop23()) {
            Assert.assertTrue(job.isSuccessful() == false);
        }
    } catch (IOException e) {
        exc = e;
    }

    if (!HCatUtil.isHadoop23()) {
        assertTrue(exc != null);
        assertTrue(exc instanceof HCatException);
        assertTrue(
                "Got exception of type [" + ((HCatException) exc).getErrorType().toString()
                        + "] Expected ERROR_PUBLISHING_PARTITION or ERROR_MOVE_FAILED "
                        + "or ERROR_DUPLICATE_PARTITION",
                (ErrorType.ERROR_PUBLISHING_PARTITION == ((HCatException) exc).getErrorType())
                        || (ErrorType.ERROR_MOVE_FAILED == ((HCatException) exc).getErrorType())
                        || (ErrorType.ERROR_DUPLICATE_PARTITION == ((HCatException) exc).getErrorType()));
    }

    query = "show partitions " + tableName;
    retCode = driver.run(query).getResponseCode();
    if (retCode != 0) {
        throw new Exception("Error " + retCode + " running query " + query);
    }
    res = new ArrayList<String>();
    driver.getResults(res);
    assertEquals(NUM_PARTITIONS, res.size());

    query = "select * from " + tableName;
    retCode = driver.run(query).getResponseCode();
    if (retCode != 0) {
        throw new Exception("Error " + retCode + " running query " + query);
    }
    res = new ArrayList<String>();
    driver.getResults(res);
    assertEquals(NUM_RECORDS, res.size());
}