List of usage examples for org.apache.hadoop.mapreduce.Job#getCounters()
public Counters getCounters() throws IOException
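For orientation, here is a minimal sketch of the typical call pattern (not taken from the examples below; it assumes a Job that has already been configured and has finished running, and uses the built-in TaskCounter enum from org.apache.hadoop.mapreduce):

import java.io.IOException;

import org.apache.hadoop.mapreduce.Counters;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.TaskCounter;

public class GetCountersSketch {

    // Returns the number of map input records for a completed job.
    public static long mapInputRecords(Job job) throws IOException {
        // getCounters() aggregates the counters reported by every task of the job.
        Counters counters = job.getCounters();
        // Built-in counters are addressed via the TaskCounter enum;
        // user-defined counters use findCounter(groupName, counterName).
        return counters.findCounter(TaskCounter.MAP_INPUT_RECORDS).getValue();
    }
}

As the examples below show, the usual pattern is to call getCounters() after waitForCompletion(true) returns, then look up individual built-in or user-defined counters with findCounter.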
From source file:com.cloudera.sqoop.shims.CDH3Shim.java
License:Apache License
@Override
public long getNumMapOutputRecords(Job job) throws IOException, InterruptedException {
    return job.getCounters()
        .findCounter("org.apache.hadoop.mapred.Task$Counter", "MAP_OUTPUT_RECORDS")
        .getValue();
}
From source file:com.cloudera.sqoop.shims.CDH3Shim.java
License:Apache License
@Override
public long getNumMapInputRecords(Job job) throws IOException, InterruptedException {
    return job.getCounters()
        .findCounter("org.apache.hadoop.mapred.Task$Counter", "MAP_INPUT_RECORDS")
        .getValue();
}
From source file:com.datasalt.pangool.tuplemr.mapred.lib.output.TestTupleTextInputOutputFormat.java
License:Apache License
@Test
public void testSplits() throws Exception {
    BufferedWriter writer = new BufferedWriter(new FileWriter(IN));
    for (int i = 0; i < 10000; i++) {
        writer.write("str1" + " " + "str2" + " " + "30" + " " + "4000" + "\n");
    }
    writer.close();

    Schema schema = new Schema("schema", Fields.parse("a:string, b:string, c:int, d:long"));
    InputFormat inputFormat = new TupleTextInputFormat(schema, false, false, ' ',
        TupleTextInputFormat.NO_QUOTE_CHARACTER, TupleTextInputFormat.NO_ESCAPE_CHARACTER,
        FieldSelector.NONE, TupleTextInputFormat.NO_NULL_STRING);

    Configuration conf = getConf();
    conf.setLong("mapred.min.split.size", 10 * 1024);
    conf.setLong("dfs.block.size", 10 * 1024);
    conf.setLong("mapred.max.split.size", 10 * 1024);

    FileSystem fS = FileSystem.get(conf);
    Path outPath = new Path(OUT);

    MapOnlyJobBuilder mapOnly = new MapOnlyJobBuilder(conf);
    mapOnly.addInput(new Path(IN), inputFormat,
        new MapOnlyMapper<ITuple, NullWritable, NullWritable, NullWritable>() {
            protected void map(ITuple key, NullWritable value, Context context)
                    throws IOException, InterruptedException {
                Assert.assertEquals("str1", key.get("a").toString());
                Assert.assertEquals("str2", key.get("b").toString());
                Assert.assertEquals((Integer) 30, (Integer) key.get("c"));
                Assert.assertEquals((Long) 4000l, (Long) key.get("d"));
                context.getCounter("stats", "nlines").increment(1);
            }
        });
    HadoopUtils.deleteIfExists(fS, outPath);
    mapOnly.setOutput(outPath, new HadoopOutputFormat(NullOutputFormat.class),
        NullWritable.class, NullWritable.class);

    Job job = mapOnly.createJob();
    try {
        assertTrue(job.waitForCompletion(true));
    } finally {
        mapOnly.cleanUpInstanceFiles();
    }
    HadoopUtils.deleteIfExists(fS, new Path(IN));

    assertEquals(10000, job.getCounters().getGroup("stats").findCounter("nlines").getValue());
}
From source file:com.datasalt.pangool.tuplemr.mapred.lib.output.TestTupleTextInputOutputFormat.java
License:Apache License
@Test
public void testInputCompression() throws Exception {
    Schema schema = new Schema("schema", Fields.parse("a:string, b:string, c:int, d:long"));
    InputFormat inputFormat = new TupleTextInputFormat(schema, false, false, ' ',
        TupleTextInputFormat.NO_QUOTE_CHARACTER, TupleTextInputFormat.NO_ESCAPE_CHARACTER,
        FieldSelector.NONE, TupleTextInputFormat.NO_NULL_STRING);

    Configuration conf = getConf();
    FileSystem fS = FileSystem.get(conf);
    Path outPath = new Path(OUT);

    MapOnlyJobBuilder mapOnly = new MapOnlyJobBuilder(conf);
    mapOnly.addInput(new Path("src/test/resources/*.gz"), inputFormat,
        new MapOnlyMapper<ITuple, NullWritable, NullWritable, NullWritable>() {
            protected void map(ITuple key, NullWritable value, Context context)
                    throws IOException, InterruptedException {
                Assert.assertNotNull(key.get("a").toString());
                Assert.assertNotNull(key.get("b").toString());
                Assert.assertTrue((Integer) key.get("c") > 0);
                Assert.assertTrue((Long) key.get("d") > 0);
                context.getCounter("stats", "nlines").increment(1);
            }
        });
    HadoopUtils.deleteIfExists(fS, outPath);
    mapOnly.setOutput(outPath, new HadoopOutputFormat(NullOutputFormat.class),
        NullWritable.class, NullWritable.class);

    Job job = mapOnly.createJob();
    try {
        assertTrue(job.waitForCompletion(true));
    } finally {
        mapOnly.cleanUpInstanceFiles();
    }
    HadoopUtils.deleteIfExists(fS, new Path(IN));

    assertEquals(100, job.getCounters().getGroup("stats").findCounter("nlines").getValue());
}
From source file:com.flipkart.fdp.migration.distcp.core.MirrorDistCPDriver.java
License:Apache License
private int processJobCounters(Job job) {
    int retVal = 0;
    try {
        Counters counters = job.getCounters();

        long failedCount = counters.findCounter(BLUESHIFT_COUNTER.FAILED_COUNT).getValue();
        long successCount = counters.findCounter(BLUESHIFT_COUNTER.SUCCESS_COUNT).getValue();
        long verifiedFailedCount = counters.findCounter(BLUESHIFT_COUNTER.VERIFIED_FAILED_COUNT).getValue();

        System.out.println("Total Success Transfers: " + successCount
            + ", Total Failed Transfers: " + failedCount);

        if (failedCount > 0 || verifiedFailedCount > 0) {
            System.err.println("There are failedCount[" + failedCount + "], verifiedFailedCount["
                + verifiedFailedCount + "] transfers, Please re-run the job...");
            retVal = (int) failedCount;
        }

        long verifiedSuccessCount = counters.findCounter(BLUESHIFT_COUNTER.VERIFIED_SUCCESS_COUNT).getValue();
        if (successCount != verifiedSuccessCount) {
            System.err.println("Verification not done for all files : successCount[" + successCount
                + "], verifiedSuccessCount[" + verifiedSuccessCount + "].");
            retVal = (int) (successCount - verifiedSuccessCount);
        }
    } catch (Exception e) {
        System.out.println("Error processing job counters: " + e.getMessage());
        retVal = 1;
    }
    return retVal;
}
From source file:com.inmobi.conduit.distcp.tools.mapred.TestCopyCommitter.java
License:Apache License
@Ignore
@Test
public void testCounterProvider() {
    try {
        Job job = MockJobTracker.getJobForClient();
        Counters a = EMPTY_COUNTERS;
        CounterGroup grp = a.getGroup("abc");
        Counter cntr = grp.findCounter("counter");
        cntr.increment(100);
        CounterProvider cp = new CounterProvider(a);

        job.submit();
        Assert.assertEquals(job.getCounters(), a);
    } catch (Exception e) {
        LOG.error("Exception encountered ", e);
    }
}
From source file:com.linkedin.cubert.utils.ScriptStats.java
License:Open Source License
public void addJob(final Job job) {
    getStats(job).endTime = System.currentTimeMillis();

    Counters counters;
    try {
        counters = job.getCounters();
    } catch (IOException e) {
        counters = null;
    }
    getStats(job).counters = counters;
}
From source file:com.linkedin.hadoop.example.WordCountCounters.java
License:Apache License
/**
 * Azkaban will look for a method named `run` to start your job. Use this method to set up all the
 * Hadoop-related configuration for your job and submit it.
 *
 * @throws Exception If there is an exception during the configuration or submission of your job
 */
public void run() throws Exception {
    _logger.info(String.format("Configuring job for the class %s", getClass().getSimpleName()));

    Job job = Job.getInstance(getConf());
    job.setJarByClass(WordCountJob.class);
    job.setJobName(_name);

    job.setMapperClass(WordCountMapper.class);
    job.setCombinerClass(WordCountCombiner.class);
    job.setReducerClass(WordCountReducer.class);

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(LongWritable.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);

    String inputPath = _properties.getProperty("input.path");
    String outputPath = _properties.getProperty("output.path");
    boolean forceOverwrite = Boolean.parseBoolean(_properties.getProperty("force.output.overwrite", "false"));

    FileInputFormat.addInputPath(job, new Path(inputPath));
    FileOutputFormat.setOutputPath(job, new Path(outputPath));

    // Before we submit the job, remove the old output directory
    if (forceOverwrite) {
        FileSystem fs = FileSystem.get(job.getConfiguration());
        fs.delete(FileOutputFormat.getOutputPath(job), true);
    }

    // Since we have Kerberos enabled at LinkedIn, we must add the token to our configuration. If
    // you don't use Kerberos security for your Hadoop cluster, you don't need this code.
    if (System.getenv("HADOOP_TOKEN_FILE_LOCATION") != null) {
        job.getConfiguration().set("mapreduce.job.credentials.binary",
            System.getenv("HADOOP_TOKEN_FILE_LOCATION"));
    }

    // Submit the job for execution
    _logger.info(String.format("About to submit the job named %s", _name));
    boolean succeeded = job.waitForCompletion(true);

    // Before we return, display our custom counters for the job in the Azkaban logs
    long inputWords = job.getCounters().findCounter(WordCountCounters.INPUT_WORDS).getValue();
    _logger.info(String.format("Read a total of %d input words", inputWords));

    // Azkaban will not realize the Hadoop job failed unless you specifically throw an exception
    if (!succeeded) {
        throw new Exception(String.format("Azkaban job %s failed", _name));
    }
}
From source file:com.linkedin.thirdeye.hadoop.aggregation.AggregationPhaseJob.java
License:Apache License
public Job run() throws Exception {
    Job job = Job.getInstance(getConf());
    job.setJobName(name);
    job.setJarByClass(AggregationPhaseJob.class);

    FileSystem fs = FileSystem.get(getConf());
    Configuration configuration = job.getConfiguration();

    // Properties
    LOGGER.info("Properties {}", props);

    // Input Path
    String inputPathDir = getAndSetConfiguration(configuration, AGG_PHASE_INPUT_PATH);
    LOGGER.info("Input path dir: " + inputPathDir);
    for (String inputPath : inputPathDir.split(ThirdEyeConstants.FIELD_SEPARATOR)) {
        LOGGER.info("Adding input:" + inputPath);
        Path input = new Path(inputPath);
        FileInputFormat.addInputPath(job, input);
    }

    // Output path
    Path outputPath = new Path(getAndSetConfiguration(configuration, AGG_PHASE_OUTPUT_PATH));
    LOGGER.info("Output path dir: " + outputPath.toString());
    if (fs.exists(outputPath)) {
        fs.delete(outputPath, true);
    }
    FileOutputFormat.setOutputPath(job, outputPath);

    // Schema
    Schema avroSchema = ThirdeyeAvroUtils.getSchema(inputPathDir);
    LOGGER.info("Schema : {}", avroSchema.toString(true));
    job.getConfiguration().set(AGG_PHASE_AVRO_SCHEMA.toString(), avroSchema.toString());

    // ThirdEyeConfig
    String metricTypesProperty = ThirdeyeAvroUtils.getMetricTypesProperty(
        props.getProperty(ThirdEyeConfigProperties.THIRDEYE_METRIC_NAMES.toString()),
        props.getProperty(ThirdEyeConfigProperties.THIRDEYE_METRIC_TYPES.toString()), avroSchema);
    props.setProperty(ThirdEyeConfigProperties.THIRDEYE_METRIC_TYPES.toString(), metricTypesProperty);
    ThirdEyeConfig thirdeyeConfig = ThirdEyeConfig.fromProperties(props);
    LOGGER.info("Thirdeye Config {}", thirdeyeConfig.encode());
    job.getConfiguration().set(AGG_PHASE_THIRDEYE_CONFIG.toString(),
        OBJECT_MAPPER.writeValueAsString(thirdeyeConfig));

    // Map config
    job.setMapperClass(AggregationMapper.class);
    job.setInputFormatClass(AvroKeyInputFormat.class);
    job.setMapOutputKeyClass(BytesWritable.class);
    job.setMapOutputValueClass(BytesWritable.class);

    // Reduce config
    job.setReducerClass(AggregationReducer.class);
    job.setOutputKeyClass(AvroKey.class);
    job.setOutputValueClass(NullWritable.class);
    AvroJob.setOutputKeySchema(job, avroSchema);
    job.setOutputFormatClass(AvroKeyOutputFormat.class);
    String numReducers = props.getProperty(ThirdEyeJobProperties.THIRDEYE_NUM_REDUCERS.getName());
    LOGGER.info("Num Reducers : {}", numReducers);
    if (StringUtils.isNotBlank(numReducers)) {
        job.setNumReduceTasks(Integer.valueOf(numReducers));
        LOGGER.info("Setting num reducers {}", job.getNumReduceTasks());
    }

    job.waitForCompletion(true);

    Counter counter = job.getCounters().findCounter(AggregationCounter.NUMBER_OF_RECORDS);
    LOGGER.info(counter.getDisplayName() + " : " + counter.getValue());
    if (counter.getValue() == 0) {
        throw new IllegalStateException("No input records in " + inputPathDir);
    }
    counter = job.getCounters().findCounter(AggregationCounter.NUMBER_OF_RECORDS_FLATTENED);
    LOGGER.info(counter.getDisplayName() + " : " + counter.getValue());

    for (String metric : thirdeyeConfig.getMetricNames()) {
        counter = job.getCounters().findCounter(thirdeyeConfig.getCollection(), metric);
        LOGGER.info(counter.getDisplayName() + " : " + counter.getValue());
    }

    return job;
}
From source file:com.marklogic.contentpump.ContentPump.java
License:Apache License
private static void submitJob(Job job) throws Exception {
    String cpHome = System.getProperty(CONTENTPUMP_HOME_PROPERTY_NAME);

    // find job jar
    File cpHomeDir = new File(cpHome);
    FilenameFilter jobJarFilter = new FilenameFilter() {
        @Override
        public boolean accept(File dir, String name) {
            if (name.endsWith(".jar") && name.startsWith(CONTENTPUMP_JAR_PREFIX)) {
                return true;
            } else {
                return false;
            }
        }
    };
    File[] cpJars = cpHomeDir.listFiles(jobJarFilter);
    if (cpJars == null || cpJars.length == 0) {
        throw new RuntimeException("Content Pump jar file " + "is not found under " + cpHome);
    }
    if (cpJars.length > 1) {
        throw new RuntimeException("More than one Content Pump jar file " + "are found under " + cpHome);
    }

    // set job jar
    Configuration conf = job.getConfiguration();
    conf.set("mapreduce.job.jar", cpJars[0].toURI().toURL().toString());

    // find lib jars
    FilenameFilter filter = new FilenameFilter() {
        @Override
        public boolean accept(File dir, String name) {
            if (name.endsWith(".jar") && !name.startsWith("hadoop")) {
                return true;
            } else {
                return false;
            }
        }
    };

    // set lib jars
    StringBuilder jars = new StringBuilder();
    for (File jar : cpHomeDir.listFiles(filter)) {
        if (jars.length() > 0) {
            jars.append(',');
        }
        jars.append(jar.toURI().toURL().toString());
    }
    conf.set("tmpjars", jars.toString());
    if (LOG.isTraceEnabled())
        LOG.trace("LIBJARS:" + jars.toString());

    job.waitForCompletion(true);
    AuditUtil.auditMlcpFinish(conf, job.getJobName(), job.getCounters());
}