List of usage examples for org.apache.hadoop.mapreduce CounterGroup findCounter
T findCounter(String counterName);
From source file:cascading.stats.hadoop.HadoopStepCounterCache.java
License:Open Source License
@Override protected long getCounterValue(Counters counters, String groupName, String counterName) { CounterGroup counterGroup = counters.getGroup(groupName); if (counterGroup == null) return 0; // getCounter actually searches the display name, wtf // in theory this is lazily created if does not exist, but don't rely on it Counter counterValue = counterGroup.findCounter(counterName); if (counterValue == null) return 0; return counterValue.getValue(); }
From source file:com.cloudera.accumulo.upgrade.compatibility.DataCompatibilityVerify.java
License:Open Source License
/**
 * Submits one verification MapReduce job per table and validates the resulting
 * counters: per-row and per-family cell counts, checksum failures, and the
 * expected number of rows/families.
 *
 * Fix: {@code new ArrayList()} was a raw type assigned to {@code List<Job>};
 * it is now constructed with its type argument.
 *
 * @param args command-line arguments, parsed by {@code options}
 * @return 0 when all jobs succeed and all counter checks pass, 1 otherwise
 * @throws Exception on job submission/completion failures
 */
@Override
public int run(String[] args) throws Exception {
    final String jobName = this.getClass().getName();
    options.parseArgs(jobName, args);
    try {
        // Default the row count to the number of map slots when unset (-1).
        final int totalMapSlots = getConf().getInt("mapred.map.tasks",
                DataCompatibilityTestCli.DEFAULT_NUM_ROWS);
        if (-1 == options.test.numRows) {
            options.test.numRows = totalMapSlots;
        }
        final TableOperations ops = options.connection.getConnector().tableOperations();
        final List<String> names = options.test.getTableNames(ops);
        // Reduce slots: explicit option wins, then cluster config, then one per table.
        int totalReduceSlots = getConf().getInt("mapred.reduce.tasks", 0);
        if (-1 != options.test.numReduceSlots) {
            totalReduceSlots = options.test.numReduceSlots;
        }
        if (0 == totalReduceSlots) {
            totalReduceSlots = names.size();
        }
        final int reducesPerJob = Math.max(1, totalReduceSlots / names.size());
        // was: new ArrayList() — raw type; parameterized to match List<Job>
        final List<Job> jobs = new ArrayList<Job>();
        for (String name : names) {
            final Job job = new Job(getConf(), jobName + " " + name);
            job.setJarByClass(this.getClass());
            options.input.useAccumuloInputFormat(job, name);
            job.setMapperClass(DataVerifyMapper.class);
            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(LongWritable.class);
            job.setReducerClass(LongSumReducer.class);
            job.setCombinerClass(LongSumReducer.class);
            job.setOutputFormatClass(TextOutputFormat.class);
            TextOutputFormat.setOutputPath(job, new Path(options.test.output, name));
            job.setNumReduceTasks(reducesPerJob);
            job.submit();
            jobs.add(job);
        }
        boolean success = true;
        final long numCellsPerRow = options.test.qualifiers * DataCompatibilityLoad.FAMILIES.length;
        final long numCellsPerFamily = options.test.qualifiers * options.test.numRows;
        for (Job job : jobs) {
            success &= job.waitForCompletion(true);
            final CounterGroup group = job.getCounters().getGroup(DataVerifyMapper.class.getName());
            if (null == group) {
                log.error("Job '" + job.getJobName()
                        + "' doesn't have counters for the verification mapper.");
                success = false;
            } else {
                // Any positive bad-checksum count fails the verification.
                final Counter badCounter = group.findCounter(BAD_COUNTER);
                if (null != badCounter && 0 < badCounter.getValue()) {
                    log.error("Job '" + job.getJobName() + "' has " + badCounter.getValue()
                            + " entries with bad checksums.");
                    success = false;
                }
                int numRows = 0;
                int numFamilies = 0;
                // Per-row counters must equal numCellsPerRow; per-family counters
                // must equal numCellsPerFamily.
                for (Counter counter : group) {
                    if (counter.getName().startsWith(ROW_COUNTER_PREFIX)) {
                        numRows++;
                        if (numCellsPerRow != counter.getValue()) {
                            log.error("Job '" + job.getJobName() + "', counter '" + counter.getName()
                                    + "' should have " + numCellsPerRow + " cells, but instead has "
                                    + counter.getValue());
                            success = false;
                        }
                    } else if (counter.getName().startsWith(FAMILY_COUNTER_PREFIX)) {
                        numFamilies++;
                        if (numCellsPerFamily != counter.getValue()) {
                            log.error("Job '" + job.getJobName() + "', counter '" + counter.getName()
                                    + "' should have " + numCellsPerFamily + " cells, but instead has "
                                    + counter.getValue());
                            success = false;
                        }
                    }
                }
                if (options.test.numRows != numRows) {
                    log.error("Job '" + job.getJobName() + "' is supposed to have "
                            + options.test.numRows + " rows, but has " + numRows);
                    success = false;
                }
                if (DataCompatibilityLoad.FAMILIES.length != numFamilies) {
                    log.error("Job '" + job.getJobName() + "' is supposed to have "
                            + DataCompatibilityLoad.FAMILIES.length + " families, but has " + numFamilies);
                    success = false;
                }
            }
        }
        if (success) {
            log.info("All internal checks passed.");
        } else {
            log.info("Some checks failed. see log.");
        }
        return success ? 0 : 1;
    } finally {
        options.input.close();
    }
}
From source file:com.inmobi.conduit.distcp.tools.mapred.TestCopyCommitter.java
License:Apache License
public void testValidationPass() { config.setLong(DistCpConstants.CONF_LABEL_TOTAL_BYTES_TO_BE_COPIED, 100); Counters counters = new Counters(); CounterGroup grp = counters.getGroup(CopyMapper.Counter.class.getName()); grp.findCounter(CopyMapper.Counter.BYTES_COPIED.name()).increment(50); grp.findCounter(CopyMapper.Counter.BYTES_FAILED.name()).increment(20); grp.findCounter(CopyMapper.Counter.BYTES_SKIPPED.name()).increment(30); counterProvider.setCounters(counters); try {//from ww w. j av a 2s .c o m TaskAttemptContext taskAttemptContext = getTaskAttemptContext(config); JobContext jobContext = Mockito.mock(JobContext.class); Mockito.when(jobContext.getConfiguration()).thenReturn(config); JobID jobID = new JobID(); Mockito.when(jobContext.getJobID()).thenReturn(jobID); final String[] statusString = new String[1]; try { Mockito.doAnswer(new Answer() { @Override public Object answer(InvocationOnMock invocationOnMock) throws Throwable { LOG.info("XXXX crap I am called now " + invocationOnMock.getArguments()[0]); statusString[0] = (String) invocationOnMock.getArguments()[0]; return null; //To change body of implemented methods use File | Settings | File Templates. } }).when(taskAttemptContext).setStatus(Mockito.anyString()); } catch (Throwable e) { } try { OutputCommitter committer = new CopyCommitter(null, taskAttemptContext); committer.commitJob(jobContext); Assert.assertEquals(statusString[0], "Commit Successful"); } catch (IOException e) { LOG.error("Exception encountered ", e); Assert.fail("Commit failed"); } } finally { config.setLong(DistCpConstants.CONF_LABEL_TOTAL_BYTES_TO_BE_COPIED, 0); counterProvider.setCounters(EMPTY_COUNTERS); } }
From source file:com.inmobi.conduit.distcp.tools.mapred.TestCopyCommitter.java
License:Apache License
@Ignore @Test//from w ww . jav a2s . c o m public void testCounterProvider() { try { Job job = MockJobTracker.getJobForClient(); Counters a = EMPTY_COUNTERS; CounterGroup grp = a.getGroup("abc"); Counter cntr = grp.findCounter("counter"); cntr.increment(100); CounterProvider cp = new CounterProvider(a); job.submit(); Assert.assertEquals(job.getCounters(), a); } catch (Exception e) { LOG.error("Exception encountered ", e); } }
From source file:com.tomslabs.grid.avro.AvroWordCountTest.java
License:Apache License
@Test public void testMapReduce() throws Throwable { FileSystem fs = FileSystem.get(localConf); String inputFile = AvroIOUtils.createAvroInputFile(inputDir, "foo", "foo", "bar", "baz", "foo", "baz"); AvroIOUtils.dumpAvroFiles(inputDir); Path input = localFileToPath(inputFile).getParent(); Path countOutput = new Path(outputDir.getAbsolutePath()); fs.delete(countOutput, true);//from w w w . ja v a2s. c o m Job countJob = AvroWordCount.createSubmitableJob(localConf, input, countOutput); assertTrue("count job failed", countJob.waitForCompletion(true)); CounterGroup group = countJob.getCounters().getGroup("org.apache.hadoop.mapred.Task$Counter"); assertEquals("Wrong number of mapper input records", 6, group.findCounter("MAP_INPUT_RECORDS").getValue()); assertEquals("Wrong number of mapper output records", 6, group.findCounter("MAP_OUTPUT_RECORDS").getValue()); assertEquals("Wrong number of reduce output records", 3, group.findCounter("REDUCE_OUTPUT_RECORDS").getValue()); AvroIOUtils.dumpAvroFiles(outputDir); Map<String, Integer> res = readOutput(outputDir); assertEquals(3, res.size()); assertTrue(res.containsKey("foo")); assertEquals(3, res.get("foo").intValue()); assertTrue(res.containsKey("bar")); assertEquals(1, res.get("bar").intValue()); assertTrue(res.containsKey("baz")); assertEquals(2, res.get("baz").intValue()); }
From source file:io.covert.dns.collection.CollectionJob.java
License:Apache License
/**
 * Configures and runs the DNS collection job, then logs what fraction of
 * total request-handling time was spent constructing messages, performing
 * requests, and parsing responses.
 *
 * Expected arguments: dclass, comma-separated record types, input dir, output dir.
 *
 * @return 0 when the job completes successfully, 1 otherwise
 */
@Override
public int run(String[] args) throws Exception {
    if (args.length != 4) {
        usage("");
    }
    String dclass = args[0];
    String types = args[1];
    String inDir = args[2];
    String outDir = args[3];
    Configuration conf = getConf();
    // Fill in resolver defaults only when not supplied by the caller.
    if (conf.get("dns.collection.num.resolvers") == null) {
        conf.setInt("dns.collection.num.resolvers", 50);
    }
    if (conf.get("dns.collection.nameservers") == null) {
        conf.set("dns.collection.nameservers", "127.0.0.1");
    }
    Job job = new Job(conf);
    job.setJobName(CollectionJob.class.getSimpleName() + ": types=" + types + ", dclass=" + dclass
            + " inDir=" + inDir + ", outDir=" + outDir + ", resolvers="
            + conf.get("dns.collection.nameservers"));
    job.setJarByClass(getClass());
    // Map-only job: DNS responses are written directly by the mapper.
    job.setMapperClass(CollectionMapper.class);
    job.setNumReduceTasks(0);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(BytesWritable.class);
    job.setInputFormatClass(DnsRequestInputFormat.class);
    DnsRequestInputFormat.setInputPaths(job, new Path(inDir));
    DnsRequestInputFormat.configure(job, dclass.toUpperCase(), Arrays.asList(types.split(",")),
            Arrays.asList(""));
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    SequenceFileOutputFormat.setOutputPath(job, new Path(outDir));
    SequenceFileOutputFormat.setCompressOutput(job, true);
    job.submit();
    int exitCode = job.waitForCompletion(true) ? 0 : 1;
    // Break down where the resolver time went, as percentages of the total.
    CounterGroup resolverCounters = job.getCounters().getGroup(CollectionMapper.RESOLVER_GROUP);
    Counter constructMessageMS = resolverCounters.findCounter(CollectionMapper.CONSTRUCT_MESSAGE_MS);
    Counter parseResponseMS = resolverCounters.findCounter(CollectionMapper.PARSE_RESPONSE_MS);
    Counter performRequestMS = resolverCounters.findCounter(CollectionMapper.PERFORM_REQUEST_MS);
    Counter totalRequestHandlingMS = resolverCounters.findCounter(CollectionMapper.TOTAL_REQUEST_HANDLING_MS);
    Log.info("Total ConstructMessage percent: "
            + (double) (constructMessageMS.getValue() * 100L) / ((double) totalRequestHandlingMS.getValue()));
    Log.info("Total ParseResponse percent: "
            + (double) (parseResponseMS.getValue() * 100L) / ((double) totalRequestHandlingMS.getValue()));
    Log.info("Total PerformRequest percent: "
            + (double) (performRequestMS.getValue() * 100L) / ((double) totalRequestHandlingMS.getValue()));
    return exitCode;
}
From source file:kogiri.mapreduce.preprocess.indexing.stage3.KmerStatisticsBuilder.java
License:Open Source License
private int runJob(PreprocessorConfig ppConfig) throws Exception { // check config validatePreprocessorConfig(ppConfig); // configuration Configuration conf = this.getConf(); // set user configuration ppConfig.getClusterConfiguration().configureTo(conf); ppConfig.saveTo(conf);/*from w w w .j a v a2 s . c o m*/ Path[] inputFiles = KmerIndexHelper.getAllKmerIndexIndexFilePath(conf, ppConfig.getKmerIndexPath()); for (Path inputFile : inputFiles) { LOG.info(inputFile); } boolean job_result = true; List<Job> jobs = new ArrayList<Job>(); for (int round = 0; round < inputFiles.length; round++) { Path roundInputFile = inputFiles[round]; Path[] roundInputKmerIndexPartFiles = KmerIndexHelper.getKmerIndexPartFilePath(conf, roundInputFile); Job job = new Job(conf, "Kogiri Preprocessor - Computing Kmer Statistics (" + round + " of " + inputFiles.length + ")"); job.setJarByClass(KmerStatisticsBuilder.class); // Mapper job.setMapperClass(KmerStatisticsBuilderMapper.class); job.setInputFormatClass(SequenceFileInputFormat.class); job.setMapOutputKeyClass(NullWritable.class); job.setMapOutputValueClass(NullWritable.class); // Specify key / value job.setOutputKeyClass(NullWritable.class); job.setOutputValueClass(NullWritable.class); // Inputs Path[] kmerIndexPartDataFiles = KmerIndexHelper.getAllKmerIndexPartDataFilePath(conf, roundInputKmerIndexPartFiles); SequenceFileInputFormat.addInputPaths(job, FileSystemHelper.makeCommaSeparated(kmerIndexPartDataFiles)); LOG.info("Input file : "); LOG.info("> " + roundInputFile.toString()); // Outputs job.setOutputFormatClass(NullOutputFormat.class); job.setNumReduceTasks(0); // Execute job and return status boolean result = job.waitForCompletion(true); jobs.add(job); // check results if (result) { CounterGroup uniqueGroup = job.getCounters() .getGroup(KmerStatisticsHelper.getCounterGroupNameUnique()); CounterGroup totalGroup = job.getCounters() .getGroup(KmerStatisticsHelper.getCounterGroupNameTotal()); CounterGroup squareGroup = 
job.getCounters() .getGroup(KmerStatisticsHelper.getCounterGroupNameSquare()); CounterGroup logTFSquareGroup = job.getCounters() .getGroup(KmerStatisticsHelper.getCounterGroupNameLogTFSquare()); Iterator<Counter> uniqueIterator = uniqueGroup.iterator(); while (uniqueIterator.hasNext()) { long count = 0; long length = 0; long square = 0; double logTFSquare = 0; double real_mean = 0; double stddev = 0; double tf_cosnorm_base = 0; Counter uniqueCounter = uniqueIterator.next(); Counter totalCounter = totalGroup.findCounter(uniqueCounter.getName()); Counter squareCounter = squareGroup.findCounter(uniqueCounter.getName()); Counter logTFSquareCounter = logTFSquareGroup.findCounter(uniqueCounter.getName()); count = uniqueCounter.getValue(); length = totalCounter.getValue(); square = squareCounter.getValue(); logTFSquare = logTFSquareCounter.getValue() / 1000.0; tf_cosnorm_base = Math.sqrt(logTFSquare); real_mean = (double) length / (double) count; // stddev = sqrt((sum(lengths ^ 2)/count) - (mean ^ 2) double mean = Math.pow(real_mean, 2); double term = (double) square / (double) count; stddev = Math.sqrt(term - mean); LOG.info("distinct k-mers " + uniqueCounter.getName() + " : " + count); LOG.info("total k-mers " + uniqueCounter.getName() + " : " + length); LOG.info("average " + uniqueCounter.getName() + " : " + real_mean); LOG.info("std-deviation " + uniqueCounter.getName() + " : " + stddev); LOG.info("tf-cos-norm-base " + uniqueCounter.getName() + " : " + tf_cosnorm_base); Path outputHadoopPath = new Path(ppConfig.getKmerStatisticsPath(), KmerStatisticsHelper.makeKmerStatisticsFileName(uniqueCounter.getName())); FileSystem fs = outputHadoopPath.getFileSystem(conf); KmerStatistics statistics = new KmerStatistics(); statistics.setSampleName(uniqueCounter.getName()); statistics.setKmerSize(ppConfig.getKmerSize()); statistics.setUniqueKmers(count); statistics.setTotalKmers(length); statistics.setAverageFrequency(real_mean); statistics.setStdDeviation(stddev); 
statistics.setTFCosineNormBase(tf_cosnorm_base); statistics.saveTo(fs, outputHadoopPath); } } if (!result) { LOG.error("job failed at round " + round + " of " + inputFiles.length); job_result = false; break; } } // report if (ppConfig.getReportPath() != null && !ppConfig.getReportPath().isEmpty()) { Report report = new Report(); report.addJob(jobs); report.writeTo(ppConfig.getReportPath()); } return job_result ? 0 : 1; }
From source file:org.apache.mrql.MapReducePlan.java
License:Apache License
/**
 * Finds the number of records in the hadoop MapReduce job output.
 * Prefers the reducer output count; falls back to the mapper output count
 * for map-only jobs (where the reduce counter is zero).
 */
public final static long outputRecords(Job job) throws Exception {
    CounterGroup cg = job.getCounters().getGroup("org.apache.hadoop.mapred.Task$Counter");
    long reduceRecords = cg.findCounter("REDUCE_OUTPUT_RECORDS").getValue();
    return (reduceRecords != 0) ? reduceRecords
                                : cg.findCounter("MAP_OUTPUT_RECORDS").getValue();
}
From source file:org.apache.nutch.crawl.DeduplicationJob.java
License:Apache License
public int run(String[] args) throws IOException { if (args.length < 1) { System.err.println(/*w w w.j a v a 2 s. c o m*/ "Usage: DeduplicationJob <crawldb> [-group <none|host|domain>] [-compareOrder <score>,<fetchTime>,<httpsOverHttp>,<urlLength>]"); return 1; } String group = "none"; Path crawlDb = new Path(args[0]); String compareOrder = "score,fetchTime,urlLength"; for (int i = 1; i < args.length; i++) { if (args[i].equals("-group")) group = args[++i]; if (args[i].equals("-compareOrder")) { compareOrder = args[++i]; if (compareOrder.indexOf("score") == -1 || compareOrder.indexOf("fetchTime") == -1 || compareOrder.indexOf("urlLength") == -1) { System.err .println("DeduplicationJob: compareOrder must contain score, fetchTime and urlLength."); return 1; } } } SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); long start = System.currentTimeMillis(); LOG.info("DeduplicationJob: starting at " + sdf.format(start)); Path tempDir = new Path(crawlDb, "dedup-temp-" + Integer.toString(new Random().nextInt(Integer.MAX_VALUE))); Job job = NutchJob.getInstance(getConf()); Configuration conf = job.getConfiguration(); job.setJobName("Deduplication on " + crawlDb); conf.set(DEDUPLICATION_GROUP_MODE, group); conf.set(DEDUPLICATION_COMPARE_ORDER, compareOrder); job.setJarByClass(DeduplicationJob.class); FileInputFormat.addInputPath(job, new Path(crawlDb, CrawlDb.CURRENT_NAME)); job.setInputFormatClass(SequenceFileInputFormat.class); FileOutputFormat.setOutputPath(job, tempDir); job.setOutputFormatClass(SequenceFileOutputFormat.class); job.setMapOutputKeyClass(BytesWritable.class); job.setMapOutputValueClass(CrawlDatum.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(CrawlDatum.class); job.setMapperClass(DBFilter.class); job.setReducerClass(DedupReducer.class); FileSystem fs = tempDir.getFileSystem(getConf()); try { boolean success = job.waitForCompletion(true); if (!success) { String message = "Crawl job did not succeed, job status:" + 
job.getStatus().getState() + ", reason: " + job.getStatus().getFailureInfo(); LOG.error(message); fs.delete(tempDir, true); throw new RuntimeException(message); } CounterGroup g = job.getCounters().getGroup("DeduplicationJobStatus"); if (g != null) { Counter counter = g.findCounter("Documents marked as duplicate"); long dups = counter.getValue(); LOG.info("Deduplication: " + (int) dups + " documents marked as duplicates"); } } catch (IOException | InterruptedException | ClassNotFoundException e) { LOG.error("DeduplicationJob: " + StringUtils.stringifyException(e)); fs.delete(tempDir, true); return -1; } // merge with existing crawl db if (LOG.isInfoEnabled()) { LOG.info("Deduplication: Updating status of duplicate urls into crawl db."); } Job mergeJob = CrawlDb.createJob(getConf(), crawlDb); FileInputFormat.addInputPath(mergeJob, tempDir); mergeJob.setReducerClass(StatusUpdateReducer.class); mergeJob.setJarByClass(DeduplicationJob.class); fs = crawlDb.getFileSystem(getConf()); Path outPath = FileOutputFormat.getOutputPath(job); Path lock = CrawlDb.lock(getConf(), crawlDb, false); try { boolean success = mergeJob.waitForCompletion(true); if (!success) { String message = "Crawl job did not succeed, job status:" + mergeJob.getStatus().getState() + ", reason: " + mergeJob.getStatus().getFailureInfo(); LOG.error(message); fs.delete(tempDir, true); NutchJob.cleanupAfterFailure(outPath, lock, fs); throw new RuntimeException(message); } } catch (IOException | InterruptedException | ClassNotFoundException e) { LOG.error("DeduplicationMergeJob: " + StringUtils.stringifyException(e)); fs.delete(tempDir, true); NutchJob.cleanupAfterFailure(outPath, lock, fs); return -1; } CrawlDb.install(mergeJob, crawlDb); // clean up fs.delete(tempDir, true); long end = System.currentTimeMillis(); LOG.info("Deduplication finished at " + sdf.format(end) + ", elapsed: " + TimingUtil.elapsedTime(start, end)); return 0; }
From source file:seoeun.hadoop.multipleoutputs.TestMRMultipleOutputs.java
License:Apache License
protected void _testMultipleOutputs(boolean withCounters) throws Exception { String input = "a\nb\nc\nd\ne\nc\nd\ne"; //Configuration conf = createJobConf(); Configuration conf = new Configuration(); Job job = MapReduceTestUtil.createJob(conf, IN_DIR, OUT_DIR, 2, 1, input); job.setJobName("mo"); MultipleOutputs.addNamedOutput(job, TEXT, TextOutputFormat.class, LongWritable.class, Text.class); MultipleOutputs.addNamedOutput(job, SEQUENCE, SequenceFileOutputFormat.class, IntWritable.class, Text.class); MultipleOutputs.setCountersEnabled(job, withCounters); job.setMapperClass(MOMap.class); job.setReducerClass(MOReduce.class); job.waitForCompletion(true);/*from ww w . j av a 2s . c om*/ // assert number of named output part files int namedOutputCount = 0; int valueBasedOutputCount = 0; FileSystem fs = OUT_DIR.getFileSystem(conf); FileStatus[] statuses = fs.listStatus(OUT_DIR); for (FileStatus status : statuses) { String fileName = status.getPath().getName(); if (fileName.equals("text-m-00000") || fileName.equals("text-m-00001") || fileName.equals("text-r-00000") || fileName.equals("sequence_A-m-00000") || fileName.equals("sequence_A-m-00001") || fileName.equals("sequence_B-m-00000") || fileName.equals("sequence_B-m-00001") || fileName.equals("sequence_B-r-00000") || fileName.equals("sequence_C-r-00000")) { namedOutputCount++; } else if (fileName.equals("a-r-00000") || fileName.equals("b-r-00000") || fileName.equals("c-r-00000") || fileName.equals("d-r-00000") || fileName.equals("e-r-00000")) { valueBasedOutputCount++; } } //assertEquals(9, namedOutputCount); //assertEquals(5, valueBasedOutputCount); // assert TextOutputFormat files correctness BufferedReader reader = new BufferedReader( new InputStreamReader(fs.open(new Path(FileOutputFormat.getOutputPath(job), "text-r-00000")))); int count = 0; String line = reader.readLine(); while (line != null) { assertTrue(line.endsWith(TEXT)); line = reader.readLine(); count++; } reader.close(); assertFalse(count == 0); // assert 
SequenceOutputFormat files correctness SequenceFile.Reader seqReader = new SequenceFile.Reader(fs, new Path(FileOutputFormat.getOutputPath(job), "sequence_B-r-00000"), conf); assertEquals(IntWritable.class, seqReader.getKeyClass()); assertEquals(Text.class, seqReader.getValueClass()); count = 0; IntWritable key = new IntWritable(); Text value = new Text(); while (seqReader.next(key, value)) { assertEquals(SEQUENCE, value.toString()); count++; } seqReader.close(); assertFalse(count == 0); if (withCounters) { CounterGroup counters = job.getCounters().getGroup(MultipleOutputs.class.getName()); assertEquals(9, counters.size()); assertEquals(4, counters.findCounter(TEXT).getValue()); assertEquals(2, counters.findCounter(SEQUENCE + "_A").getValue()); assertEquals(4, counters.findCounter(SEQUENCE + "_B").getValue()); assertEquals(2, counters.findCounter(SEQUENCE + "_C").getValue()); assertEquals(2, counters.findCounter("a").getValue()); assertEquals(2, counters.findCounter("b").getValue()); assertEquals(4, counters.findCounter("c").getValue()); assertEquals(4, counters.findCounter("d").getValue()); assertEquals(4, counters.findCounter("e").getValue()); } }