List of usage examples for org.apache.hadoop.mapreduce.Job#getCounters()
public Counters getCounters() throws IOException
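A typical pattern: run the job to completion with waitForCompletion(), then call getCounters() and look counters up either by enum (the framework's built-in TaskCounter values, or your own counter enum) or by group/name strings. Per the Javadoc, getCounters() may return null once a job has been retired. The sketch below is a minimal illustration of that pattern and is not taken from any of the source files that follow; the class name, the demo job setup, and the "MyGroup"/"MyCounter" strings are hypothetical.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Counters;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.TaskCounter;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class GetCountersDemo {
    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration(), "getCounters demo");
        job.setJarByClass(GetCountersDemo.class);
        // Identity mapper/reducer by default; a real job would set its own classes here.
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        boolean ok = job.waitForCompletion(true);

        // Aggregated counters for the finished job (may be null for retired jobs).
        Counters counters = job.getCounters();
        // Built-in framework counters are looked up by enum...
        long mapIn = counters.findCounter(TaskCounter.MAP_INPUT_RECORDS).getValue();
        // ...custom counters by group and name (both strings here are hypothetical).
        long custom = counters.findCounter("MyGroup", "MyCounter").getValue();
        System.out.println("map input records: " + mapIn + ", MyGroup.MyCounter: " + custom);

        System.exit(ok ? 0 : 1);
    }
}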
From source file:PairsPMI_M.java
License:Apache License
/**
 * Runs this tool.
 */
@SuppressWarnings({ "static-access" })
public int run(String[] args) throws Exception {
    Options options = new Options();
    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("input path").create(INPUT));
    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("output path").create(OUTPUT));
    options.addOption(OptionBuilder.withArgName("num").hasArg().withDescription("number of reducers")
            .create(NUM_REDUCERS));

    CommandLine cmdline;
    CommandLineParser parser = new GnuParser();
    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());
        return -1;
    }

    if (!cmdline.hasOption(INPUT) || !cmdline.hasOption(OUTPUT)) {
        System.out.println("args: " + Arrays.toString(args));
        HelpFormatter formatter = new HelpFormatter();
        formatter.setWidth(120);
        formatter.printHelp(this.getClass().getName(), options);
        ToolRunner.printGenericCommandUsage(System.out);
        return -1;
    }

    // First MapReduce job
    String inputPath = cmdline.getOptionValue(INPUT);
    String outputPath = cmdline.getOptionValue(OUTPUT);
    int reduceTasks = cmdline.hasOption(NUM_REDUCERS) ? Integer.parseInt(cmdline.getOptionValue(NUM_REDUCERS))
            : 1;

    LOG.info("Tool name: " + PairsPMI_M.class.getSimpleName());
    LOG.info(" - input path: " + inputPath);
    LOG.info(" - output path: " + outputPath);
    LOG.info(" - tmp path: " + outputPath + "/tmp");
    LOG.info(" - num reducers: " + reduceTasks);

    Job job = Job.getInstance(getConf());
    job.setJobName(PairsPMI_M.class.getSimpleName());
    job.setJarByClass(PairsPMI_M.class);

    // Delete the tmp directory if it exists already
    Path tmpDir = new Path("tmp_wj");
    FileSystem.get(getConf()).delete(tmpDir, true);

    job.setNumReduceTasks(reduceTasks);

    FileInputFormat.setInputPaths(job, new Path(inputPath));
    FileOutputFormat.setOutputPath(job, new Path("tmp_wj"));

    job.setMapOutputKeyClass(PairOfStrings.class);
    job.setMapOutputValueClass(FloatWritable.class);
    job.setOutputKeyClass(PairOfStrings.class);
    job.setOutputValueClass(FloatWritable.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    // job.setOutputFormatClass(TextOutputFormat.class);

    job.setMapperClass(MyMapper.class);
    job.setCombinerClass(MyCombiner.class);
    job.setReducerClass(MyReducer.class);
    job.setPartitionerClass(MyPartitioner.class);

    long startTime = System.currentTimeMillis();
    job.waitForCompletion(true);
    double time1 = (System.currentTimeMillis() - startTime) / 1000.0;
    System.out.println("Job Finished in " + time1 + " seconds");

    // "Task$Counter" is the legacy (pre-Hadoop 2) name of the built-in task counter
    // group; the modern equivalent is TaskCounter.MAP_INPUT_RECORDS.
    numRecords = job.getCounters().findCounter("org.apache.hadoop.mapred.Task$Counter", "MAP_INPUT_RECORDS")
            .getValue();

    /*
     * Second MapReduce job
     */
    LOG.info("Tool name: " + PairsPMI_M.class.getSimpleName());
    LOG.info("second stage of MapReduce");
    LOG.info(" - input from tmp path: " + outputPath + "/tmp_wj");
    LOG.info(" - output path: " + outputPath);
    LOG.info(" - num reducers: " + reduceTasks);

    // Set the global variable
    Configuration conf = getConf();
    conf.setLong("numRec", numRecords);

    job = Job.getInstance(getConf());
    job.setJobName(PairsPMI_M.class.getSimpleName());
    job.setJarByClass(PairsPMI_M.class);

    // Delete the output directory if it exists already
    Path outputDir = new Path(outputPath);
    FileSystem.get(getConf()).delete(outputDir, true);

    job.setNumReduceTasks(reduceTasks);

    FileInputFormat.setInputPaths(job, new Path("tmp_wj/part*"));
    FileOutputFormat.setOutputPath(job, new Path(outputPath));

    job.setMapOutputKeyClass(PairOfStrings.class);
    job.setMapOutputValueClass(FloatWritable.class);
    // job.setOutputKeyClass(PairOfStrings.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(FloatWritable.class);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    // job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    job.setMapperClass(MyMapperSecond.class);
    // job.setCombinerClass(MyCombiner.class);
    job.setReducerClass(MyReducerSecond.class);
    job.setPartitionerClass(MyPartitioner.class);

    startTime = System.currentTimeMillis();
    job.waitForCompletion(true);
    double time2 = (System.currentTimeMillis() - startTime) / 1000.0;
    System.out.println("Second job finished in " + time2 + " seconds");
    System.out.println("Total time: " + (time1 + time2) + " seconds");

    return 0;
}
From source file:Analysis.A10_Weekday_v_Weekend_Listens.Listen_History_Weekday_Weekend_Driver.java
/**
 * @param args the command line arguments
 */
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
    Configuration conf = new Configuration();

    Job job = Job.getInstance(conf, "Listen History - Weekday v Weekend");
    job.setJarByClass(Listen_History_Weekday_Weekend_Driver.class);

    job.setMapperClass(Listen_History_Weekday_Weekend_Mapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(NullWritable.class);

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    int code = job.waitForCompletion(true) ? 0 : 1;

    if (code == 0) {
        for (Counter counter : job.getCounters()
                .getGroup(Listen_History_Weekday_Weekend_Mapper.DAY_COUNTER_GROUP)) {
            System.out.println(counter.getDisplayName() + "\t" + counter.getValue());
        }
    }

    FileSystem.get(conf).delete(new Path(args[1]), true);

    System.exit(code);
}
From source file:Analysis.A9_Max_Activity_By_Time_of_Day.Most_Listens_By_Time_of_Day_Driver.java
/**
 * @param args the command line arguments
 */
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
    Configuration conf = new Configuration();

    Job job = Job.getInstance(conf, "Most listens by Time of the Day");
    job.setJarByClass(Most_Listens_By_Time_of_Day_Driver.class);

    job.setMapperClass(Most_Listens_By_Time_of_Day_Mapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(NullWritable.class);

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    int code = job.waitForCompletion(true) ? 0 : 1;

    if (code == 0) {
        for (Counter counter : job.getCounters()
                .getGroup(Most_Listens_By_Time_of_Day_Mapper.HOUR_COUNTER_GROUP)) {
            System.out.println(counter.getDisplayName() + "\t" + counter.getValue());
        }
    }

    FileSystem.get(conf).delete(new Path(args[1]), true);

    System.exit(code);
}
From source file:bb.BranchAndBound.java
License:Apache License
public static void main(String[] args) throws Exception {
    /*Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: branchandbound <input> <output>");
        System.exit(2);
    }
    Job job = new Job(conf, "branch and bound");
    job.setJarByClass(BranchAndBound.class);
    job.setMapperClass(BBMapper.class);
    // job.setCombinerClass(IntSumReducer.class);
    // job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);*/
    int n;
    String[] inputargs = new GenericOptionsParser(new Configuration(), args).getRemainingArgs();
    if (inputargs.length != 2) {
        System.err.println("Usage: branchandbound <data directory> <n>");
        System.exit(2);
    }
    n = Integer.parseInt(inputargs[1]);
    String dataDir = inputargs[0];
    String prev_output = dataDir + "/input";
    /*
    for (int i = 1; i <= n; i++) {
        for (int j = 0; j < 2; j++) {
            String input = prev_output;
            String output = inputargs[1] + "/iteration" + i + "_" + j;
            Job job = getJob(input, output, i, j);
            job.waitForCompletion(true); // if failed ????
            prev_output = output;
        }
    }
    */
    //prev_output = dataDir + "/output" + "/iteration" + 17;
    long totalNodes = 0;
    long searchedNodes = 0;
    long cutbyDEE = 0;
    int mapTotal = 768;
    for (int i = 0; i <= n; i++) {
        iterRound = i;
        String input = prev_output;
        String output = dataDir + "/output" + "/iteration" + i;
        Job job = getJob(input, output, dataDir, i);
        if (i == n) {
            numReduceTasks = 1;
        }
        //job.setNumMapTasks(200);
        if (numOutput > mapTotal) {
            FileInputFormat.setMaxInputSplitSize(job, 10 * (8 * n + 10) + numOutput * (8 * n + 10) / 3000);
            FileInputFormat.setMinInputSplitSize(job, Math.max((8 * n + 10), numOutput * (8 * n + 10) / 5000));
        } else {
            FileInputFormat.setMaxInputSplitSize(job, (8 * n + 10));
        }
        /*
        if (i == 0) {
            job.setNumReduceTasks(1);
        } else {
            job.setNumReduceTasks(0);
        }
        */
        job.setNumReduceTasks(0);
        job.waitForCompletion(true); // if failed ????
        prev_output = output;
        Counters counters = job.getCounters();
        Counter counter = counters.findCounter("MyCounter", "Map Output Counter");
        numOutput = counter.getValue();
        totalNodes += numOutput;
        cutbyDEE += counters.findCounter("MyCounter", "Cut By DEE").getValue();
        searchedNodes += totalNodes + cutbyDEE + counters.findCounter("MyCounter", "Cut By Bound").getValue();
        System.out.println(numOutput + " " + (8 * n + 10) + " " + (numOutput * (8 * n + 10) / 768));
    }
    System.out.println("searchedNodes " + searchedNodes);
    System.out.println(totalNodes);
    System.out.println("cut by dee " + cutbyDEE);
}
From source file:be.uantwerpen.adrem.bigfim.BigFIMDriver.java
License:Apache License
private boolean runAprioriOncPhaseOnce(FIMOptions opt, long nrLines, int i, String info, String outputDir,
        String cacheFile) throws IOException, URISyntaxException, ClassNotFoundException, InterruptedException {
    int prefixSize = opt.prefixLength;

    System.out
            .println("[AprioriPhase]: Phase: " + i + " input: " + opt.inputFile + ", output: " + opt.outputDir);

    Job job = prepareJob(new Path(opt.inputFile), new Path(outputDir), SplitByKTextInputFormat.class,
            AprioriPhaseMapper.class, Text.class, Text.class, AprioriPhaseReducer.class, Text.class,
            IntWritable.class, TextOutputFormat.class);

    job.setJobName(info);
    job.setJarByClass(BigFIMDriver.class);
    job.setNumReduceTasks(1);

    Configuration conf = job.getConfiguration();
    setConfigurationValues(conf, opt);
    if (nrLines != -1) {
        conf.setLong(NUMBER_OF_LINES_KEY, nrLines);
    }

    if (cacheFile != null) {
        addCacheFile(new URI(cacheFile.replace(" ", "%20")), conf);
    }

    runJob(job, info);

    if (prefixSize <= i
            && job.getCounters().findCounter(COUNTER_GROUPNAME, COUNTER_NRLARGEPREFIXGROUPS).getValue() == 0) {
        return false;
    }
    if (prefixSize < i) {
        System.out.println(
                "[AprioriPhase]: Prefix group length updated! Now " + (i) + " instead of " + prefixSize);
    }
    return true;
}
From source file:BU.MET.CS755.SpeciesIterDriver2.java
static boolean MRSpeciesRank(String args[], int iterCnt) {
    long newCounterVal = 0;
    long totalLinks = 1; // Initialize to 1 to prevent divide by zero
    long totalIterations = 0;
    Job theJob = null;

    conf = new JobConf(SpeciesIterDriver2.class);
    conf.setJobName("Species Iter");
    conf.setNumReduceTasks(5);
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);
    conf.setMapperClass(SpeciesIterMapper2.class);
    conf.setReducerClass(SpeciesIterReducer2.class);

    boolean nextIterationNeeded = true;

    while (nextIterationNeeded || numExtraIterations != 0) {
        long iterationNumber = 0;

        if ((iterCnt == 0) || (iterCnt == 1)) {
            inputpath = args[1] + "0";
        } else {
            inputpath = args[1] + iterCnt;
        }
        iterCnt++;

        conf.set("iterationNumber", Integer.toString(iterCnt));
        conf.set("totalLinks", Long.toString(totalLinks));

        outputpath = args[1] + iterCnt;

        FileInputFormat.setInputPaths(conf, new Path(inputpath));
        FileOutputFormat.setOutputPath(conf, new Path(outputpath));

        try {
            theJob = new Job(conf, "SpeciesIter");
        } catch (Exception e) {
            e.printStackTrace();
        }

        try {
            if (theJob != null) {
                theJob.waitForCompletion(true);
            }
        } catch (Exception e) {
            e.printStackTrace();
        }

        try {
            if (theJob.isComplete()) {
                Counters jobCtrs = theJob.getCounters();

                if (jobCtrs != null) {
                    newCounterVal = jobCtrs.findCounter(ITERATION_COUNTER.ITERATIONS_NEEDED).getValue();
                }

                // If reducer recorded change in species rank, repeat iteration.
                if ((newCounterVal > 0) || (iterCnt == 1)) {
                    nextIterationNeeded = true;
                } else {
                    nextIterationNeeded = false;
                    numExtraIterations--; // Do one extra iteration
                }

                totalLinks = jobCtrs.findCounter(BU.MET.CS755.SpeciesIterDriver2.ITERATION_COUNTER.TOTAL_LINKS)
                        .getValue();
            }

            totalIterations += 1;

            if (totalIterations > 200) {
                System.out.println("too many iterations!!");
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    System.out.println("Total iterations = " + totalIterations);
    return true;
}
From source file:cn.edu.hfut.dmic.webcollector.crawldb.Generator.java
public static String generate(Path crawlPath, Configuration conf) throws Exception {
    SegmentUtil.initSegments(crawlPath, conf);
    String segmentName = SegmentUtil.createSegment(crawlPath, conf);

    Path currentPath = new Path(crawlPath, "crawldb/current");
    Path generatePath = new Path(crawlPath, "segments/" + segmentName + "/generate");

    Job job = new Job(conf);
    job.setJobName("generate " + crawlPath.toString());
    job.setJarByClass(Generator.class);

    job.setReducerClass(GeneratorReducer.class);

    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(CrawlDatum.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(CrawlDatum.class);

    FileInputFormat.addInputPath(job, currentPath);
    FileOutputFormat.setOutputPath(job, generatePath);

    job.waitForCompletion(true);

    long count = job.getCounters().findCounter("generator", "count").getValue();
    System.out.println("total generate:" + count);

    if (count == 0) {
        return null;
    } else {
        return segmentName;
    }
}
From source file:cn.jpush.hdfs.mr.example.WordMedian.java
License:Apache License
public int run(String[] args) throws Exception {
    long random = new Random().nextLong();
    log.info("random -> " + random);
    args = new String[] { String.format(ConfigUtils.HDFS.WORDCOUNT_IN, "word.txt"),
            String.format(ConfigUtils.HDFS.WORDCOUNT_OUT, random) };

    setConf(new Configuration());
    Configuration conf = getConf();

    @SuppressWarnings("deprecation")
    Job job = new Job(conf, "word median");
    job.setJarByClass(WordMedian.class);
    job.setMapperClass(WordMedianMapper.class);
    job.setCombinerClass(WordMedianReducer.class);
    job.setReducerClass(WordMedianReducer.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    boolean result = job.waitForCompletion(true);

    // Wait for JOB 1 -- get middle value to check for Median
    long totalWords = job.getCounters().getGroup(TaskCounter.class.getCanonicalName())
            .findCounter("MAP_OUTPUT_RECORDS", "Map output records").getValue();
    int medianIndex1 = (int) Math.ceil((totalWords / 2.0));
    int medianIndex2 = (int) Math.floor((totalWords / 2.0));

    median = readAndFindMedian(args[1], medianIndex1, medianIndex2, conf);

    return (result ? 0 : 1);
}
From source file:co.cask.cdap.hbase.wd.RowKeyDistributorTestBase.java
License:Apache License
private void testMapReduceInternal(long origKeyPrefix, Scan scan, int numValues, int startWithValue,
        int seekIntervalMinValue, int seekIntervalMaxValue)
        throws IOException, InterruptedException, ClassNotFoundException {
    int valuesCountInSeekInterval = writeTestData(origKeyPrefix, numValues, startWithValue,
            seekIntervalMinValue, seekIntervalMaxValue);

    // Reading data
    Configuration conf = new Configuration(testingUtility.getConfiguration());
    conf.set("fs.defaultFS", "file:///");
    conf.set("fs.default.name", "file:///");
    conf.setInt("mapreduce.local.map.tasks.maximum", 16);
    conf.setInt("mapreduce.local.reduce.tasks.maximum", 16);
    Job job = Job.getInstance(conf, "testMapReduceInternal()-Job");
    TableMapReduceUtil.initTableMapperJob(TABLE_NAME, scan, RowCounterMapper.class,
            ImmutableBytesWritable.class, Result.class, job);

    // Substituting standard TableInputFormat which was set in TableMapReduceUtil.initTableMapperJob(...)
    job.setInputFormatClass(WdTableInputFormat.class);
    keyDistributor.addInfo(job.getConfiguration());

    job.setOutputFormatClass(NullOutputFormat.class);
    job.setNumReduceTasks(0);

    boolean succeeded = job.waitForCompletion(true);
    Assert.assertTrue(succeeded);

    long mapInputRecords = job.getCounters().findCounter(RowCounterMapper.Counters.ROWS).getValue();
    Assert.assertEquals(valuesCountInSeekInterval, mapInputRecords);

    // Need to kill the job after completion, as it could leave MRAppMaster running, not terminated.
    // Not sure what's causing this, but maybe a problem in MiniYarnCluster.
    job.killJob();
}
From source file:co.nubetech.hiho.dedup.DedupJob.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    Configuration conf = getConf();
    populateConfiguration(args);
    try {
        checkMandatoryConfs();
    } catch (HIHOException e1) {
        e1.printStackTrace();
        throw new Exception(e1);
    }

    Job job = new Job(conf);
    job.setJobName("Dedup job");
    job.setJarByClass(DedupJob.class);

    Class inputFormatClass = Class.forName(inputFormat);
    Class outputFormatClass = Class.forName(outputFormat);
    Class inputKeyClass = Class.forName(inputKeyClassName);
    Class inputValueClass = Class.forName(inputValueClassName);

    if (dedupBy.equals("key")) {
        job.setMapperClass(DedupKeyMapper.class);
        job.setReducerClass(DedupKeyReducer.class);
        job.setMapOutputValueClass(inputValueClass);
    } else if (dedupBy.equals("value")) {
        job.setMapperClass(DedupValueMapper.class);
        job.setReducerClass(DedupValueReducer.class);
        job.setMapOutputValueClass(inputKeyClass);
    }

    job.setInputFormatClass(inputFormatClass);
    if (inputFormat.equals("co.nubetech.hiho.dedup.DelimitedTextInputFormat")) {
        DelimitedTextInputFormat.setProperties(job, delimiter, column);
    }
    job.setMapOutputKeyClass(HihoTuple.class);

    job.setOutputKeyClass(inputKeyClass);
    job.setOutputValueClass(inputValueClass);

    job.setPartitionerClass(HihoHashPartitioner.class);
    FileInputFormat.setInputPaths(job, inputPath);
    job.setOutputFormatClass(outputFormatClass);
    FileOutputFormat.setOutputPath(job, new Path(outputPath));

    try {
        logger.debug("Output format class is " + job.getOutputFormatClass());
        logger.debug("Class is " + ReflectionUtils
                .newInstance(job.getOutputFormatClass(), job.getConfiguration()).getClass().getName());
        job.waitForCompletion(false);
        if (job.isComplete()) {
            Counters counters = job.getCounters();
            totalRecordsRead = counters.findCounter(DedupRecordCounter.TOTAL_RECORDS_READ).getValue();
            badRecords = counters.findCounter(DedupRecordCounter.BAD_RECORD).getValue();
            output = counters.findCounter(DedupRecordCounter.OUTPUT).getValue();
            duplicateRecords = totalRecordsRead - output;
            logger.info("Total records read are: " + totalRecordsRead);
            logger.info("Bad Records are: " + badRecords);
            logger.info("Output records are: " + output);
            logger.info("Duplicate records are: " + duplicateRecords);
        }
    } catch (Exception e) {
        e.printStackTrace();
    }
    return 0;
}