List of usage examples for the getValue() method of org.apache.hadoop.mapreduce.Counter
/** Signature of the method the examples below call: it takes no arguments and returns a {@code long}. */
long getValue();
From source file:com.metamx.druid.indexer.IndexGeneratorJob.java
License:Open Source License
/**
 * Configures, submits and waits for the index-generator MapReduce job.
 *
 * <p>Every JVM system property whose name starts with {@code "hadoop."} is copied into the job
 * configuration with that prefix removed. After the job finishes, the value of the
 * {@code INVALID_ROW_COUNTER} counter is stored in {@code jobStats}.
 *
 * @return the result of {@code Job.waitForCompletion(true)}
 * @throws RuntimeException wrapping any exception raised while configuring or running the job
 */
public boolean run() {
    try {
        final String jobName =
            String.format("%s-index-generator-%s", config.getDataSource(), config.getIntervals());
        final Job job = new Job(new Configuration(), jobName);

        final Configuration jobConf = job.getConfiguration();
        jobConf.set("io.sort.record.percent", "0.23");

        // Forward "hadoop."-prefixed system properties to the job, minus the prefix.
        final String hadoopPrefix = "hadoop.";
        for (String propName : System.getProperties().stringPropertyNames()) {
            if (!propName.startsWith(hadoopPrefix)) {
                continue;
            }
            jobConf.set(propName.substring(hadoopPrefix.length()), System.getProperty(propName));
        }

        // Map side.
        job.setInputFormatClass(TextInputFormat.class);
        job.setMapperClass(IndexGeneratorMapper.class);
        job.setMapOutputValueClass(Text.class);
        SortableBytes.useSortableBytesAsMapOutputKey(job);

        // Reduce side: one reducer per bucket.
        job.setNumReduceTasks(Iterables.size(config.getAllBuckets()));
        job.setPartitionerClass(IndexGeneratorPartitioner.class);
        job.setReducerClass(IndexGeneratorReducer.class);
        job.setOutputKeyClass(BytesWritable.class);
        job.setOutputValueClass(Text.class);
        job.setOutputFormatClass(IndexGeneratorOutputFormat.class);
        FileOutputFormat.setOutputPath(job, config.makeIntermediatePath());

        config.addInputPaths(job);
        config.intoConfiguration(job);

        job.setJarByClass(IndexGeneratorJob.class);

        job.submit();
        log.info("Job %s submitted, status available at %s", job.getJobName(), job.getTrackingURL());

        final boolean succeeded = job.waitForCompletion(true);

        final Counter invalidRows =
            job.getCounters().findCounter(HadoopDruidIndexerConfig.IndexJobCounters.INVALID_ROW_COUNTER);
        jobStats.setInvalidRowCount(invalidRows.getValue());

        return succeeded;
    } catch (Exception e) {
        throw new RuntimeException(e);
    }
}
From source file:com.moz.fiji.mapreduce.framework.JobHistoryFijiTable.java
License:Apache License
/**
 * Flattens all counters of a job into a single map.
 *
 * @param job Job from which to get counters.
 * @return a map from counters to their counts. Keys have the form {@code group:name}.
 * @throws IOException in case of an error getting the counters.
 */
private static Map<String, Long> getCounters(final Job job) throws IOException {
    final Map<String, Long> flattened = Maps.newHashMap();
    final Counters jobCounters = job.getCounters();
    for (String groupName : jobCounters.getGroupNames()) {
        for (Counter entry : jobCounters.getGroup(groupName)) {
            final String key = String.format("%s:%s", groupName, entry.getName());
            final long count = entry.getValue();
            flattened.put(key, count);
        }
    }
    return flattened;
}
From source file:com.netflix.bdp.inviso.history.TraceJobHistoryLoader.java
License:Apache License
private Map<String, Map<String, Long>> handleCounterEntries(Counters counters) { Map<String, Map<String, Long>> result = new HashMap<>(); for (CounterGroup group : counters) { Map<String, Long> cmap = new HashMap<>(); for (Counter counter : group) { cmap.put(counter.getDisplayName(), counter.getValue()); }//from w w w .ja v a2 s . c o m result.put(group.getDisplayName(), cmap); } return result; }
From source file:com.sa.npopa.samples.hbase.RowCounter.java
License:Apache License
/**
 * Runs the row-counting job and optionally verifies the resulting count.
 *
 * <p>When the configuration value stored under {@code EXPECTED_COUNT_KEY} is set (that is, not
 * {@code -1}) and the job succeeded, the {@code ROWS} counter must equal it for the run to
 * count as successful.
 *
 * @param args command-line arguments; at least one is required
 * @return {@code -1} if the arguments are invalid or no job could be created, {@code 0} on
 *     success, {@code 1} on failure or count mismatch
 * @throws Exception if job creation or execution fails
 */
@Override
public int run(String[] args) throws Exception {
    if (args.length < 1) {
        printUsage("Wrong number of parameters: " + args.length);
        return -1;
    }

    Job job = createSubmittableJob(getConf(), args);
    if (job == null) {
        return -1;
    }

    boolean succeeded = job.waitForCompletion(true);
    final long expectedCount = getConf().getLong(EXPECTED_COUNT_KEY, -1);

    // Nothing to verify: the job failed or no expected count was configured.
    if (!succeeded || expectedCount == -1) {
        return succeeded ? 0 : 1;
    }

    final Counter rows = job.getCounters().findCounter(RowCounterMapper.Counters.ROWS);
    if (expectedCount == rows.getValue()) {
        return 0;
    }

    LOG.error("Failing job because count of '" + rows.getValue()
        + "' does not match expected count of '" + expectedCount + "'");
    return 1;
}
From source file:com.scaleunlimited.cascading.LoggingFlowProcess.java
License:Apache License
/** * @param counter whose value should be returned * @return current value of the counter, local to the task * <br/><br/><b>Note:</b> Only the JobTracker aggregates task counter values * to report the job-wide total./*from w w w .ja va 2s. co m*/ */ public long getCounter(Enum counter) { if (_isLocal) { AtomicLong count = _localCounters.get(counter); if (count != null) { return count.get(); } else { return 0; } } else { Counters counters = new Counters(); Counter hadoopCounter = counters.findCounter(counter); if (hadoopCounter != null) { return (int) hadoopCounter.getValue(); } else { return 0; } } }
From source file:com.synerzip.analytics.commoncrawl.googleads.counter.GoogleAdsCounterJob.java
License:Apache License
/** * Configures and submits the Map Reduce Job to Hadoop *//*w w w . j av a 2s. co m*/ public int run(String[] args) throws Exception { String inputPath = null; String outputPath = null; boolean overwrite = false; String s3AccessKey = null; String s3SecretKey = null; // Read the command line arguments. We're not using GenericOptionsParser // to prevent having to include commons.cli as a dependency. for (int index = 0; index < args.length; index++) { try { if (ARGNAME_INPATH.equals(args[index])) { inputPath = args[++index]; } else if (ARGNAME_OUTPATH.equals(args[index])) { outputPath = args[++index]; } else if (ARGNAME_S3ACCESSKEY.equals(args[index])) { s3AccessKey = args[++index]; } else if (ARGNAME_S3SECRETKEY.equals(args[index])) { s3SecretKey = args[++index]; } else if (ARGNAME_MAXFILES.equals(args[index])) { // FIXME - No use of static methods WarcFileFilter.setMax(Long.parseLong(args[++index])); } else if (ARGNAME_OVERWRITE.equals(args[index])) { overwrite = true; } else { LOG.warn("Unsupported argument: " + args[index]); } } catch (ArrayIndexOutOfBoundsException e) { usage(); throw new IllegalArgumentException(); } } if (inputPath == null || outputPath == null) { usage(); throw new IllegalArgumentException(); } if (inputPath.contains("s3n") && (s3AccessKey == null || s3SecretKey == null)) { usage(); LOG.info("Please specify Access Key and Secret Key to access data on AWS S3 storage "); throw new IllegalArgumentException(); } // Create the Hadoop job. Configuration conf = new Configuration(); Job job = Job.getInstance(conf); job.setJarByClass(GoogleAdsCounterJob.class); if (inputPath.contains("s3n") && (s3AccessKey != null && s3SecretKey != null)) { conf.set("AWS_ACCESS_KEY_ID", s3AccessKey); conf.set("AWS_SECRET_ACCESS_KEY", s3SecretKey); } // Scan the provided input path for WARC files. 
LOG.info("setting input path to '" + inputPath + "'"); WarcFileFilter.setFilter(FILEFILTER); FileInputFormat.addInputPath(job, new Path(inputPath)); // FIXME - I see the problem that you want to give a dynamic number to a // static class. My question is, Is this really required, if we just // point to a file in s3 that should solve our problem FileInputFormat.setInputPathFilter(job, WarcFileFilter.class); // Delete the output path directory if it already exists and user wants // to overwrite it. if (overwrite) { LOG.info("clearing the output path at '" + outputPath + "'"); FileSystem fs = FileSystem.get(new URI(outputPath), conf); if (fs.exists(new Path(outputPath))) { fs.delete(new Path(outputPath), true); } } // Set the path where final output 'part' files will be saved. LOG.info("setting output path to '" + outputPath + "'"); FileOutputFormat.setOutputPath(job, new Path(outputPath)); /* * // Defines additional single text based output 'GoogleAdClient' for * the job MultipleOutputs.addNamedOutput(job, "GoogleAdClient", * TextOutputFormat.class, Text.class,LongWritable.class ); * * // Defines additional text based output 'GoogleAdType' for the job * MultipleOutputs.addNamedOutput(job, * "GoogleAdType",TextOutputFormat.class, Text.class, * LongWritable.class); */ // Set which InputFormat class to use. job.setInputFormatClass(WARCInputFormat.class); // Set which OutputFormat class to use. job.setOutputFormatClass(TextOutputFormat.class); /* * Using MultipleOutputs creates zero-sized default output e.g.: * * part-r-00000. To prevent this use LazyOutputFormat instead of * job.setOutputFormatClass(TextOutputFormat.class) in Hadoop job * configuration. */ // LazyOutputFormat.setOutputFormatClass(job, TextOutputFormat.class); // job.setPartitionerClass(GoogleAdsCounterPartitioner.class); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(IntWritable.class); //job.setNumReduceTasks(4); // Set the output data types. 
job.setOutputKeyClass(Text.class); job.setOutputValueClass(IntWritable.class); // Set which Mapper and Reducer classes to use. job.setMapperClass(GoogleAdsCounterMapper.class); // job.setMapperClass(CrawlMapper_AdStatsDetails.class); job.setReducerClass(GoogleAdsCounterReducer.class); // set combiner //job.setCombinerClass(GoogleAdsCounterReducer.class); // set job name job.setJobName("CommonCrawl Data Processing : Counting Google Ads"); long startTime = System.currentTimeMillis(); if (job.waitForCompletion(true)) { LOG.info("Job completion status : " + job.waitForCompletion(true)); long endTime = System.currentTimeMillis(); long difference = endTime - startTime; LOG.info("Elapsed milliseconds: " + difference); Counter totalResponsePagesCounter = job.getCounters().findCounter(TestCounters.TOTALRESPONSEPAGES); LOG.info("totalResponsePagesCounter = " + totalResponsePagesCounter.getValue()); Counter totalGoogleAdPagesCounter = job.getCounters().findCounter(TestCounters.TOTALGOOGLEADSPAGES); LOG.info("totalGoogleAdPagesCounter = " + totalGoogleAdPagesCounter.getValue()); return 0; } else { return 1; } }
From source file:com.tfm.utad.reducerdata.ReducerDataPig.java
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException { SimpleDateFormat sdf = new SimpleDateFormat("YYYY-MM-dd-HH-mm-ss"); Date date = new Date(); Path inputPath = new Path("/home/jab/camus/reducer-data-pig"); Path outputDir = new Path("/home/jab/camus/pigdata/" + sdf.format(date)); // Create configuration Configuration conf = new Configuration(true); conf.set(FS_DEFAULT_FS, HDFS_LOCALHOST_LOCALDOMAIN); FileSystem fs = FileSystem.get(conf); Path filesPath = new Path(inputPath + "/*"); FileStatus[] files = fs.globStatus(filesPath); // Create job Job job = new Job(conf, "ReducerDataPig"); job.setJarByClass(ReducerDataPig.class); // Setup MapReduce job.setMapperClass(ReducerDataPigMapper.class); job.setReducerClass(ReducerDataPigReducer.class); job.setNumReduceTasks(1);/*from ww w.j a v a 2s . co m*/ // Specify key / value job.setOutputKeyClass(LongWritable.class); job.setOutputValueClass(ReducerPigKey.class); // Input FileInputFormat.addInputPath(job, inputPath); job.setInputFormatClass(SequenceFileInputFormat.class); // Output FileOutputFormat.setOutputPath(job, outputDir); job.setOutputFormatClass(TextOutputFormat.class); // Delete output if exists if (fs.exists(outputDir)) { fs.delete(outputDir, true); } // Execute job int code = job.waitForCompletion(true) ? 0 : 1; if (code == 0) { Counters counters = job.getCounters(); Counter malformedCounter = counters.findCounter(ReducerDataEnum.MALFORMED_DATA); LOG.info("Counter malformed data: " + malformedCounter.getValue()); for (FileStatus fStatus : files) { LOG.info("File name:" + fStatus.getPath()); if (fStatus.isFile()) { LOG.info("Removing file in path:" + fStatus.getPath()); fs.delete(fStatus.getPath(), false); } } } }
From source file:com.tfm.utad.reducerdata.ReducerDataVertica.java
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException { SimpleDateFormat sdf = new SimpleDateFormat("YYYY-MM-dd-HH-mm-ss"); Date date = new Date(); Path inputPath = new Path("/home/jab/camus/reducer-data-vertica"); Path outputDir = new Path("/home/jab/camus/verticadb/" + sdf.format(date)); // Create configuration Configuration conf = new Configuration(true); conf.set(FS_DEFAULT_FS, HDFS_LOCALHOST_LOCALDOMAIN); FileSystem fs = FileSystem.get(conf); Path filesPath = new Path(inputPath + "/*"); FileStatus[] files = fs.globStatus(filesPath); // Create job Job job = new Job(conf, "ReducerDataVertica"); job.setJarByClass(ReducerDataVertica.class); // Setup MapReduce job.setMapperClass(ReducerDataVerticaMapper.class); job.setReducerClass(ReducerDataVerticaReducer.class); job.setNumReduceTasks(1);/*from w ww . j av a 2 s . c om*/ // Specify key / value job.setOutputKeyClass(Text.class); job.setOutputValueClass(ReducerVerticaValue.class); // Input FileInputFormat.addInputPath(job, inputPath); job.setInputFormatClass(SequenceFileInputFormat.class); // Output FileOutputFormat.setOutputPath(job, outputDir); job.setOutputFormatClass(TextOutputFormat.class); // Delete output if exists if (fs.exists(outputDir)) { fs.delete(outputDir, true); } // Execute job int code = job.waitForCompletion(true) ? 0 : 1; if (code == 0) { Counters counters = job.getCounters(); Counter malformedCounter = counters.findCounter(ReducerDataEnum.MALFORMED_DATA); LOG.info("Counter malformed data: " + malformedCounter.getValue()); for (FileStatus fStatus : files) { LOG.info("File name:" + fStatus.getPath()); if (fStatus.isFile()) { LOG.info("Removing file in path:" + fStatus.getPath()); fs.delete(fStatus.getPath(), false); } } } }
From source file:com.twitter.pig.backend.hadoop.executionengine.tez.TezJobControlCompiler.java
License:Apache License
/**
 * Reads the counters of one group of a finished job and stores their running totals.
 *
 * <p>Each counter's display name is parsed as an integer index. For index {@code i} the stored
 * value is the sum of the counter values at all indices below {@code i}. The resulting pairs are
 * placed in {@code globalCounters} under {@code operationID}.
 *
 * @param job with the global counters collected.
 * @param operationID unique operation identifier; used as the key in {@code globalCounters}
 *     and embedded in each generated counter name
 * @throws RuntimeException if reading the counters or computing the sums fails
 */
private void saveCounters(Job job, String operationID) {
    try {
        Counters jobCounters = HadoopShims.getCounters(job);
        Group rankGroup = jobCounters.getGroup(getGroupName(jobCounters.getGroupNames()));

        // Index the counter values by the integer encoded in their display name.
        HashMap<Integer, Long> valueByIndex = new HashMap<Integer, Long>();
        for (Iterator<Counter> cursor = rankGroup.iterator(); cursor.hasNext();) {
            try {
                Counter current = cursor.next();
                valueByIndex.put(Integer.valueOf(current.getDisplayName()), current.getValue());
            } catch (Exception ex) {
                ex.printStackTrace();
            }
        }

        counterSize = valueByIndex.size();

        // Boxed on purpose: a missing index yields null, which surfaces on the next addition.
        Long lastValue = 0L;
        Long runningSum = 0L;
        ArrayList<Pair<String, Long>> cumulative = new ArrayList<Pair<String, Long>>();
        for (int i = 0; i < counterSize; i++) {
            runningSum = runningSum + lastValue;
            lastValue = valueByIndex.get(Integer.valueOf(i));
            String counterName = TezJobControlCompiler.PIG_MAP_COUNTER + operationID
                    + TezJobControlCompiler.PIG_MAP_SEPARATOR + i;
            cumulative.add(new Pair<String, Long>(counterName, runningSum));
        }

        globalCounters.put(operationID, cumulative);
    } catch (Exception e) {
        String msg = "Error to read counters into Rank operation counterSize " + counterSize;
        throw new RuntimeException(msg, e);
    }
}
From source file:crunch.MaxTemperature.java
License:Apache License
@Test public void parsesMalformedTemperature() throws IOException, InterruptedException {/*from ww w. j a v a 2 s .com*/ Text value = new Text("0335999999433181957042302005+37950+139117SAO +0004" + // Year ^^^^ "RJSN V02011359003150070356999999433201957010100005+353"); // Temperature ^^^^^ Counters counters = new Counters(); new MapDriver<LongWritable, Text, Text, IntWritable>() .withMapper(new MaxTemperatureMapper()) .withInputValue(value) .withCounters(counters) .runTest(); Counter c = counters.findCounter(MaxTemperatureMapper.Temperature.MALFORMED); assertThat(c.getValue(), is(1L)); }