List of usage examples for org.apache.hadoop.mapreduce.Job#getCounters()
public Counters getCounters() throws IOException
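Before the examples from real projects, here is a minimal sketch of the typical pattern: submit a job, wait for it to complete, then read an aggregated counter. It uses the Job.getInstance(...) factory and the built-in TaskCounter enum from the current API; the job name and the use of args for input/output paths are hypothetical placeholders. Note that several examples below use the older new Job(conf) constructor and Task.Counter enum from earlier Hadoop releases.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Counters;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.TaskCounter;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class GetCountersSketch {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "counter-sketch"); // hypothetical job name
        job.setJarByClass(GetCountersSketch.class);
        FileInputFormat.addInputPath(job, new Path(args[0]));   // hypothetical input path
        FileOutputFormat.setOutputPath(job, new Path(args[1])); // hypothetical output path
        if (job.waitForCompletion(true)) {
            // getCounters() fetches the counters aggregated across all tasks;
            // it contacts the cluster (or job history) and may throw IOException.
            Counters counters = job.getCounters();
            long mapInputRecords = counters.findCounter(TaskCounter.MAP_INPUT_RECORDS).getValue();
            System.out.println("MAP_INPUT_RECORDS = " + mapInputRecords);
        }
    }
}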
From source file:org.springframework.data.hadoop.batch.mapreduce.JobTasklet.java
License:Apache License
private void saveCounters(Job job, StepContribution contribution) {
    Counters counters = null;
    try {
        counters = job.getCounters();
    } catch (Exception ex) {
        if (RuntimeException.class.isAssignableFrom(ex.getClass())) {
            throw (RuntimeException) ex;
        } else {
            // ignore - we just can't get stats
        }
    }
    if (counters == null) {
        return;
    }
    Counter count = counters.findCounter(Task.Counter.MAP_INPUT_RECORDS);
    for (int i = 0; i < safeLongToInt(count.getValue()); i++) {
        contribution.incrementReadCount();
    }
    count = counters.findCounter(Task.Counter.MAP_SKIPPED_RECORDS);
    contribution.incrementReadSkipCount(safeLongToInt(count.getValue()));
    count = counters.findCounter(Task.Counter.REDUCE_OUTPUT_RECORDS);
    contribution.incrementWriteCount(safeLongToInt(count.getValue()));
    count = counters.findCounter(Task.Counter.REDUCE_SKIPPED_RECORDS);
    for (int i = 0; i < safeLongToInt(count.getValue()); i++) {
        contribution.incrementWriteSkipCount();
    }
}
From source file:org.springframework.data.hadoop.batch.mapreduce.JobTasklet.java
License:Apache License
private static void saveJobStats(Job job, StepExecution stepExecution) {
    if (stepExecution == null) {
        return;
    }
    ExecutionContext executionContext = stepExecution.getExecutionContext();
    String statusPrefix = "Job Status::";
    executionContext.put(statusPrefix + "ID", JobUtils.getJobId(job).toString());
    executionContext.put(statusPrefix + "Name", job.getJobName());
    executionContext.put(statusPrefix + "Tracking URL", job.getTrackingURL());
    executionContext.put(statusPrefix + "State", JobUtils.getStatus(job).toString());
    try {
        for (String cgName : job.getCounters().getGroupNames()) {
            CounterGroup group = job.getCounters().getGroup(cgName);
            Iterator<Counter> ci = group.iterator();
            while (ci.hasNext()) {
                Counter c = ci.next();
                executionContext.put(group.getDisplayName().trim() + "::" + c.getDisplayName().trim(),
                        c.getValue());
            }
        }
    } catch (Exception ignore) {
    }
}
From source file:org.trend.hgraph.mapreduce.pagerank.Driver.java
License:Apache License
private static long getPageRankChangedCount(Job job) throws IOException {
    long value = 0L;
    try {
        value = job.getCounters().findCounter(CalculatePageRankReducer.Counters.CHANGED_PAGE_RANK_COUNT)
                .getValue();
    } catch (IOException e) {
        LOGGER.error("get pageRankChangedCount failed", e);
        throw e;
    }
    LOGGER.info("pageRankChangedCount=" + value);
    return value;
}
From source file:org.trend.hgraph.mapreduce.pagerank.Driver.java
License:Apache License
private static int collectVeticesTotalCount(Configuration conf, String vertexTableName)
        throws IOException, InterruptedException, ClassNotFoundException {
    long totalCount = 1L;
    boolean success = false;
    Counter counter = null;
    String jobName = null;
    try {
        Job job = RowCounter.createSubmittableJob(conf, new String[] { vertexTableName });
        if (job == null) {
            System.err.println("job is null");
            return 1;
        }
        success = job.waitForCompletion(true);
        counter = job.getCounters()
                .findCounter("org.apache.hadoop.hbase.mapreduce.RowCounter$RowCounterMapper$Counters", "ROWS");
        jobName = job.getJobName();
        if (null != counter) {
            totalCount = counter.getValue();
            conf.set(Constants.PAGE_RANK_VERTICES_TOTAL_COUNT_KEY, totalCount + "");
        }
        LOGGER.info(Constants.PAGE_RANK_VERTICES_TOTAL_COUNT_KEY + "=" + totalCount);
    } catch (IOException e) {
        LOGGER.error("run " + jobName + " failed", e);
        throw e;
    } catch (InterruptedException e) {
        LOGGER.error("run " + jobName + " failed", e);
        throw e;
    } catch (ClassNotFoundException e) {
        LOGGER.error("run " + jobName + " failed", e);
        throw e;
    }
    return success ? 0 : -1;
}
From source file:org.trend.hgraph.mapreduce.pagerank.GetNoColumnsRows.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    if (null == args || args.length == 0) {
        System.err.println("no any option given !!");
        printUsage();
        return -1;
    }
    System.out.println("options:" + Arrays.toString(args));
    boolean and = true;
    String cmd = null;
    int mustStartIdx = -1;
    for (int a = 0; a < args.length; a++) {
        cmd = args[a];
        if (cmd.startsWith("-")) {
            if (mustStartIdx > -1) {
                System.err.println("option order is incorrect !!");
                printUsage();
                return -1;
            }
            if ("-a".equals(cmd)) {
                and = true;
            } else if ("-o".equals(cmd)) {
                and = false;
            } else {
                System.err.println("option is not defined !!");
                printUsage();
                return -1;
            }
        } else {
            if (mustStartIdx == -1) {
                mustStartIdx = a;
            }
        }
    }
    String tableName = args[mustStartIdx];
    String outputPath = args[mustStartIdx + 1];
    List<String> columns = new ArrayList<String>();
    for (int a = mustStartIdx + 2; a < args.length; a++) {
        columns.add(args[a]);
    }
    LOGGER.info("tableName=" + tableName);
    LOGGER.info("outputPath=" + outputPath);
    LOGGER.info("columns=" + columns);
    Configuration conf = this.getConf();
    conf.setBoolean(Mapper.AND_OR, and);
    conf.setStrings(Mapper.NO_COLUMNS, columns.toArray(new String[] {}));
    Job job = createSubmittableJob(conf, tableName, outputPath);
    boolean success = job.waitForCompletion(true);
    if (!success) {
        System.err.println("run job:" + job.getJobName() + " failed");
        return -1;
    }
    // for test
    Counter counter = job.getCounters().findCounter(
            "org.trend.hgraph.mapreduce.pagerank.GetNoColumnsRows$Mapper$Counters", "COLLECTED_ROWS");
    if (null != counter) {
        collectedRow = counter.getValue();
    }
    return 0;
}
From source file:org.unigram.likelike.lsh.LSHRecommendations.java
License:Apache License
/**
 * Extract clusters.
 * @param inputFile input
 * @param clusterFile cluster files
 * @param conf configuration
 * @return true when succeeded
 * @throws IOException -
 * @throws InterruptedException -
 * @throws ClassNotFoundException -
 */
private boolean extractClusters(final String inputFile, final String clusterFile, final Configuration conf)
        throws IOException, InterruptedException, ClassNotFoundException {
    Path inputPath = new Path(inputFile);
    Path outputPath = new Path(clusterFile);
    FsUtil.checkPath(outputPath, FileSystem.get(conf));
    Job job = new Job(conf);
    job.setJarByClass(LSHRecommendations.class);
    FileInputFormat.addInputPath(job, inputPath);
    FileOutputFormat.setOutputPath(job, outputPath);
    job.setMapperClass(SelectClustersMapper.class);
    job.setCombinerClass(SelectClustersReducer.class);
    job.setReducerClass(SelectClustersReducer.class);
    job.setMapOutputKeyClass(SeedClusterId.class);
    job.setMapOutputValueClass(RelatedUsersWritable.class);
    job.setOutputKeyClass(SeedClusterId.class);
    job.setOutputValueClass(RelatedUsersWritable.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setNumReduceTasks(
            conf.getInt(LikelikeConstants.NUMBER_OF_REDUCES, LikelikeConstants.DEFAULT_NUMBER_OF_REDUCES));
    boolean result = job.waitForCompletion(true);
    this.setResultConf(job.getCounters(), conf);
    return result;
}
From source file:pad.CheckDriver.java
License:Apache License
/**
 * Execute the CheckDriver Job.
 * @param args array of external arguments, not used in this method
 * @return <c>1</c> if the CheckDriver Job failed its execution; <c>0</c> if everything is ok.
 * @throws Exception
 */
public int run(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // GenericOptionsParser invocation in order to suppress the hadoop warning.
    new GenericOptionsParser(conf, args);
    Job job = new Job(conf, "CheckDriver");
    job.setJarByClass(CheckDriver.class);
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(NullWritable.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(NullWritable.class);
    job.setMapperClass(CheckMapper.class);
    job.setReducerClass(CheckReducer.class);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    FileInputFormat.addInputPath(job, this.input);
    FileOutputFormat.setOutputPath(job, this.input.suffix("_check"));
    if (!job.waitForCompletion(verbose))
        return 1;
    // Set up the private variable looking at the counter value
    this.testOk = (job.getCounters().findCounter(UtilCounters.NUM_ERRORS).getValue() == 0);
    // Delete the output folder (we did not write to it)
    FileSystem.get(conf).delete(input.suffix("_check"), true);
    return 0;
}
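The pad.* drivers in this and the following examples read application-defined counters such as UtilCounters.NUM_ERRORS. For context, a minimal hedged sketch of the producing side; the enum, reducer, and error condition here are hypothetical stand-ins mirroring that pattern, not taken from the pad sources:

import java.io.IOException;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Reducer;

public class CheckingReducer extends Reducer<IntWritable, NullWritable, NullWritable, NullWritable> {
    // Hypothetical counter enum mirroring the UtilCounters pattern.
    public enum MyCounters { NUM_ERRORS }

    @Override
    protected void reduce(IntWritable key, Iterable<NullWritable> values, Context context)
            throws IOException, InterruptedException {
        // Counters incremented in tasks are aggregated by the framework and become
        // visible to the driver through job.getCounters() once the job finishes.
        if (key.get() < 0) { // hypothetical error condition
            context.getCounter(MyCounters.NUM_ERRORS).increment(1);
        }
    }
}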
From source file:pad.InitializationDriver.java
License:Apache License
/**
 * Execute the InitializationDriver Job.
 *
 * If the input file format is an adjacency list, we can easily determine the initial number of nodes,
 * which is equal to the number of rows of the input file, while the number of cliques is zero.
 * In order to obtain a list of arcs from the adjacency list, we use \see InitializationMapperAdjacency
 * as Mapper and zero Reducers.
 *
 * If the input file format is a cliques list, we can easily determine the number of cliques,
 * which is equal to the number of rows of the input file.
 * In order to obtain an edge list from the cliques list, we use \see InitializationMapperClique
 * as Mapper. We store this result into a special folder, \see MOS_OUTPUT_NAME.
 * Into the regular folder, this Mapper emits all the encountered nodes.
 * We use \see InitializationReducerNumNodes as Reducer in order to count the initial number of nodes,
 * counting all the distinct nodes found. The combiner (\see InitializationCombinerNumNodes) locally reduces
 * the number of duplicated nodes.
 * Once the value of the NUM_INITIAL_NODES counter (\see UtilCounters) is obtained, we delete the empty files
 * produced by the Reducer and move the real results into the main/regular folder.
 *
 * @param args array of external arguments, not used in this method
 * @return <c>1</c> if the InitializationDriver Job failed its execution; <c>0</c> if everything is ok.
 * @throws Exception
 */
public int run(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // GenericOptionsParser invocation in order to suppress the hadoop warning.
    new GenericOptionsParser(conf, args);
    Job job = new Job(conf, "InitializationDriver");
    job.setJarByClass(InitializationDriver.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(IntWritable.class);
    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    FileInputFormat.addInputPath(job, this.input);
    FileOutputFormat.setOutputPath(job, this.output);
    if (this.type == InputType.ADJACENCY_LIST) {
        // In order to obtain the arcs list from the adjacency list, we need only a Mapper task.
        job.setMapperClass(InitializationMapperAdjacency.class);
        job.setNumReduceTasks(0);
    } else {
        // Set up the special folder.
        MultipleOutputs.addNamedOutput(job, MOS_OUTPUT_NAME, SequenceFileOutputFormat.class,
                IntWritable.class, IntWritable.class);
        MultipleOutputs.setCountersEnabled(job, true);
        // In order to obtain the edges list from the cliques list, we need only a Mapper task
        // and we save the result into the special folder.
        // Then, we need a Reducer task in order to count the initial number of nodes.
        job.setMapperClass(InitializationMapperClique.class);
        job.setCombinerClass(InitializationCombinerNumNodes.class);
        job.setReducerClass(InitializationReducerNumNodes.class);
    }
    if (!job.waitForCompletion(verbose))
        return 1;
    // Set up the private variables looking at the counters value
    this.numCliques = job.getCounters().findCounter(UtilCounters.NUM_CLIQUES).getValue();
    this.numInitialNodes = job.getCounters().findCounter(UtilCounters.NUM_INITIAL_NODES).getValue();
    if (this.type == InputType.CLIQUES_LIST) {
        FileSystem fs = FileSystem.get(conf);
        // Delete the empty outputs of the Job
        FileStatus[] filesStatus = fs.listStatus(this.output);
        for (FileStatus fileStatus : filesStatus)
            if (fileStatus.getPath().getName().contains("part"))
                fs.delete(fileStatus.getPath(), false);
        // Move the real outputs into the parent folder
        filesStatus = fs.listStatus(this.output.suffix("/" + MOS_OUTPUT_NAME));
        for (FileStatus fileStatus : filesStatus)
            fs.rename(fileStatus.getPath(), this.output.suffix("/" + fileStatus.getPath().getName()));
        // Delete the empty special folder
        fs.delete(this.output.suffix("/" + MOS_OUTPUT_NAME), true);
    }
    return 0;
}
From source file:pad.StarDriver.java
License:Apache License
/**
 * Execute the StarDriver Job.
 * @param args array of external arguments, not used in this method
 * @return <c>1</c> if the StarDriver Job failed its execution; <c>0</c> if everything is ok.
 * @throws Exception
 */
public int run(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // GenericOptionsParser invocation in order to suppress the hadoop warning.
    new GenericOptionsParser(conf, args);
    conf.set("type", this.type.toString());
    Job job = new Job(conf, this.title);
    job.setJarByClass(StarDriver.class);
    job.setMapOutputKeyClass(NodesPairWritable.class);
    job.setMapOutputValueClass(IntWritable.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(IntWritable.class);
    job.setMapperClass(StarMapper.class);
    job.setCombinerClass(StarCombiner.class);
    job.setPartitionerClass(NodePartitioner.class);
    job.setGroupingComparatorClass(NodeGroupingComparator.class);
    job.setReducerClass(StarReducer.class);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    FileInputFormat.addInputPath(job, this.input);
    FileOutputFormat.setOutputPath(job, this.output);
    if (!job.waitForCompletion(verbose))
        return 1;
    // Set up the private variable looking at the counter value
    this.numChanges = job.getCounters().findCounter(UtilCounters.NUM_CHANGES).getValue();
    return 0;
}
From source file:pad.TerminationDriver.java
License:Apache License
/**
 * Execute the TerminationDriver Job.
 * @param args array of external arguments, not used in this method
 * @return <c>1</c> if the TerminationDriver Job failed its execution; <c>0</c> if everything is ok.
 * @throws Exception
 */
public int run(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // GenericOptionsParser invocation in order to suppress the hadoop warning.
    new GenericOptionsParser(conf, args);
    Job job = new Job(conf, "TerminationDriver");
    job.setJarByClass(TerminationDriver.class);
    job.setMapOutputKeyClass(NodesPairWritable.class);
    job.setMapOutputValueClass(IntWritable.class);
    job.setOutputKeyClass(ClusterWritable.class);
    job.setOutputValueClass(NullWritable.class);
    job.setMapperClass(TerminationMapper.class);
    job.setPartitionerClass(NodePartitioner.class);
    job.setGroupingComparatorClass(NodeGroupingComparator.class);
    job.setReducerClass(TerminationReducer.class);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    FileInputFormat.addInputPath(job, this.input);
    FileOutputFormat.setOutputPath(job, this.output);
    if (!job.waitForCompletion(this.verbose))
        return 1;
    // Set up the private variables looking at the counters value
    this.numNodes = job.getCounters().findCounter(UtilCounters.NUM_NODES).getValue();
    this.numClusters = job.getCounters().findCounter(UtilCounters.NUM_CLUSTERS).getValue();
    return 0;
}