Example usage for org.apache.hadoop.mapreduce Job getCounters

Introduction

This page collects example usages of the org.apache.hadoop.mapreduce Job getCounters() method, taken from the source files listed below.

Prototype

public Counters getCounters() throws IOException 

Document

Gets the counters for this job.
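
The following minimal sketch shows the typical calling pattern: run the job, then read a framework counter from the returned Counters object. The job configuration is assumed to be set up elsewhere; TaskCounter is the current (Hadoop 2.x+) counter enum, while some of the older examples below use the deprecated Task.Counter instead.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Counters;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.TaskCounter;

public class CounterExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "counter-example");
        // ... set jar, mapper, reducer, and input/output paths here ...

        if (job.waitForCompletion(true)) {
            // getCounters() is declared to throw IOException
            Counters counters = job.getCounters();
            long mapInputRecords = counters.findCounter(TaskCounter.MAP_INPUT_RECORDS).getValue();
            System.out.println("Map input records: " + mapInputRecords);
        }
    }
}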

Usage

From source file:org.springframework.data.hadoop.batch.mapreduce.JobTasklet.java

License:Apache License

private void saveCounters(Job job, StepContribution contribution) {
    Counters counters = null;
    try {
        counters = job.getCounters();
    } catch (Exception ex) {
        if (RuntimeException.class.isAssignableFrom(ex.getClass())) {
            throw (RuntimeException) ex;
        } else {
            // ignore - we just can't get stats
        }
    }
    if (counters == null) {
        return;
    }

    Counter count = counters.findCounter(Task.Counter.MAP_INPUT_RECORDS);

    for (int i = 0; i < safeLongToInt(count.getValue()); i++) {
        contribution.incrementReadCount();
    }

    count = counters.findCounter(Task.Counter.MAP_SKIPPED_RECORDS);
    contribution.incrementReadSkipCount(safeLongToInt(count.getValue()));

    count = counters.findCounter(Task.Counter.REDUCE_OUTPUT_RECORDS);
    contribution.incrementWriteCount(safeLongToInt(count.getValue()));

    count = counters.findCounter(Task.Counter.REDUCE_SKIPPED_RECORDS);

    for (int i = 0; i < safeLongToInt(count.getValue()); i++) {
        contribution.incrementWriteSkipCount();
    }
}

From source file:org.springframework.data.hadoop.batch.mapreduce.JobTasklet.java

License:Apache License

private static void saveJobStats(Job job, StepExecution stepExecution) {
    if (stepExecution == null) {
        return;
    }
    ExecutionContext executionContext = stepExecution.getExecutionContext();
    String statusPrefix = "Job Status::";
    executionContext.put(statusPrefix + "ID", JobUtils.getJobId(job).toString());
    executionContext.put(statusPrefix + "Name", job.getJobName());
    executionContext.put(statusPrefix + "Tracking URL", job.getTrackingURL());
    executionContext.put(statusPrefix + "State", JobUtils.getStatus(job).toString());
    try {
        for (String cgName : job.getCounters().getGroupNames()) {
            CounterGroup group = job.getCounters().getGroup(cgName);
            Iterator<Counter> ci = group.iterator();
            while (ci.hasNext()) {
                Counter c = ci.next();
                executionContext.put(group.getDisplayName().trim() + "::" + c.getDisplayName().trim(),
                        c.getValue());
            }
        }
    } catch (Exception ignore) {
    }
}

From source file:org.trend.hgraph.mapreduce.pagerank.Driver.java

License:Apache License

private static long getPageRankChangedCount(Job job) throws IOException {
    long value = 0L;
    try {
        value = job.getCounters().findCounter(CalculatePageRankReducer.Counters.CHANGED_PAGE_RANK_COUNT)
                .getValue();
    } catch (IOException e) {
        LOGGER.error("get pageRankChangedCount failed", e);
        throw e;
    }
    LOGGER.info("pageRankChangedCount=" + value);
    return value;
}

From source file:org.trend.hgraph.mapreduce.pagerank.Driver.java

License:Apache License

private static int collectVeticesTotalCount(Configuration conf, String vertexTableName)
        throws IOException, InterruptedException, ClassNotFoundException {
    long totalCount = 1L;
    boolean success = false;
    Counter counter = null;
    String jobName = null;
    try {
        Job job = RowCounter.createSubmittableJob(conf, new String[] { vertexTableName });
        if (job == null) {
            System.err.println("job is null");
            return 1;
        }

        success = job.waitForCompletion(true);
        counter = job.getCounters()
                .findCounter("org.apache.hadoop.hbase.mapreduce.RowCounter$RowCounterMapper$Counters", "ROWS");
        jobName = job.getJobName();
        if (null != counter) {
            totalCount = counter.getValue();
            conf.set(Constants.PAGE_RANK_VERTICES_TOTAL_COUNT_KEY, totalCount + "");
        }
        LOGGER.info(Constants.PAGE_RANK_VERTICES_TOTAL_COUNT_KEY + "=" + totalCount);

    } catch (IOException e) {
        LOGGER.error("run " + jobName + " failed", e);
        throw e;
    } catch (InterruptedException e) {
        LOGGER.error("run " + jobName + " failed", e);
        throw e;
    } catch (ClassNotFoundException e) {
        LOGGER.error("run " + jobName + " failed", e);
        throw e;
    }

    return success ? 0 : -1;
}

From source file:org.trend.hgraph.mapreduce.pagerank.GetNoColumnsRows.java

License:Apache License

@Override
public int run(String[] args) throws Exception {
    if (null == args || args.length == 0) {
        System.err.println("no any option given !!");
        printUsage();
        return -1;
    }

    System.out.println("options:" + Arrays.toString(args));
    boolean and = true;
    String cmd = null;
    int mustStartIdx = -1;
    for (int a = 0; a < args.length; a++) {
        cmd = args[a];
        if (cmd.startsWith("-")) {
            if (mustStartIdx > -1) {
                System.err.println("option order is incorrect !!");
                printUsage();
                return -1;
            }

            if ("-a".equals(cmd)) {
                and = true;
            } else if ("-o".equals(cmd)) {
                and = false;
            } else {
                System.err.println("option is not defined !!");
                printUsage();
                return -1;
            }
        } else {
            if (mustStartIdx == -1) {
                mustStartIdx = a;
            }
        }
    }

    String tableName = args[mustStartIdx];
    String outputPath = args[mustStartIdx + 1];
    List<String> columns = new ArrayList<String>();
    for (int a = mustStartIdx + 2; a < args.length; a++) {
        columns.add(args[a]);
    }

    LOGGER.info("tableName=" + tableName);
    LOGGER.info("outputPath=" + outputPath);
    LOGGER.info("columns=" + columns);

    Configuration conf = this.getConf();
    conf.setBoolean(Mapper.AND_OR, and);
    conf.setStrings(Mapper.NO_COLUMNS, columns.toArray(new String[] {}));

    Job job = createSubmittableJob(conf, tableName, outputPath);
    boolean success = job.waitForCompletion(true);
    if (!success) {
        System.err.println("run job:" + job.getJobName() + " failed");
        return -1;
    }

    // for test
    Counter counter = job.getCounters().findCounter(
            "org.trend.hgraph.mapreduce.pagerank.GetNoColumnsRows$Mapper$Counters", "COLLECTED_ROWS");
    if (null != counter) {
        collectedRow = counter.getValue();
    }

    return 0;
}

From source file:org.unigram.likelike.lsh.LSHRecommendations.java

License:Apache License

/**
 * Extract clusters.
 * @param inputFile input 
 * @param clusterFile cluster files
 * @param conf configuration
 * @return 0 when succeeded
 * @throws IOException -
 * @throws InterruptedException -
 * @throws ClassNotFoundException -
 */
private boolean extractClusters(final String inputFile, final String clusterFile, final Configuration conf)
        throws IOException, InterruptedException, ClassNotFoundException {

    Path inputPath = new Path(inputFile);
    Path outputPath = new Path(clusterFile);
    FsUtil.checkPath(outputPath, FileSystem.get(conf));

    Job job = new Job(conf);
    job.setJarByClass(LSHRecommendations.class);
    FileInputFormat.addInputPath(job, inputPath);
    FileOutputFormat.setOutputPath(job, outputPath);
    job.setMapperClass(SelectClustersMapper.class);
    job.setCombinerClass(SelectClustersReducer.class);
    job.setReducerClass(SelectClustersReducer.class);
    job.setMapOutputKeyClass(SeedClusterId.class);
    job.setMapOutputValueClass(RelatedUsersWritable.class);
    job.setOutputKeyClass(SeedClusterId.class);
    job.setOutputValueClass(RelatedUsersWritable.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setNumReduceTasks(
            conf.getInt(LikelikeConstants.NUMBER_OF_REDUCES, LikelikeConstants.DEFAULT_NUMBER_OF_REDUCES));

    boolean result = job.waitForCompletion(true);
    this.setResultConf(job.getCounters(), conf);
    return result;
}

From source file:pad.CheckDriver.java

License:Apache License

/**
 * Execute the CheckDriver Job.
 * @param args      array of external arguments, not used in this method
 * @return          <c>1</c> if the CheckDriver Job failed its execution; <c>0</c> if everything is ok. 
 * @throws Exception 
 */
public int run(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // GenericOptionsParser invocation in order to suppress the hadoop warning.
    new GenericOptionsParser(conf, args);
    Job job = new Job(conf, "CheckDriver");
    job.setJarByClass(CheckDriver.class);

    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(NullWritable.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(NullWritable.class);

    job.setMapperClass(CheckMapper.class);
    job.setReducerClass(CheckReducer.class);

    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    FileInputFormat.addInputPath(job, this.input);
    FileOutputFormat.setOutputPath(job, this.input.suffix("_check"));

    if (!job.waitForCompletion(verbose))
        return 1;

    // Set up the private variable looking to the counter value
    this.testOk = (job.getCounters().findCounter(UtilCounters.NUM_ERRORS).getValue() == 0);

    // Delete the output folder ( we did not write on it )
    FileSystem.get(conf).delete(input.suffix("_check"), true);

    return 0;
}

From source file:pad.InitializationDriver.java

License:Apache License

/**
 * Execute the InitializationDriver Job.
 *
 * If the input file format is an adjacency list, we can easily determine the initial number of nodes,
 * which is equal to the number of rows of the input file, while the number of cliques is zero.
 * In order to obtain a list of arcs from the adjacency list, we use \see InitializationMapperAdjacency
 * as Mapper and zero Reducers.
 * 
 * If the input file format is a cliques list, we can easily determine the number of cliques,
 * which is equal to the number of rows of the input file.
 * In order to obtain an edge list from the cliques list, we use \see InitializationMapperClique
 * as Mapper. We store this result into a special folder \see MOS_OUTPUT_NAME.
 * Into the regular folder, this Mapper emits all the encountered nodes.
 * We use \see InitializationReducerNumNodes as Reducer in order to count the initial number of nodes,
 * counting all the distinct nodes found. The combiner (\see InitializationCombinerNumNodes) locally reduces
 * the number of duplicate nodes.
 * Once the value of the NUM_INITIAL_NODES counter ( \see UtilCounters ) is obtained, we delete the empty files
 * produced by the Reducer and move the real results into the main/regular folder.
 * 
 * @param args      array of external arguments, not used in this method
 * @return          <c>1</c> if the InitializationDriver Job failed its execution; <c>0</c> if everything is ok. 
 * @throws Exception 
 */
public int run(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // GenericOptionsParser invocation in order to suppress the hadoop warning.
    new GenericOptionsParser(conf, args);
    Job job = new Job(conf, "InitializationDriver");
    job.setJarByClass(InitializationDriver.class);

    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(IntWritable.class);

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    FileInputFormat.addInputPath(job, this.input);
    FileOutputFormat.setOutputPath(job, this.output);

    if (this.type == InputType.ADJACENCY_LIST) {
        // In order to obtain the arcs list from the adjacency list, we need only a Mapper task.
        job.setMapperClass(InitializationMapperAdjacency.class);
        job.setNumReduceTasks(0);
    } else {
        // Set up the special folder.
        MultipleOutputs.addNamedOutput(job, MOS_OUTPUT_NAME, SequenceFileOutputFormat.class, IntWritable.class,
                IntWritable.class);
        MultipleOutputs.setCountersEnabled(job, true);
        // In order to obtain the edges list from the cliques list, we need only a Mapper task
        // and we save the result into the special folder.
        // Then, we need a Reducer task in order to count the initial number of nodes
        job.setMapperClass(InitializationMapperClique.class);
        job.setCombinerClass(InitializationCombinerNumNodes.class);
        job.setReducerClass(InitializationReducerNumNodes.class);
    }

    if (!job.waitForCompletion(verbose))
        return 1;

    // Set up the private variables looking to the counters value
    this.numCliques = job.getCounters().findCounter(UtilCounters.NUM_CLIQUES).getValue();
    this.numInitialNodes = job.getCounters().findCounter(UtilCounters.NUM_INITIAL_NODES).getValue();

    if (this.type == InputType.CLIQUES_LIST) {
        FileSystem fs = FileSystem.get(conf);

        // Delete the empty outputs of the Job
        FileStatus[] filesStatus = fs.listStatus(this.output);
        for (FileStatus fileStatus : filesStatus)
            if (fileStatus.getPath().getName().contains("part"))
                fs.delete(fileStatus.getPath(), false);

        // Move the real outputs into the parent folder
        filesStatus = fs.listStatus(this.output.suffix("/" + MOS_OUTPUT_NAME));
        for (FileStatus fileStatus : filesStatus)
            fs.rename(fileStatus.getPath(), this.output.suffix("/" + fileStatus.getPath().getName()));

        // Delete empty special folder
        fs.delete(this.output.suffix("/" + MOS_OUTPUT_NAME), true);
    }

    return 0;
}

From source file:pad.StarDriver.java

License:Apache License

/**
 * Execute the StarDriver Job.
 * @param args      array of external arguments, not used in this method
 * @return          <c>1</c> if the StarDriver Job failed its execution; <c>0</c> if everything is ok. 
 * @throws Exception 
 */
public int run(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // GenericOptionsParser invocation in order to suppress the hadoop warning.
    new GenericOptionsParser(conf, args);
    conf.set("type", this.type.toString());
    Job job = new Job(conf, this.title);
    job.setJarByClass(StarDriver.class);

    job.setMapOutputKeyClass(NodesPairWritable.class);
    job.setMapOutputValueClass(IntWritable.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(IntWritable.class);

    job.setMapperClass(StarMapper.class);
    job.setCombinerClass(StarCombiner.class);
    job.setPartitionerClass(NodePartitioner.class);
    job.setGroupingComparatorClass(NodeGroupingComparator.class);
    job.setReducerClass(StarReducer.class);

    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    FileInputFormat.addInputPath(job, this.input);
    FileOutputFormat.setOutputPath(job, this.output);

    if (!job.waitForCompletion(verbose))
        return 1;

    // Set up the private variable looking to the counter value
    this.numChanges = job.getCounters().findCounter(UtilCounters.NUM_CHANGES).getValue();
    return 0;
}

From source file:pad.TerminationDriver.java

License:Apache License

/**
 * Execute the TerminationDriver Job.
 * @param args      array of external arguments, not used in this method
 * @return          <c>1</c> if the TerminationDriver Job failed its execution; <c>0</c> if everything is ok. 
 * @throws Exception 
 */
public int run(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // GenericOptionsParser invocation in order to suppress the hadoop warning.
    new GenericOptionsParser(conf, args);
    Job job = new Job(conf, "TerminationDriver");
    job.setJarByClass(TerminationDriver.class);

    job.setMapOutputKeyClass(NodesPairWritable.class);
    job.setMapOutputValueClass(IntWritable.class);
    job.setOutputKeyClass(ClusterWritable.class);
    job.setOutputValueClass(NullWritable.class);

    job.setMapperClass(TerminationMapper.class);
    job.setPartitionerClass(NodePartitioner.class);
    job.setGroupingComparatorClass(NodeGroupingComparator.class);
    job.setReducerClass(TerminationReducer.class);

    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    FileInputFormat.addInputPath(job, this.input);
    FileOutputFormat.setOutputPath(job, this.output);

    if (!job.waitForCompletion(this.verbose))
        return 1;

    // Set up the private variables looking to the counters value
    this.numNodes = job.getCounters().findCounter(UtilCounters.NUM_NODES).getValue();
    this.numClusters = job.getCounters().findCounter(UtilCounters.NUM_CLUSTERS).getValue();
    return 0;
}