Example usage for org.apache.hadoop.mapreduce Counter getValue

Introduction

This page collects example usages of the getValue() method of org.apache.hadoop.mapreduce.Counter.

Prototype

long getValue();

Source Link

Document

What is the current value of this counter?

Usage

From source file:com.metamx.druid.indexer.IndexGeneratorJob.java

License:Open Source License

/**
 * Configures, submits and waits for the index-generator Hadoop job, then copies the
 * job's invalid-row counter into {@code jobStats}.
 *
 * <p>The counter is read whether or not the job succeeded.
 *
 * @return the value reported by {@code Job.waitForCompletion(true)}
 * @throws RuntimeException wrapping any exception raised while configuring or running the job
 */
public boolean run() {
    try {
        final String jobName = String.format("%s-index-generator-%s", config.getDataSource(),
                config.getIntervals());
        final Job job = new Job(new Configuration(), jobName);
        final Configuration jobConf = job.getConfiguration();

        jobConf.set("io.sort.record.percent", "0.23");

        // Forward every JVM system property named "hadoop.<key>" into the job
        // configuration as "<key>".
        final String forwardedPrefix = "hadoop.";
        for (String propName : System.getProperties().stringPropertyNames()) {
            if (!propName.startsWith(forwardedPrefix)) {
                continue;
            }
            jobConf.set(propName.substring(forwardedPrefix.length()), System.getProperty(propName));
        }

        // Map side.
        job.setInputFormatClass(TextInputFormat.class);
        job.setMapperClass(IndexGeneratorMapper.class);
        job.setMapOutputValueClass(Text.class);
        SortableBytes.useSortableBytesAsMapOutputKey(job);

        // One reduce task per bucket.
        job.setNumReduceTasks(Iterables.size(config.getAllBuckets()));
        job.setPartitionerClass(IndexGeneratorPartitioner.class);

        // Reduce side and output.
        job.setReducerClass(IndexGeneratorReducer.class);
        job.setOutputKeyClass(BytesWritable.class);
        job.setOutputValueClass(Text.class);
        job.setOutputFormatClass(IndexGeneratorOutputFormat.class);
        FileOutputFormat.setOutputPath(job, config.makeIntermediatePath());

        config.addInputPaths(job);
        config.intoConfiguration(job);

        job.setJarByClass(IndexGeneratorJob.class);

        job.submit();
        log.info("Job %s submitted, status available at %s", job.getJobName(), job.getTrackingURL());

        final boolean succeeded = job.waitForCompletion(true);

        final Counter invalidRows = job.getCounters()
                .findCounter(HadoopDruidIndexerConfig.IndexJobCounters.INVALID_ROW_COUNTER);
        jobStats.setInvalidRowCount(invalidRows.getValue());

        return succeeded;
    } catch (Exception e) {
        throw new RuntimeException(e);
    }
}

From source file:com.moz.fiji.mapreduce.framework.JobHistoryFijiTable.java

License:Apache License

/**
 * Flattens all counters of a job into a single map.
 *
 * @param job Job whose counters are read.
 * @return a map from counter key to counter value; each key has the form {@code group:name}.
 * @throws IOException if the counters cannot be obtained from the job.
 */
private static Map<String, Long> getCounters(final Job job) throws IOException {
    final Counters jobCounters = job.getCounters();
    final Map<String, Long> valuesByKey = Maps.newHashMap();
    for (String groupName : jobCounters.getGroupNames()) {
        for (Counter entry : jobCounters.getGroup(groupName)) {
            final String key = String.format("%s:%s", groupName, entry.getName());
            valuesByKey.put(key, entry.getValue());
        }
    }
    return valuesByKey;
}

From source file:com.netflix.bdp.inviso.history.TraceJobHistoryLoader.java

License:Apache License

/**
 * Converts a {@code Counters} object into a two-level map.
 *
 * <p>The outer key is each group's display name; the inner map goes from each
 * counter's display name to its current value. Entries sharing a display name
 * overwrite one another, since they are stored with {@code Map.put}.
 *
 * @param counters the counters to convert
 * @return a new map of group display name to (counter display name to value)
 */
private Map<String, Map<String, Long>> handleCounterEntries(Counters counters) {
    final Map<String, Map<String, Long>> valuesByGroup = new HashMap<>();

    for (CounterGroup counterGroup : counters) {
        final Map<String, Long> groupValues = new HashMap<>();
        for (Counter entry : counterGroup) {
            final String counterLabel = entry.getDisplayName();
            groupValues.put(counterLabel, entry.getValue());
        }
        final String groupLabel = counterGroup.getDisplayName();
        valuesByGroup.put(groupLabel, groupValues);
    }

    return valuesByGroup;
}

From source file:com.sa.npopa.samples.hbase.RowCounter.java

License:Apache License

/**
 * Runs the row-counter job and optionally verifies the resulting row count.
 *
 * <p>When the configuration holds a value other than -1 under {@code EXPECTED_COUNT_KEY}
 * and the job succeeded, the ROWS counter must equal that value for the run to count
 * as successful.
 *
 * @param args command-line arguments; at least one is required
 * @return -1 if the arguments are insufficient or no job could be created, 0 on success,
 *         1 if the job failed or the row count did not match the expected count
 * @throws Exception propagated from job creation or execution
 */
@Override
public int run(String[] args) throws Exception {
    if (args.length < 1) {
        printUsage("Wrong number of parameters: " + args.length);
        return -1;
    }

    final Job job = createSubmittableJob(getConf(), args);
    if (job == null) {
        return -1;
    }

    final boolean completed = job.waitForCompletion(true);
    final long expectedCount = getConf().getLong(EXPECTED_COUNT_KEY, -1);

    // No verification requested, or nothing worth verifying because the job failed.
    if (!completed || expectedCount == -1) {
        return completed ? 0 : 1;
    }

    final Counter rows = job.getCounters().findCounter(RowCounterMapper.Counters.ROWS);
    if (expectedCount == rows.getValue()) {
        return 0;
    }

    LOG.error("Failing job because count of '" + rows.getValue()
            + "' does not match expected count of '" + expectedCount + "'");
    return 1;
}

From source file:com.scaleunlimited.cascading.LoggingFlowProcess.java

License:Apache License

/**
 * @param counter whose value should be returned
 * @return current value of the counter, local to the task
 * <br/><br/><b>Note:</b> Only the JobTracker aggregates task counter values
 * to report the job-wide total./*from  w  w  w .ja va 2s.  co m*/
 */
public long getCounter(Enum counter) {
    if (_isLocal) {
        AtomicLong count = _localCounters.get(counter);
        if (count != null) {
            return count.get();
        } else {
            return 0;
        }
    } else {
        Counters counters = new Counters();
        Counter hadoopCounter = counters.findCounter(counter);
        if (hadoopCounter != null) {
            return (int) hadoopCounter.getValue();
        } else {
            return 0;
        }
    }
}

From source file:com.synerzip.analytics.commoncrawl.googleads.counter.GoogleAdsCounterJob.java

License:Apache License

/**
 * Configures and submits the Map Reduce Job to Hadoop.
 *
 * <p>Arguments are matched against the {@code ARGNAME_*} constants: input path, output
 * path, S3 access key, S3 secret key and maximum file count each consume the following
 * argument as their value; the overwrite flag takes no value. Unrecognised arguments
 * are logged and skipped.
 *
 * @param args command-line arguments
 * @return 0 if the job completed successfully, 1 otherwise
 * @throws IllegalArgumentException if an option is missing its value, if the input or
 *         output path is absent, or if the input path contains "s3n" and either S3 key
 *         is missing
 * @throws Exception any other failure raised while configuring or running the job
 */
public int run(String[] args) throws Exception {

    String inputPath = null;
    String outputPath = null;
    boolean overwrite = false;
    String s3AccessKey = null;
    String s3SecretKey = null;

    // Read the command line arguments. We're not using GenericOptionsParser
    // to prevent having to include commons.cli as a dependency.
    for (int index = 0; index < args.length; index++) {
        try {

            if (ARGNAME_INPATH.equals(args[index])) {
                inputPath = args[++index];
            } else if (ARGNAME_OUTPATH.equals(args[index])) {
                outputPath = args[++index];
            } else if (ARGNAME_S3ACCESSKEY.equals(args[index])) {
                s3AccessKey = args[++index];
            } else if (ARGNAME_S3SECRETKEY.equals(args[index])) {
                s3SecretKey = args[++index];
            } else if (ARGNAME_MAXFILES.equals(args[index])) {
                // FIXME - No use of static methods
                WarcFileFilter.setMax(Long.parseLong(args[++index]));
            } else if (ARGNAME_OVERWRITE.equals(args[index])) {
                overwrite = true;
            } else {
                LOG.warn("Unsupported argument: " + args[index]);
            }
        } catch (ArrayIndexOutOfBoundsException e) {
            // An option that needs a value was the last argument.
            usage();
            throw new IllegalArgumentException();
        }
    }

    if (inputPath == null || outputPath == null) {
        usage();
        throw new IllegalArgumentException();
    }

    if (inputPath.contains("s3n") && (s3AccessKey == null || s3SecretKey == null)) {
        usage();
        LOG.info("Please specify Access Key and Secret Key to access data on AWS S3 storage ");
        throw new IllegalArgumentException();
    }

    // Create the Hadoop job.
    Configuration conf = new Configuration();
    // The credentials must be in place BEFORE the Job is created: Job.getInstance(conf)
    // takes a copy of the Configuration, so values set on conf afterwards would never
    // reach the job.
    if (inputPath.contains("s3n") && (s3AccessKey != null && s3SecretKey != null)) {
        conf.set("AWS_ACCESS_KEY_ID", s3AccessKey);
        conf.set("AWS_SECRET_ACCESS_KEY", s3SecretKey);
    }
    Job job = Job.getInstance(conf);
    job.setJarByClass(GoogleAdsCounterJob.class);

    // Scan the provided input path for WARC files.
    LOG.info("setting input path to '" + inputPath + "'");

    WarcFileFilter.setFilter(FILEFILTER);
    FileInputFormat.addInputPath(job, new Path(inputPath));

    // FIXME - I see the problem that you want to give a dynamic number to a
    // static class. My question is, Is this really required, if we just
    // point to a file in s3 that should solve our problem
    FileInputFormat.setInputPathFilter(job, WarcFileFilter.class);

    // Delete the output path directory if it already exists and user wants
    // to overwrite it.
    if (overwrite) {
        LOG.info("clearing the output path at '" + outputPath + "'");
        FileSystem fs = FileSystem.get(new URI(outputPath), conf);
        if (fs.exists(new Path(outputPath))) {
            fs.delete(new Path(outputPath), true);
        }
    }

    // Set the path where final output 'part' files will be saved.
    LOG.info("setting output path to '" + outputPath + "'");
    FileOutputFormat.setOutputPath(job, new Path(outputPath));
    /*
     * // Defines additional single text based output 'GoogleAdClient' for
     * the job MultipleOutputs.addNamedOutput(job, "GoogleAdClient",
     * TextOutputFormat.class, Text.class,LongWritable.class );
     * 
     * // Defines additional text based output 'GoogleAdType' for the job
     * MultipleOutputs.addNamedOutput(job,
     * "GoogleAdType",TextOutputFormat.class, Text.class,
     * LongWritable.class);
     */
    // Set which InputFormat class to use.
    job.setInputFormatClass(WARCInputFormat.class);

    // Set which OutputFormat class to use.
    job.setOutputFormatClass(TextOutputFormat.class);

    /*
     * Using MultipleOutputs creates zero-sized default output e.g.: *
     * part-r-00000. To prevent this use LazyOutputFormat instead of
     * job.setOutputFormatClass(TextOutputFormat.class) in Hadoop job
     * configuration.
     */
    // LazyOutputFormat.setOutputFormatClass(job, TextOutputFormat.class);

    //   job.setPartitionerClass(GoogleAdsCounterPartitioner.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);
    //job.setNumReduceTasks(4);
    // Set the output data types.
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    // Set which Mapper and Reducer classes to use.
    job.setMapperClass(GoogleAdsCounterMapper.class);
    // job.setMapperClass(CrawlMapper_AdStatsDetails.class);
    job.setReducerClass(GoogleAdsCounterReducer.class);

    // set combiner
    //job.setCombinerClass(GoogleAdsCounterReducer.class);

    // set job name
    job.setJobName("CommonCrawl Data Processing : Counting Google Ads");

    long startTime = System.currentTimeMillis();
    // Wait exactly once and reuse the result; waiting a second time on the finished job
    // only to build a log message is redundant.
    boolean succeeded = job.waitForCompletion(true);
    if (succeeded) {

        LOG.info("Job completion status : " + succeeded);
        long endTime = System.currentTimeMillis();

        long difference = endTime - startTime;
        LOG.info("Elapsed milliseconds: " + difference);
        Counter totalResponsePagesCounter = job.getCounters().findCounter(TestCounters.TOTALRESPONSEPAGES);
        LOG.info("totalResponsePagesCounter = " + totalResponsePagesCounter.getValue());

        Counter totalGoogleAdPagesCounter = job.getCounters().findCounter(TestCounters.TOTALGOOGLEADSPAGES);
        LOG.info("totalGoogleAdPagesCounter = " + totalGoogleAdPagesCounter.getValue());

        return 0;
    } else {
        return 1;
    }
}

From source file:com.tfm.utad.reducerdata.ReducerDataPig.java

/**
 * Runs the "ReducerDataPig" MapReduce job over the files in the fixed input directory
 * and writes the result to a timestamped output directory.
 *
 * <p>If the job succeeds, the MALFORMED_DATA counter is logged and every regular file
 * that matched the input glob before the job started is deleted.
 *
 * @param args command-line arguments (not used)
 * @throws IOException on file-system or job I/O failure
 * @throws InterruptedException if waiting for the job is interrupted
 * @throws ClassNotFoundException if a job class cannot be resolved
 */
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {

    // "yyyy" is the calendar year. The previous "YYYY" is the week-based year, which
    // yields the wrong year for dates in the first or last week of a year.
    SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd-HH-mm-ss");
    Date date = new Date();

    Path inputPath = new Path("/home/jab/camus/reducer-data-pig");
    Path outputDir = new Path("/home/jab/camus/pigdata/" + sdf.format(date));

    // Create configuration
    Configuration conf = new Configuration(true);
    conf.set(FS_DEFAULT_FS, HDFS_LOCALHOST_LOCALDOMAIN);
    FileSystem fs = FileSystem.get(conf);
    // Snapshot the input files now so that only these are removed after a successful run.
    Path filesPath = new Path(inputPath + "/*");
    FileStatus[] files = fs.globStatus(filesPath);

    // Create job
    Job job = new Job(conf, "ReducerDataPig");
    job.setJarByClass(ReducerDataPig.class);

    // Setup MapReduce
    job.setMapperClass(ReducerDataPigMapper.class);
    job.setReducerClass(ReducerDataPigReducer.class);
    job.setNumReduceTasks(1);

    // Specify key / value
    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(ReducerPigKey.class);

    // Input
    FileInputFormat.addInputPath(job, inputPath);
    job.setInputFormatClass(SequenceFileInputFormat.class);

    // Output
    FileOutputFormat.setOutputPath(job, outputDir);
    job.setOutputFormatClass(TextOutputFormat.class);

    // Delete output if exists
    if (fs.exists(outputDir)) {
        fs.delete(outputDir, true);
    }

    // Execute job
    int code = job.waitForCompletion(true) ? 0 : 1;
    if (code == 0) {
        Counters counters = job.getCounters();
        Counter malformedCounter = counters.findCounter(ReducerDataEnum.MALFORMED_DATA);
        LOG.info("Counter malformed data: " + malformedCounter.getValue());
        for (FileStatus fStatus : files) {
            LOG.info("File name:" + fStatus.getPath());
            if (fStatus.isFile()) {
                LOG.info("Removing file in path:" + fStatus.getPath());
                fs.delete(fStatus.getPath(), false);
            }
        }
    }
}

From source file:com.tfm.utad.reducerdata.ReducerDataVertica.java

/**
 * Runs the "ReducerDataVertica" MapReduce job over the files in the fixed input
 * directory and writes the result to a timestamped output directory.
 *
 * <p>If the job succeeds, the MALFORMED_DATA counter is logged and every regular file
 * that matched the input glob before the job started is deleted.
 *
 * @param args command-line arguments (not used)
 * @throws IOException on file-system or job I/O failure
 * @throws InterruptedException if waiting for the job is interrupted
 * @throws ClassNotFoundException if a job class cannot be resolved
 */
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {

    // "yyyy" is the calendar year. The previous "YYYY" is the week-based year, which
    // yields the wrong year for dates in the first or last week of a year.
    SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd-HH-mm-ss");
    Date date = new Date();

    Path inputPath = new Path("/home/jab/camus/reducer-data-vertica");
    Path outputDir = new Path("/home/jab/camus/verticadb/" + sdf.format(date));

    // Create configuration
    Configuration conf = new Configuration(true);
    conf.set(FS_DEFAULT_FS, HDFS_LOCALHOST_LOCALDOMAIN);
    FileSystem fs = FileSystem.get(conf);
    // Snapshot the input files now so that only these are removed after a successful run.
    Path filesPath = new Path(inputPath + "/*");
    FileStatus[] files = fs.globStatus(filesPath);

    // Create job
    Job job = new Job(conf, "ReducerDataVertica");
    job.setJarByClass(ReducerDataVertica.class);

    // Setup MapReduce
    job.setMapperClass(ReducerDataVerticaMapper.class);
    job.setReducerClass(ReducerDataVerticaReducer.class);
    job.setNumReduceTasks(1);

    // Specify key / value
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(ReducerVerticaValue.class);

    // Input
    FileInputFormat.addInputPath(job, inputPath);
    job.setInputFormatClass(SequenceFileInputFormat.class);

    // Output
    FileOutputFormat.setOutputPath(job, outputDir);
    job.setOutputFormatClass(TextOutputFormat.class);

    // Delete output if exists
    if (fs.exists(outputDir)) {
        fs.delete(outputDir, true);
    }

    // Execute job
    int code = job.waitForCompletion(true) ? 0 : 1;
    if (code == 0) {
        Counters counters = job.getCounters();
        Counter malformedCounter = counters.findCounter(ReducerDataEnum.MALFORMED_DATA);
        LOG.info("Counter malformed data: " + malformedCounter.getValue());
        for (FileStatus fStatus : files) {
            LOG.info("File name:" + fStatus.getPath());
            if (fStatus.isFile()) {
                LOG.info("Removing file in path:" + fStatus.getPath());
                fs.delete(fStatus.getPath(), false);
            }
        }
    }
}

From source file:com.twitter.pig.backend.hadoop.executionengine.tez.TezJobControlCompiler.java

License:Apache License

/**
 * Reads the global counters of a job from the group selected by {@code getGroupName}
 * and stores, per counter index, the cumulative sum of all preceding counter values.
 *
 * <p>Each counter's display name is parsed as an integer index. A counter whose name
 * does not parse (or that otherwise fails) is skipped after printing its stack trace.
 * For index {@code i} the stored value is the sum of the counter values at indexes
 * {@code 0 .. i-1}, so index 0 always maps to 0. The resulting list is recorded in
 * {@code globalCounters} under the operation identifier.
 *
 * @param job with the global counters collected.
 * @param operationID unique operation identifier; it is embedded in each generated key
 *        and used as the key under which the list is stored
 * @throws RuntimeException wrapping any failure while reading or accumulating counters
 */
private void saveCounters(Job job, String operationID) {
    try {
        Counters jobCounters = HadoopShims.getCounters(job);
        Group rankGroup = jobCounters.getGroup(getGroupName(jobCounters.getGroupNames()));

        // Index the counter values by the integer encoded in each display name.
        HashMap<Integer, Long> valueByIndex = new HashMap<Integer, Long>();
        for (Iterator<Counter> cursor = rankGroup.iterator(); cursor.hasNext();) {
            try {
                Counter current = cursor.next();
                valueByIndex.put(Integer.valueOf(current.getDisplayName()), current.getValue());
            } catch (Exception ex) {
                ex.printStackTrace();
            }
        }
        counterSize = valueByIndex.size();

        // Exclusive prefix sum: entry i holds the total of values 0 .. i-1.
        ArrayList<Pair<String, Long>> cumulativePairs = new ArrayList<Pair<String, Long>>();
        Long runningSum = 0L;
        Long lastValue = 0L;
        for (int index = 0; index < counterSize; index++) {
            runningSum += lastValue;
            lastValue = valueByIndex.get(Integer.valueOf(index));
            String pairKey = TezJobControlCompiler.PIG_MAP_COUNTER + operationID
                    + TezJobControlCompiler.PIG_MAP_SEPARATOR + index;
            cumulativePairs.add(new Pair<String, Long>(pairKey, runningSum));
        }

        globalCounters.put(operationID, cumulativePairs);

    } catch (Exception e) {
        String msg = "Error to read counters into Rank operation counterSize " + counterSize;
        throw new RuntimeException(msg, e);
    }
}

From source file:crunch.MaxTemperature.java

License:Apache License

  /**
   * Feeds the mapper a single record and checks that the
   * {@code MaxTemperatureMapper.Temperature.MALFORMED} counter ends up at exactly 1.
   */
  @Test
public void parsesMalformedTemperature() throws IOException,
    InterruptedException {
  Text malformedRecord = new Text("0335999999433181957042302005+37950+139117SAO  +0004" +
                                // Year ^^^^
      "RJSN V02011359003150070356999999433201957010100005+353");
                            // Temperature ^^^^^
  Counters testCounters = new Counters();

  new MapDriver<LongWritable, Text, Text, IntWritable>()
    .withMapper(new MaxTemperatureMapper())
    .withInputValue(malformedRecord)
    .withCounters(testCounters)
    .runTest();

  Counter malformed = testCounters.findCounter(MaxTemperatureMapper.Temperature.MALFORMED);
  long malformedCount = malformed.getValue();
  assertThat(malformedCount, is(1L));
}