Example usage for org.apache.hadoop.mapreduce Counter getValue

Introduction

On this page you can find example usages of org.apache.hadoop.mapreduce.Counter.getValue().

Prototype

long getValue();

Document

What is the current value of this counter?
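
A minimal sketch of the typical pattern, assuming an already-completed Job named job and a hypothetical user-defined MyCounters enum (neither is taken from the examples below): look the counter up on the job's Counters object, then read it with getValue().

// Assumed imports: org.apache.hadoop.mapreduce.Job, org.apache.hadoop.mapreduce.Counter,
// org.apache.hadoop.mapreduce.Counters
// MyCounters is a hypothetical enum, e.g.: enum MyCounters { RECORDS_PROCESSED }
Counters counters = job.getCounters(); // may throw IOException
Counter records = counters.findCounter(MyCounters.RECORDS_PROCESSED);
long value = records.getValue(); // the current value of this counter
System.out.println("RECORDS_PROCESSED = " + value);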

Usage

From source file: com.metamx.druid.indexer.IndexGeneratorJob.java

License: Open Source License

public boolean run() {
    try {
        Job job = new Job(new Configuration(),
                String.format("%s-index-generator-%s", config.getDataSource(), config.getIntervals()));

        job.getConfiguration().set("io.sort.record.percent", "0.23");

        for (String propName : System.getProperties().stringPropertyNames()) {
            Configuration conf = job.getConfiguration();
            if (propName.startsWith("hadoop.")) {
                conf.set(propName.substring("hadoop.".length()), System.getProperty(propName));
            }
        }

        job.setInputFormatClass(TextInputFormat.class);

        job.setMapperClass(IndexGeneratorMapper.class);
        job.setMapOutputValueClass(Text.class);

        SortableBytes.useSortableBytesAsMapOutputKey(job);

        job.setNumReduceTasks(Iterables.size(config.getAllBuckets()));
        job.setPartitionerClass(IndexGeneratorPartitioner.class);

        job.setReducerClass(IndexGeneratorReducer.class);
        job.setOutputKeyClass(BytesWritable.class);
        job.setOutputValueClass(Text.class);
        job.setOutputFormatClass(IndexGeneratorOutputFormat.class);
        FileOutputFormat.setOutputPath(job, config.makeIntermediatePath());

        config.addInputPaths(job);
        config.intoConfiguration(job);

        job.setJarByClass(IndexGeneratorJob.class);

        job.submit();
        log.info("Job %s submitted, status available at %s", job.getJobName(), job.getTrackingURL());

        boolean success = job.waitForCompletion(true);

        // Read the invalid-row counter value once the job has finished.
        Counter invalidRowCount = job.getCounters()
                .findCounter(HadoopDruidIndexerConfig.IndexJobCounters.INVALID_ROW_COUNTER);
        jobStats.setInvalidRowCount(invalidRowCount.getValue());

        return success;
    } catch (Exception e) {
        throw new RuntimeException(e);
    }
}

From source file: com.moz.fiji.mapreduce.framework.JobHistoryFijiTable.java

License: Apache License

/**
 * Extract the counters from a Job.
 *
 * @param job Job from which to get counters.
 * @return a map from counters to their counts. Keys are group:name.
 * @throws IOException in case of an error getting the counters.
 */
private static Map<String, Long> getCounters(final Job job) throws IOException {
    final Counters counters = job.getCounters();
    final Map<String, Long> countersMap = Maps.newHashMap();
    for (String group : counters.getGroupNames()) {
        for (Counter counter : counters.getGroup(group)) {
            countersMap.put(String.format("%s:%s", group, counter.getName()), counter.getValue());
        }
    }
    return countersMap;
}

From source file: com.netflix.bdp.inviso.history.TraceJobHistoryLoader.java

License: Apache License

private Map<String, Map<String, Long>> handleCounterEntries(Counters counters) {
    Map<String, Map<String, Long>> result = new HashMap<>();

    for (CounterGroup group : counters) {
        Map<String, Long> cmap = new HashMap<>();

        for (Counter counter : group) {
            cmap.put(counter.getDisplayName(), counter.getValue());
        }

        result.put(group.getDisplayName(), cmap);
    }

    return result;
}

From source file: com.sa.npopa.samples.hbase.RowCounter.java

License: Apache License

@Override
public int run(String[] args) throws Exception {
    if (args.length < 1) {
        printUsage("Wrong number of parameters: " + args.length);
        return -1;
    }
    Job job = createSubmittableJob(getConf(), args);
    if (job == null) {
        return -1;
    }
    boolean success = job.waitForCompletion(true);
    final long expectedCount = getConf().getLong(EXPECTED_COUNT_KEY, -1);
    if (success && expectedCount != -1) {
        final Counter counter = job.getCounters().findCounter(RowCounterMapper.Counters.ROWS);
        success = expectedCount == counter.getValue();
        if (!success) {
            LOG.error("Failing job because count of '" + counter.getValue()
                    + "' does not match expected count of '" + expectedCount + "'");
        }
    }
    return (success ? 0 : 1);
}

From source file: com.scaleunlimited.cascading.LoggingFlowProcess.java

License: Apache License

/**
 * @param counter whose value should be returned
 * @return current value of the counter, local to the task
 * <br/><br/><b>Note:</b> Only the JobTracker aggregates task counter values
 * to report the job-wide total.
 */
public long getCounter(Enum counter) {
    if (_isLocal) {
        AtomicLong count = _localCounters.get(counter);
        if (count != null) {
            return count.get();
        } else {
            return 0;
        }
    } else {
        Counters counters = new Counters();
        Counter hadoopCounter = counters.findCounter(counter);
        if (hadoopCounter != null) {
            return hadoopCounter.getValue();
        } else {
            return 0;
        }
    }
}

From source file: com.synerzip.analytics.commoncrawl.googleads.counter.GoogleAdsCounterJob.java

License: Apache License

/**
 * Configures and submits the Map Reduce Job to Hadoop
 */
public int run(String[] args) throws Exception {

    String inputPath = null;
    String outputPath = null;
    boolean overwrite = false;
    String s3AccessKey = null;
    String s3SecretKey = null;

    // Read the command line arguments. We're not using GenericOptionsParser
    // to prevent having to include commons.cli as a dependency.
    for (int index = 0; index < args.length; index++) {
        try {

            if (ARGNAME_INPATH.equals(args[index])) {
                inputPath = args[++index];
            } else if (ARGNAME_OUTPATH.equals(args[index])) {
                outputPath = args[++index];
            } else if (ARGNAME_S3ACCESSKEY.equals(args[index])) {
                s3AccessKey = args[++index];
            } else if (ARGNAME_S3SECRETKEY.equals(args[index])) {
                s3SecretKey = args[++index];
            } else if (ARGNAME_MAXFILES.equals(args[index])) {
                // FIXME - No use of static methods
                WarcFileFilter.setMax(Long.parseLong(args[++index]));
            } else if (ARGNAME_OVERWRITE.equals(args[index])) {
                overwrite = true;
            } else {
                LOG.warn("Unsupported argument: " + args[index]);
            }
        } catch (ArrayIndexOutOfBoundsException e) {
            usage();
            throw new IllegalArgumentException();
        }
    }

    if (inputPath == null || outputPath == null) {
        usage();
        throw new IllegalArgumentException();
    }

    if (inputPath.contains("s3n") && (s3AccessKey == null || s3SecretKey == null)) {
        usage();
        LOG.info("Please specify Access Key and Secret Key to access data on AWS S3 storage ");
        throw new IllegalArgumentException();
    }

    // Create the Hadoop job.
    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf);
    job.setJarByClass(GoogleAdsCounterJob.class);
    if (inputPath.contains("s3n") && (s3AccessKey != null && s3SecretKey != null)) {
        conf.set("AWS_ACCESS_KEY_ID", s3AccessKey);
        conf.set("AWS_SECRET_ACCESS_KEY", s3SecretKey);
    }
    // Scan the provided input path for WARC files.
    LOG.info("setting input path to '" + inputPath + "'");

    WarcFileFilter.setFilter(FILEFILTER);
    FileInputFormat.addInputPath(job, new Path(inputPath));

    // FIXME - I see the problem that you want to give a dynamic number to a
    // static class. My question is, Is this really required, if we just
    // point to a file in s3 that should solve our problem
    FileInputFormat.setInputPathFilter(job, WarcFileFilter.class);

    // Delete the output path directory if it already exists and user wants
    // to overwrite it.
    if (overwrite) {
        LOG.info("clearing the output path at '" + outputPath + "'");
        FileSystem fs = FileSystem.get(new URI(outputPath), conf);
        if (fs.exists(new Path(outputPath))) {
            fs.delete(new Path(outputPath), true);
        }
    }

    // Set the path where final output 'part' files will be saved.
    LOG.info("setting output path to '" + outputPath + "'");
    FileOutputFormat.setOutputPath(job, new Path(outputPath));
    /*
     * // Defines additional single text based output 'GoogleAdClient' for
     * the job MultipleOutputs.addNamedOutput(job, "GoogleAdClient",
     * TextOutputFormat.class, Text.class,LongWritable.class );
     * 
     * // Defines additional text based output 'GoogleAdType' for the job
     * MultipleOutputs.addNamedOutput(job,
     * "GoogleAdType",TextOutputFormat.class, Text.class,
     * LongWritable.class);
     */
    // Set which InputFormat class to use.
    job.setInputFormatClass(WARCInputFormat.class);

    // Set which OutputFormat class to use.
    job.setOutputFormatClass(TextOutputFormat.class);

    /*
     * Using MultipleOutputs creates zero-sized default output e.g.: *
     * part-r-00000. To prevent this use LazyOutputFormat instead of
     * job.setOutputFormatClass(TextOutputFormat.class) in Hadoop job
     * configuration.
     */
    // LazyOutputFormat.setOutputFormatClass(job, TextOutputFormat.class);

    //   job.setPartitionerClass(GoogleAdsCounterPartitioner.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);
    //job.setNumReduceTasks(4);
    // Set the output data types.
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    // Set which Mapper and Reducer classes to use.
    job.setMapperClass(GoogleAdsCounterMapper.class);
    // job.setMapperClass(CrawlMapper_AdStatsDetails.class);
    job.setReducerClass(GoogleAdsCounterReducer.class);

    // set combiner
    //job.setCombinerClass(GoogleAdsCounterReducer.class);

    // set job name
    job.setJobName("CommonCrawl Data Processing : Counting Google Ads");

    long startTime = System.currentTimeMillis();
    boolean success = job.waitForCompletion(true);
    if (success) {

        LOG.info("Job completion status : " + success);
        long endTime = System.currentTimeMillis();

        long difference = endTime - startTime;
        LOG.info("Elapsed milliseconds: " + difference);
        Counter totalResponsePagesCounter = job.getCounters().findCounter(TestCounters.TOTALRESPONSEPAGES);
        LOG.info("totalResponsePagesCounter = " + totalResponsePagesCounter.getValue());

        Counter totalGoogleAdPagesCounter = job.getCounters().findCounter(TestCounters.TOTALGOOGLEADSPAGES);
        LOG.info("totalGoogleAdPagesCounter = " + totalGoogleAdPagesCounter.getValue());

        return 0;
    } else {
        return 1;
    }
}

From source file: com.tfm.utad.reducerdata.ReducerDataPig.java

public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {

    SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd-HH-mm-ss");
    Date date = new Date();

    Path inputPath = new Path("/home/jab/camus/reducer-data-pig");
    Path outputDir = new Path("/home/jab/camus/pigdata/" + sdf.format(date));

    // Create configuration
    Configuration conf = new Configuration(true);
    conf.set(FS_DEFAULT_FS, HDFS_LOCALHOST_LOCALDOMAIN);
    FileSystem fs = FileSystem.get(conf);
    Path filesPath = new Path(inputPath + "/*");
    FileStatus[] files = fs.globStatus(filesPath);

    // Create job
    Job job = new Job(conf, "ReducerDataPig");
    job.setJarByClass(ReducerDataPig.class);

    // Setup MapReduce
    job.setMapperClass(ReducerDataPigMapper.class);
    job.setReducerClass(ReducerDataPigReducer.class);
    job.setNumReduceTasks(1);

    // Specify key / value
    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(ReducerPigKey.class);

    // Input
    FileInputFormat.addInputPath(job, inputPath);
    job.setInputFormatClass(SequenceFileInputFormat.class);

    // Output
    FileOutputFormat.setOutputPath(job, outputDir);
    job.setOutputFormatClass(TextOutputFormat.class);

    // Delete output if exists
    if (fs.exists(outputDir)) {
        fs.delete(outputDir, true);
    }

    // Execute job
    int code = job.waitForCompletion(true) ? 0 : 1;
    if (code == 0) {
        Counters counters = job.getCounters();
        Counter malformedCounter = counters.findCounter(ReducerDataEnum.MALFORMED_DATA);
        LOG.info("Counter malformed data: " + malformedCounter.getValue());
        for (FileStatus fStatus : files) {
            LOG.info("File name:" + fStatus.getPath());
            if (fStatus.isFile()) {
                LOG.info("Removing file in path:" + fStatus.getPath());
                fs.delete(fStatus.getPath(), false);
            }
        }
    }
}

From source file: com.tfm.utad.reducerdata.ReducerDataVertica.java

public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {

    SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd-HH-mm-ss");
    Date date = new Date();

    Path inputPath = new Path("/home/jab/camus/reducer-data-vertica");
    Path outputDir = new Path("/home/jab/camus/verticadb/" + sdf.format(date));

    // Create configuration
    Configuration conf = new Configuration(true);
    conf.set(FS_DEFAULT_FS, HDFS_LOCALHOST_LOCALDOMAIN);
    FileSystem fs = FileSystem.get(conf);
    Path filesPath = new Path(inputPath + "/*");
    FileStatus[] files = fs.globStatus(filesPath);

    // Create job
    Job job = new Job(conf, "ReducerDataVertica");
    job.setJarByClass(ReducerDataVertica.class);

    // Setup MapReduce
    job.setMapperClass(ReducerDataVerticaMapper.class);
    job.setReducerClass(ReducerDataVerticaReducer.class);
    job.setNumReduceTasks(1);

    // Specify key / value
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(ReducerVerticaValue.class);

    // Input
    FileInputFormat.addInputPath(job, inputPath);
    job.setInputFormatClass(SequenceFileInputFormat.class);

    // Output
    FileOutputFormat.setOutputPath(job, outputDir);
    job.setOutputFormatClass(TextOutputFormat.class);

    // Delete output if exists
    if (fs.exists(outputDir)) {
        fs.delete(outputDir, true);
    }

    // Execute job
    int code = job.waitForCompletion(true) ? 0 : 1;
    if (code == 0) {
        Counters counters = job.getCounters();
        Counter malformedCounter = counters.findCounter(ReducerDataEnum.MALFORMED_DATA);
        LOG.info("Counter malformed data: " + malformedCounter.getValue());
        for (FileStatus fStatus : files) {
            LOG.info("File name:" + fStatus.getPath());
            if (fStatus.isFile()) {
                LOG.info("Removing file in path:" + fStatus.getPath());
                fs.delete(fStatus.getPath(), false);
            }
        }
    }
}

From source file: com.twitter.pig.backend.hadoop.executionengine.tez.TezJobControlCompiler.java

License: Apache License

/**
 * Reads the global counters produced by a job on the group labeled with PIG_MAP_RANK_NAME.
 * Then the cumulative sum is calculated: the previous cumulative sum plus the
 * previous global counter value.
 * @param job with the global counters collected.
 * @param operationID After being collected on global counters (POCounter),
 * these values are passed via the configuration file to PORank, using the unique
 * operation identifier.
 */
private void saveCounters(Job job, String operationID) {
    Counters counters;
    Group groupCounters;

    Long previousValue = 0L;
    Long previousSum = 0L;
    ArrayList<Pair<String, Long>> counterPairs;

    try {
        counters = HadoopShims.getCounters(job);
        groupCounters = counters.getGroup(getGroupName(counters.getGroupNames()));

        Iterator<Counter> it = groupCounters.iterator();
        HashMap<Integer, Long> counterList = new HashMap<Integer, Long>();

        while (it.hasNext()) {
            try {
                Counter c = it.next();
                counterList.put(Integer.valueOf(c.getDisplayName()), c.getValue());
            } catch (Exception ex) {
                ex.printStackTrace();
            }
        }
        counterSize = counterList.size();
        counterPairs = new ArrayList<Pair<String, Long>>();

        for (int i = 0; i < counterSize; i++) {
            previousSum += previousValue;
            previousValue = counterList.get(Integer.valueOf(i));
            counterPairs.add(new Pair<String, Long>(TezJobControlCompiler.PIG_MAP_COUNTER + operationID
                    + TezJobControlCompiler.PIG_MAP_SEPARATOR + i, previousSum));
        }

        globalCounters.put(operationID, counterPairs);

    } catch (Exception e) {
        String msg = "Error to read counters into Rank operation counterSize " + counterSize;
        throw new RuntimeException(msg, e);
    }
}

From source file: crunch.MaxTemperature.java

License: Apache License

  @Test
public void parsesMalformedTemperature() throws IOException,
    InterruptedException {
  Text value = new Text("0335999999433181957042302005+37950+139117SAO  +0004" +
                                // Year ^^^^
      "RJSN V02011359003150070356999999433201957010100005+353");
                            // Temperature ^^^^^
  Counters counters = new Counters();
  new MapDriver<LongWritable, Text, Text, IntWritable>()
    .withMapper(new MaxTemperatureMapper())
    .withInputValue(value)
    .withCounters(counters)
    .runTest();
  Counter c = counters.findCounter(MaxTemperatureMapper.Temperature.MALFORMED);
  assertThat(c.getValue(), is(1L));
}