Example usage for org.apache.hadoop.mapreduce Job getCounters

List of usage examples for org.apache.hadoop.mapreduce Job getCounters

Introduction

This page lists example usages of the org.apache.hadoop.mapreduce.Job method getCounters().

Prototype

public Counters getCounters() throws IOException 

Document

Gets the counters for this job.
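
For orientation before the project-specific examples below, here is a minimal, self-contained sketch of the usual call pattern: wait for the job to finish, fetch its Counters, and read one value. It assumes Hadoop 2.x or later, where the built-in TaskCounter enum is available; the class and method names in the sketch are illustrative only and not taken from any project listed here.

import java.io.IOException;

import org.apache.hadoop.mapreduce.Counter;
import org.apache.hadoop.mapreduce.Counters;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.TaskCounter;

public class CountersSketch {

    // Runs an already-configured Job and returns the number of map input records.
    public static long mapInputRecords(Job job)
            throws IOException, InterruptedException, ClassNotFoundException {
        if (!job.waitForCompletion(true)) {
            throw new IOException("Job " + job.getJobName() + " failed");
        }
        // getCounters() may contact the cluster, hence the IOException it declares.
        Counters counters = job.getCounters();
        Counter mapInputs = counters.findCounter(TaskCounter.MAP_INPUT_RECORDS);
        return mapInputs.getValue();
    }
}

User-defined counters are read the same way, either through an enum constant or by group and counter name, e.g. counters.findCounter("stats", "nlines").getValue() as in the Pangool tests below.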

Usage

From source file: com.cloudera.sqoop.shims.CDH3Shim.java

License: Apache License

@Override
public long getNumMapOutputRecords(Job job) throws IOException, InterruptedException {
    return job.getCounters().findCounter("org.apache.hadoop.mapred.Task$Counter", "MAP_OUTPUT_RECORDS")
            .getValue();
}

From source file: com.cloudera.sqoop.shims.CDH3Shim.java

License: Apache License

@Override
public long getNumMapInputRecords(Job job) throws IOException, InterruptedException {
    return job.getCounters().findCounter("org.apache.hadoop.mapred.Task$Counter", "MAP_INPUT_RECORDS")
            .getValue();
}

From source file: com.datasalt.pangool.tuplemr.mapred.lib.output.TestTupleTextInputOutputFormat.java

License: Apache License

@Test
public void testSplits() throws Exception {

    BufferedWriter writer = new BufferedWriter(new FileWriter(IN));
    for (int i = 0; i < 10000; i++) {
        writer.write("str1" + " " + "str2" + " " + "30" + " " + "4000" + "\n");
    }
    writer.close();

    Schema schema = new Schema("schema", Fields.parse("a:string, b:string, c:int, d:long"));
    InputFormat inputFormat = new TupleTextInputFormat(schema, false, false, ' ',
            TupleTextInputFormat.NO_QUOTE_CHARACTER, TupleTextInputFormat.NO_ESCAPE_CHARACTER,
            FieldSelector.NONE, TupleTextInputFormat.NO_NULL_STRING);

    Configuration conf = getConf();
    conf.setLong("mapred.min.split.size", 10 * 1024);
    conf.setLong("dfs.block.size", 10 * 1024);
    conf.setLong("mapred.max.split.size", 10 * 1024);

    FileSystem fS = FileSystem.get(conf);
    Path outPath = new Path(OUT);

    MapOnlyJobBuilder mapOnly = new MapOnlyJobBuilder(conf);
    mapOnly.addInput(new Path(IN), inputFormat,
            new MapOnlyMapper<ITuple, NullWritable, NullWritable, NullWritable>() {

                protected void map(ITuple key, NullWritable value, Context context)
                        throws IOException, InterruptedException {
                    Assert.assertEquals("str1", key.get("a").toString());
                    Assert.assertEquals("str2", key.get("b").toString());
                    Assert.assertEquals((Integer) 30, (Integer) key.get("c"));
                    Assert.assertEquals((Long) 4000L, (Long) key.get("d"));
                    context.getCounter("stats", "nlines").increment(1);
                };
            });

    HadoopUtils.deleteIfExists(fS, outPath);
    mapOnly.setOutput(outPath, new HadoopOutputFormat(NullOutputFormat.class), NullWritable.class,
            NullWritable.class);
    Job job = mapOnly.createJob();
    try {
        assertTrue(job.waitForCompletion(true));
    } finally {
        mapOnly.cleanUpInstanceFiles();
    }

    HadoopUtils.deleteIfExists(fS, new Path(IN));

    assertEquals(10000, job.getCounters().getGroup("stats").findCounter("nlines").getValue());
}

From source file: com.datasalt.pangool.tuplemr.mapred.lib.output.TestTupleTextInputOutputFormat.java

License: Apache License

@Test
public void testInputCompression() throws Exception {
    Schema schema = new Schema("schema", Fields.parse("a:string, b:string, c:int, d:long"));
    InputFormat inputFormat = new TupleTextInputFormat(schema, false, false, ' ',
            TupleTextInputFormat.NO_QUOTE_CHARACTER, TupleTextInputFormat.NO_ESCAPE_CHARACTER,
            FieldSelector.NONE, TupleTextInputFormat.NO_NULL_STRING);

    Configuration conf = getConf();
    FileSystem fS = FileSystem.get(conf);
    Path outPath = new Path(OUT);

    MapOnlyJobBuilder mapOnly = new MapOnlyJobBuilder(conf);
    mapOnly.addInput(new Path("src/test/resources/*.gz"), inputFormat,
            new MapOnlyMapper<ITuple, NullWritable, NullWritable, NullWritable>() {

                protected void map(ITuple key, NullWritable value, Context context)
                        throws IOException, InterruptedException {
                    Assert.assertNotNull(key.get("a").toString());
                    Assert.assertNotNull(key.get("b").toString());
                    Assert.assertTrue((Integer) key.get("c") > 0);
                    Assert.assertTrue((Long) key.get("d") > 0);
                    context.getCounter("stats", "nlines").increment(1);
                };
            });

    HadoopUtils.deleteIfExists(fS, outPath);
    mapOnly.setOutput(outPath, new HadoopOutputFormat(NullOutputFormat.class), NullWritable.class,
            NullWritable.class);
    Job job = mapOnly.createJob();
    try {
        assertTrue(job.waitForCompletion(true));
    } finally {
        mapOnly.cleanUpInstanceFiles();
    }

    HadoopUtils.deleteIfExists(fS, new Path(IN));

    assertEquals(100, job.getCounters().getGroup("stats").findCounter("nlines").getValue());
}

From source file: com.flipkart.fdp.migration.distcp.core.MirrorDistCPDriver.java

License: Apache License

private int processJobCounters(Job job) {
    int retVal = 0;
    try {

        Counters counters = job.getCounters();

        long failedCount = counters.findCounter(BLUESHIFT_COUNTER.FAILED_COUNT).getValue();

        long successCount = counters.findCounter(BLUESHIFT_COUNTER.SUCCESS_COUNT).getValue();

        long verifiedFailedCount = counters.findCounter(BLUESHIFT_COUNTER.VERIFIED_FAILED_COUNT).getValue();

        System.out.println(
                "Total Success Transfers: " + successCount + ", Total Failed Transfers: " + failedCount);

        if (failedCount > 0 || verifiedFailedCount > 0) {
            System.err.println("There are failedCount[" + failedCount + "], verifiedFailedCount["
                    + verifiedFailedCount + "] transfers, Please re-run the job...");
            retVal = (int) failedCount;
        }

        long verifiedSuccessCount = counters.findCounter(BLUESHIFT_COUNTER.VERIFIED_SUCCESS_COUNT).getValue();
        if (successCount != verifiedSuccessCount) {
            System.err.println("Verification not done for all files : successCount[" + successCount
                    + "], verifiedSuccessCount[" + verifiedSuccessCount + "].");
            retVal = (int) (successCount - verifiedSuccessCount);
        }

    } catch (Exception e) {
        System.out.println("Error processing job counters: " + e.getMessage());
        retVal = 1;
    }
    return retVal;
}

From source file: com.inmobi.conduit.distcp.tools.mapred.TestCopyCommitter.java

License: Apache License

@Ignore
@Test
public void testCounterProvider() {
    try {
        Job job = MockJobTracker.getJobForClient();
        Counters a = EMPTY_COUNTERS;
        CounterGroup grp = a.getGroup("abc");
        Counter cntr = grp.findCounter("counter");
        cntr.increment(100);
        CounterProvider cp = new CounterProvider(a);
        job.submit();
        Assert.assertEquals(job.getCounters(), a);
    } catch (Exception e) {
        LOG.error("Exception encountered ", e);
    }
}

From source file: com.linkedin.cubert.utils.ScriptStats.java

License: Open Source License

public void addJob(final Job job) {
    getStats(job).endTime = System.currentTimeMillis();
    Counters counters;
    try {
        counters = job.getCounters();
    } catch (IOException e) {
        counters = null;
    }
    getStats(job).counters = counters;
}

From source file: com.linkedin.hadoop.example.WordCountCounters.java

License: Apache License

/**
 * Azkaban will look for a method named `run` to start your job. Use this method to setup all the
 * Hadoop-related configuration for your job and submit it.
 *
 * @throws Exception If there is an exception during the configuration or submission of your job
     */
public void run() throws Exception {
    _logger.info(String.format("Configuring job for the class %s", getClass().getSimpleName()));

    Job job = Job.getInstance(getConf());
    job.setJarByClass(WordCountJob.class);
    job.setJobName(_name);

    job.setMapperClass(WordCountMapper.class);
    job.setCombinerClass(WordCountCombiner.class);
    job.setReducerClass(WordCountReducer.class);

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(LongWritable.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);

    String inputPath = _properties.getProperty("input.path");
    String outputPath = _properties.getProperty("output.path");
    boolean forceOverwrite = Boolean.parseBoolean(_properties.getProperty("force.output.overwrite", "false"));

    FileInputFormat.addInputPath(job, new Path(inputPath));
    FileOutputFormat.setOutputPath(job, new Path(outputPath));

    // Before we submit the job, remove the old the output directory
    if (forceOverwrite) {
        FileSystem fs = FileSystem.get(job.getConfiguration());
        fs.delete(FileOutputFormat.getOutputPath(job), true);
    }

    // Since we have Kerberos enabled at LinkedIn, we must add the token to our configuration. If
    // you don't use Kerberos security for your Hadoop cluster, you don't need this code.
    if (System.getenv("HADOOP_TOKEN_FILE_LOCATION") != null) {
        job.getConfiguration().set("mapreduce.job.credentials.binary",
                System.getenv("HADOOP_TOKEN_FILE_LOCATION"));
    }

    // Submit the job for execution
    _logger.info(String.format("About to submit the job named %s", _name));
    boolean succeeded = job.waitForCompletion(true);

    // Before we return, display our custom counters for the job in the Azkaban logs
    long inputWords = job.getCounters().findCounter(WordCountCounters.INPUT_WORDS).getValue();
    _logger.info(String.format("Read a total of %d input words", inputWords));

    // Azkaban will not realize the Hadoop job failed unless you specifically throw an exception
    if (!succeeded) {
        throw new Exception(String.format("Azkaban job %s failed", _name));
    }
}

From source file: com.linkedin.thirdeye.hadoop.aggregation.AggregationPhaseJob.java

License: Apache License

public Job run() throws Exception {
    Job job = Job.getInstance(getConf());
    job.setJobName(name);
    job.setJarByClass(AggregationPhaseJob.class);

    FileSystem fs = FileSystem.get(getConf());
    Configuration configuration = job.getConfiguration();

    // Properties
    LOGGER.info("Properties {}", props);

    // Input Path
    String inputPathDir = getAndSetConfiguration(configuration, AGG_PHASE_INPUT_PATH);
    LOGGER.info("Input path dir: " + inputPathDir);
    for (String inputPath : inputPathDir.split(ThirdEyeConstants.FIELD_SEPARATOR)) {
        LOGGER.info("Adding input:" + inputPath);
        Path input = new Path(inputPath);
        FileInputFormat.addInputPath(job, input);
    }

    // Output path
    Path outputPath = new Path(getAndSetConfiguration(configuration, AGG_PHASE_OUTPUT_PATH));
    LOGGER.info("Output path dir: " + outputPath.toString());
    if (fs.exists(outputPath)) {
        fs.delete(outputPath, true);
    }
    FileOutputFormat.setOutputPath(job, outputPath);

    // Schema
    Schema avroSchema = ThirdeyeAvroUtils.getSchema(inputPathDir);
    LOGGER.info("Schema : {}", avroSchema.toString(true));
    job.getConfiguration().set(AGG_PHASE_AVRO_SCHEMA.toString(), avroSchema.toString());

    // ThirdEyeConfig
    String metricTypesProperty = ThirdeyeAvroUtils.getMetricTypesProperty(
            props.getProperty(ThirdEyeConfigProperties.THIRDEYE_METRIC_NAMES.toString()),
            props.getProperty(ThirdEyeConfigProperties.THIRDEYE_METRIC_TYPES.toString()), avroSchema);
    props.setProperty(ThirdEyeConfigProperties.THIRDEYE_METRIC_TYPES.toString(), metricTypesProperty);
    ThirdEyeConfig thirdeyeConfig = ThirdEyeConfig.fromProperties(props);
    LOGGER.info("Thirdeye Config {}", thirdeyeConfig.encode());
    job.getConfiguration().set(AGG_PHASE_THIRDEYE_CONFIG.toString(),
            OBJECT_MAPPER.writeValueAsString(thirdeyeConfig));

    // Map config
    job.setMapperClass(AggregationMapper.class);
    job.setInputFormatClass(AvroKeyInputFormat.class);
    job.setMapOutputKeyClass(BytesWritable.class);
    job.setMapOutputValueClass(BytesWritable.class);

    // Reduce config
    job.setReducerClass(AggregationReducer.class);
    job.setOutputKeyClass(AvroKey.class);
    job.setOutputValueClass(NullWritable.class);
    AvroJob.setOutputKeySchema(job, avroSchema);
    job.setOutputFormatClass(AvroKeyOutputFormat.class);
    String numReducers = props.getProperty(ThirdEyeJobProperties.THIRDEYE_NUM_REDUCERS.getName());
    LOGGER.info("Num Reducers : {}", numReducers);
    if (StringUtils.isNotBlank(numReducers)) {
        job.setNumReduceTasks(Integer.valueOf(numReducers));
        LOGGER.info("Setting num reducers {}", job.getNumReduceTasks());
    }

    job.waitForCompletion(true);

    Counter counter = job.getCounters().findCounter(AggregationCounter.NUMBER_OF_RECORDS);
    LOGGER.info(counter.getDisplayName() + " : " + counter.getValue());
    if (counter.getValue() == 0) {
        throw new IllegalStateException("No input records in " + inputPathDir);
    }
    counter = job.getCounters().findCounter(AggregationCounter.NUMBER_OF_RECORDS_FLATTENED);
    LOGGER.info(counter.getDisplayName() + " : " + counter.getValue());

    for (String metric : thirdeyeConfig.getMetricNames()) {
        counter = job.getCounters().findCounter(thirdeyeConfig.getCollection(), metric);
        LOGGER.info(counter.getDisplayName() + " : " + counter.getValue());
    }

    return job;
}

From source file: com.marklogic.contentpump.ContentPump.java

License: Apache License

private static void submitJob(Job job) throws Exception {
    String cpHome = System.getProperty(CONTENTPUMP_HOME_PROPERTY_NAME);

    // find job jar
    File cpHomeDir = new File(cpHome);
    FilenameFilter jobJarFilter = new FilenameFilter() {
        @Override
        public boolean accept(File dir, String name) {
            if (name.endsWith(".jar") && name.startsWith(CONTENTPUMP_JAR_PREFIX)) {
                return true;
            } else {
                return false;
            }
        }
    };
    File[] cpJars = cpHomeDir.listFiles(jobJarFilter);
    if (cpJars == null || cpJars.length == 0) {
        throw new RuntimeException("Content Pump jar file " + "is not found under " + cpHome);
    }
    if (cpJars.length > 1) {
        throw new RuntimeException("More than one Content Pump jar file " + "are found under " + cpHome);
    }
    // set job jar
    Configuration conf = job.getConfiguration();
    conf.set("mapreduce.job.jar", cpJars[0].toURI().toURL().toString());

    // find lib jars
    FilenameFilter filter = new FilenameFilter() {
        @Override
        public boolean accept(File dir, String name) {
            if (name.endsWith(".jar") && !name.startsWith("hadoop")) {
                return true;
            } else {
                return false;
            }
        }

    };

    // set lib jars
    StringBuilder jars = new StringBuilder();
    for (File jar : cpHomeDir.listFiles(filter)) {
        if (jars.length() > 0) {
            jars.append(',');
        }
        jars.append(jar.toURI().toURL().toString());
    }
    conf.set("tmpjars", jars.toString());
    if (LOG.isTraceEnabled())
        LOG.trace("LIBJARS:" + jars.toString());
    job.waitForCompletion(true);
    AuditUtil.auditMlcpFinish(conf, job.getJobName(), job.getCounters());
}