Example usage for org.apache.hadoop.mapreduce Job getCounters

Introduction

On this page you can find example usages of org.apache.hadoop.mapreduce.Job#getCounters(), collected from open-source projects.

Prototype

public Counters getCounters() throws IOException 

Document

Gets the counters for this job.
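
Before the project examples, here is a minimal, self-contained sketch of the pattern they all share: run the job to completion, then read counter values from the Counters object returned by getCounters(). The word-count style mapper/reducer wiring and the argument handling are hypothetical and exist only to give getCounters() a job to inspect; the counters read at the end (TaskCounter.MAP_INPUT_RECORDS and TaskCounter.REDUCE_OUTPUT_RECORDS) are standard framework counters.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Counters;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.TaskCounter;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.map.TokenCounterMapper;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.reduce.IntSumReducer;

public class CounterExample {
    public static void main(String[] args) throws Exception {
        // Hypothetical word-count job; expects <input> and <output> paths as arguments.
        Job job = Job.getInstance(new Configuration(), "counter-example");
        job.setJarByClass(CounterExample.class);
        job.setMapperClass(TokenCounterMapper.class);
        job.setCombinerClass(IntSumReducer.class);
        job.setReducerClass(IntSumReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        boolean success = job.waitForCompletion(true);

        // getCounters() is only meaningful after the job has run; it may contact the
        // cluster to fetch counter values, which is why it declares IOException.
        Counters counters = job.getCounters();
        long mapInput = counters.findCounter(TaskCounter.MAP_INPUT_RECORDS).getValue();
        long reduceOutput = counters.findCounter(TaskCounter.REDUCE_OUTPUT_RECORDS).getValue();
        System.out.println("Map input records:     " + mapInput);
        System.out.println("Reduce output records: " + reduceOutput);

        System.exit(success ? 0 : 1);
    }
}

Counters.findCounter(Enum) works for both framework counters and user-defined enum counters, and counters can also be looked up by group and name via getGroup(...).findCounter(...); both forms appear in the examples below.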

Usage

From source file: co.nubetech.hiho.merge.MergeJob.java

License: Apache License

@Override
public int run(String[] args) throws Exception {
    populateConfiguration(args);
    try {
        checkMandatoryConfs();
    } catch (HIHOException e1) {
        e1.printStackTrace();
        throw new Exception(e1);
    }

    Class inputFormatClass = Class.forName(inputFormat);
    Class outputFormatClass = Class.forName(outputFormat);
    Class inputKeyClass = Class.forName(inputKeyClassName);
    Class inputValueClass = Class.forName(inputValueClassName);

    Configuration conf = getConf();
    conf.set(HIHOConf.MERGE_OLD_PATH, oldPath);
    conf.set(HIHOConf.MERGE_NEW_PATH, newPath);

    Job job = new Job(conf);
    job.setJobName("Merge job");
    job.setJarByClass(MergeJob.class);

    if (mergeBy.equals("key")) {
        job.setMapperClass(MergeKeyMapper.class);
        job.setReducerClass(MergeKeyReducer.class);

    } else if (mergeBy.equals("value")) {
        job.setMapperClass(MergeValueMapper.class);
        job.setReducerClass(MergeValueReducer.class);
    }

    job.setInputFormatClass(inputFormatClass);
    DelimitedTextInputFormat.setProperties(job, delimiter, column);
    job.setMapOutputKeyClass(HihoTuple.class);
    job.setMapOutputValueClass(HihoValue.class);

    job.setOutputKeyClass(inputKeyClass);
    job.setOutputValueClass(inputValueClass);
    FileInputFormat.setInputPaths(job, oldPath + "," + newPath);
    job.setOutputFormatClass(outputFormatClass);
    FileOutputFormat.setOutputPath(job, new Path(outputPath));

    try {
        logger.debug("Output format class is " + job.getOutputFormatClass());
        logger.debug("Class is " + ReflectionUtils
                .newInstance(job.getOutputFormatClass(), job.getConfiguration()).getClass().getName());
        job.waitForCompletion(false);
        if (job.isComplete()) {
            Counters counters = job.getCounters();
            totalRecordsOld = counters.findCounter(MergeRecordCounter.TOTAL_RECORDS_OLD).getValue();
            totalRecordsNew = counters.findCounter(MergeRecordCounter.TOTAL_RECORDS_NEW).getValue();
            badRecords = counters.findCounter(MergeRecordCounter.BAD_RECORD).getValue();
            output = counters.findCounter(MergeRecordCounter.OUTPUT).getValue();
            logger.info("Total old records read are: " + totalRecordsOld);
            logger.info("Total new records read are: " + totalRecordsNew);
            logger.info("Bad Records are: " + badRecords);
            logger.info("Output records are: " + output);
        }
    } catch (Exception e) {
        e.printStackTrace();
    }

    return 0;
}

From source file: com.ailk.oci.ocnosql.tools.load.mutiple.MutipleColumnImportTsv.java

License: Apache License

private static Object[] runJob(Configuration conf, String tableName, String inputPath, String tmpOutputPath)
        throws Exception {
    Long everyJobInputLine = 0L;
    Long everyJobOutputLine = 0L;
    Long everyJobBadLine = 0L;
    Job job = null;
    try {
        job = createSubmittableJob(conf, tableName, inputPath, tmpOutputPath);
    } catch (Exception e) {
        System.err.println(
                "ERROR:mutiplecolumn bulkload when create submittableJob error is :" + e.fillInStackTrace());
        return new Object[] { false, everyJobInputLine, everyJobOutputLine, everyJobBadLine };
    }
    boolean completion = false;
    try {
        if (job == null)
            return new Object[] { false, everyJobInputLine, everyJobOutputLine, everyJobBadLine };
        completion = job.waitForCompletion(true);
        everyJobBadLine = job.getCounters().getGroup("ImportTsv").findCounter("Bad Lines").getValue();
        everyJobInputLine = job.getCounters().getGroup("ImportTsv").findCounter("total Lines").getValue();
        everyJobOutputLine = everyJobInputLine - everyJobBadLine;
    } catch (Exception e) {
        System.err.println("ERROR:mutiplecolumn bulkload when execute Job error is :" + e.fillInStackTrace());
        return new Object[] { false, everyJobInputLine, everyJobOutputLine, everyJobBadLine };
    }
    try {
        if (completion && !StringUtils.isEmpty(tmpOutputPath)) {
            String[] toolRunnerArgs = new String[] { tmpOutputPath, tableName };
            int ret = ToolRunner.run(new LoadIncrementalHFiles(conf), toolRunnerArgs);
            return new Object[] { ret == 0, everyJobInputLine, everyJobOutputLine, everyJobBadLine };
        } else {
            return new Object[] { false, everyJobInputLine, everyJobOutputLine, everyJobBadLine };
        }
    } catch (Exception e) {
        System.err.println(
                "ERROR:mutiplecolumn bulkload when LoadIncrementalHFiles error is :" + e.fillInStackTrace());
        return new Object[] { false, everyJobInputLine, everyJobOutputLine, everyJobBadLine };
    }
}

From source file: com.alectenharmsel.research.hadoop.LcCounters.java

License: Apache License

public static void main(String[] args) throws Exception {
    GenericOptionsParser parse = new GenericOptionsParser(new Configuration(), args);
    Configuration conf = parse.getConfiguration();

    String[] remainingArgs = parse.getRemainingArgs();
    if (remainingArgs.length != 2) {
        System.err.println("Usage: LineCount <input> <output>");
        System.exit(-1);
    }

    Job job = Job.getInstance(conf, "LineCount");
    job.setJarByClass(LineCount.class);

    job.setMapperClass(Map.class);
    job.setCombinerClass(Reduce.class);
    job.setReducerClass(Reduce.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);

    FileInputFormat.addInputPath(job, new Path(remainingArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(remainingArgs[1]));

    boolean success = job.waitForCompletion(true);

    //Get the counter here and print it
    Counters counters = job.getCounters();
    long total = counters.findCounter(LcCounters.NUM_LINES).getValue();
    System.out.println(Long.toString(total));

    int res = success ? 0 : 1;
    System.exit(res);
}

From source file: com.alectenharmsel.research.LcCounters.java

License: Apache License

public int run(String[] args) throws Exception {
    if (args.length != 2) {
        System.err.println("Usage: LineCounter <input> <output>");
        System.exit(-1);
    }

    Job job = new Job(getConf(), "LineCount");
    job.setJarByClass(LineCount.class);

    job.setInputFormatClass(WholeBlockInputFormat.class);

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    job.setMapperClass(LineCountMapper.class);
    job.setReducerClass(LineCountReducer.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);

    Configuration check = job.getConfiguration();
    boolean success = job.waitForCompletion(true);

    //Get the counter here, output to a file called total in the dir
    Counters counters = job.getCounters();

    //Throw it in the file
    Path outPath = new Path(args[1]);
    FileSystem fs = outPath.getFileSystem(check);
    OutputStream out = fs.create(new Path(outPath, "total"));
    String total = counters.findCounter(LcCounters.NUM_LINES).getValue() + "\n";
    out.write(total.getBytes());
    out.close();
    return success ? 0 : 1;
}

From source file: com.awcoleman.ExampleJobSummaryLogWithOutput.BinRecToAvroRecDriver.java

License: Apache License

public int run(String[] args) throws Exception {

    String input = null;
    String output = null;

    if (args.length < 2) {
        System.err.printf("Usage: %s <input> <output>\n", this.getClass().getSimpleName());
        return -1;
    } else {
        input = args[0];
        output = args[1];
    }

    Job job = Job.getInstance(getConf(), "BinRecToAvroRecDriver");
    Configuration conf = job.getConfiguration();

    //Add job log to hold Driver logging (and any summary info about the dataset,job, or counters we want to write)
    String fapath = createTempFileAppender(job);

    //get schema
    Schema outSchema = ReflectData.get().getSchema(com.awcoleman.examples.avro.BinRecForPartitions.class);
    job.getConfiguration().set("outSchema", outSchema.toString());

    //Job conf settings
    job.setJarByClass(BinRecToAvroRecDriver.class);
    job.setMapperClass(Map.class);
    job.setReducerClass(Reduce.class);
    job.setInputFormatClass(BinRecInputFormat.class);
    job.setOutputFormatClass(AvroKeyOutputFormat.class);
    AvroJob.setOutputKeySchema(job, outSchema);

    AvroJob.setMapOutputKeySchema(job, Schema.create(Schema.Type.STRING));
    AvroJob.setMapOutputValueSchema(job, outSchema);

    //Job output compression
    FileOutputFormat.setCompressOutput(job, true);
    job.getConfiguration().set(AvroJob.CONF_OUTPUT_CODEC, DataFileConstants.DEFLATE_CODEC);

    //Input and Output Paths
    FileInputFormat.setInputPaths(job, new Path(input));
    Path outPath = new Path(output);
    FileOutputFormat.setOutputPath(job, outPath);
    outPath.getFileSystem(conf).delete(outPath, true);

    boolean jobCompletionStatus = job.waitForCompletion(true);

    //Print Custom Counters before exiting
    Counters counters = job.getCounters();
    for (MYJOB_CNTRS customCounter : MYJOB_CNTRS.values()) {
        Counter thisCounter = counters.findCounter(customCounter);
        System.out.println("Custom Counter " + customCounter + "=" + thisCounter.getValue());
    }

    long mycnt1 = job.getCounters()
            .findCounter("com.awcoleman.TestingGettingContainerLogger.BinRecToAvroRecDriver$MYJOB_CNTRS",
                    "MYCNT1")
            .getValue();
    long mycnt2 = job.getCounters()
            .findCounter("com.awcoleman.TestingGettingContainerLogger.BinRecToAvroRecDriver$MYJOB_CNTRS",
                    "MYCNT2")
            .getValue();
    long mycnt3 = job.getCounters()
            .findCounter("com.awcoleman.TestingGettingContainerLogger.BinRecToAvroRecDriver$MYJOB_CNTRS",
                    "MYCNT3")
            .getValue();

    long myfakekpi = mycnt1 - mycnt2;

    String msgMyfakekpi = "The Fake KPI of the Dataset: " + String.format("%,d", myfakekpi);
    System.out.println(msgMyfakekpi);
    logger.info(msgMyfakekpi);

    //Finished, so move job log to HDFS in _log dir, clean
    copyTempFileAppenderToHDFSOutpath(job, fapath, output);

    return jobCompletionStatus ? 0 : 1;
}

From source file: com.cloudera.accumulo.upgrade.compatibility.DataCompatibilityVerify.java

License: Open Source License

@Override
public int run(String[] args) throws Exception {
    final String jobName = this.getClass().getName();
    options.parseArgs(jobName, args);
    try {
        final int totalMapSlots = getConf().getInt("mapred.map.tasks",
                DataCompatibilityTestCli.DEFAULT_NUM_ROWS);
        if (-1 == options.test.numRows) {
            options.test.numRows = totalMapSlots;
        }
        final TableOperations ops = options.connection.getConnector().tableOperations();
        final List<String> names = options.test.getTableNames(ops);
        int totalReduceSlots = getConf().getInt("mapred.reduce.tasks", 0);
        if (-1 != options.test.numReduceSlots) {
            totalReduceSlots = options.test.numReduceSlots;
        }
        if (0 == totalReduceSlots) {
            totalReduceSlots = names.size();
        }
        final int reducesPerJob = Math.max(1, totalReduceSlots / names.size());

        final List<Job> jobs = new ArrayList();
        for (String name : names) {
            final Job job = new Job(getConf(), jobName + " " + name);
            job.setJarByClass(this.getClass());
            options.input.useAccumuloInputFormat(job, name);
            job.setMapperClass(DataVerifyMapper.class);
            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(LongWritable.class);
            job.setReducerClass(LongSumReducer.class);
            job.setCombinerClass(LongSumReducer.class);
            job.setOutputFormatClass(TextOutputFormat.class);
            TextOutputFormat.setOutputPath(job, new Path(options.test.output, name));
            job.setNumReduceTasks(reducesPerJob);
            job.submit();
            jobs.add(job);
        }

        boolean success = true;
        final long numCellsPerRow = options.test.qualifiers * DataCompatibilityLoad.FAMILIES.length;
        final long numCellsPerFamily = options.test.qualifiers * options.test.numRows;
        for (Job job : jobs) {
            success &= job.waitForCompletion(true);
            final CounterGroup group = job.getCounters().getGroup(DataVerifyMapper.class.getName());
            if (null == group) {
                log.error("Job '" + job.getJobName() + "' doesn't have counters for the verification mapper.");
                success = false;
            } else {
                final Counter badCounter = group.findCounter(BAD_COUNTER);
                if (null != badCounter && 0 < badCounter.getValue()) {
                    log.error("Job '" + job.getJobName() + "' has " + badCounter.getValue()
                            + " entries with bad checksums.");
                    success = false;
                }
                int numRows = 0;
                int numFamilies = 0;
                for (Counter counter : group) {
                    if (counter.getName().startsWith(ROW_COUNTER_PREFIX)) {
                        numRows++;
                        if (numCellsPerRow != counter.getValue()) {
                            log.error("Job '" + job.getJobName() + "', counter '" + counter.getName()
                                    + "' should have " + numCellsPerRow + " cells, but instead has "
                                    + counter.getValue());
                            success = false;
                        }
                    } else if (counter.getName().startsWith(FAMILY_COUNTER_PREFIX)) {
                        numFamilies++;
                        if (numCellsPerFamily != counter.getValue()) {
                            log.error("Job '" + job.getJobName() + "', counter '" + counter.getName()
                                    + "' should have " + numCellsPerFamily + " cells, but instead has "
                                    + counter.getValue());
                            success = false;
                        }
                    }
                }
                if (options.test.numRows != numRows) {
                    log.error("Job '" + job.getJobName() + "' is supposed to have " + options.test.numRows
                            + " rows, but has " + numRows);
                    success = false;
                }
                if (DataCompatibilityLoad.FAMILIES.length != numFamilies) {
                    log.error("Job '" + job.getJobName() + "' is supposed to have "
                            + DataCompatibilityLoad.FAMILIES.length + " families, but has " + numFamilies);
                    success = false;
                }
            }
        }
        if (success) {
            log.info("All internal checks passed.");
        } else {
            log.info("Some checks failed. see log.");
        }
        return success ? 0 : 1;
    } finally {
        options.input.close();
    }
}

From source file: com.cloudera.ByteCount.java

License: Apache License

public static void main(String[] args) throws Exception {
    JobConf conf = new JobConf(new Configuration());

    // Trim off the hadoop-specific args
    String[] remArgs = new GenericOptionsParser(conf, args).getRemainingArgs();

    // Pull in properties
    Options options = new Options();

    Option property = OptionBuilder.withArgName("property=value").hasArgs(2).withValueSeparator()
            .withDescription("use value for given property").create("D");
    options.addOption(property);

    Option skipChecksums = new Option("skipChecksums", "skip checksums");
    options.addOption(skipChecksums);

    Option profile = new Option("profile", "profile tasks");
    options.addOption(profile);

    CommandLineParser parser = new BasicParser();
    CommandLine line = parser.parse(options, remArgs);

    Properties properties = line.getOptionProperties("D");
    for (Entry<Object, Object> prop : properties.entrySet()) {
        conf.set(prop.getKey().toString(), prop.getValue().toString());
        System.out.println("Set config key " + prop.getKey() + " to " + prop.getValue());
    }

    if (line.hasOption("skipChecksums")) {
        conf.setBoolean("bytecount.skipChecksums", true);
        System.out.println("Skipping checksums");
    }

    if (line.hasOption("profile")) {
        conf.setBoolean("mapred.task.profile", true);
        conf.set("mapred.task.profile.params",
                "-agentlib:hprof=cpu=samples,depth=100,interval=1ms,lineno=y,thread=y,file=%s");
        conf.set(MRJobConfig.NUM_MAP_PROFILES, "0");
        conf.set("mapred.task.profile.maps", "1");
        System.out.println("Profiling map tasks");
    }

    // Get the positional arguments out
    remArgs = line.getArgs();
    if (remArgs.length != 2) {
        System.err.println("Usage: ByteCount <inputBase> <outputBase>");
        System.exit(1);
    }
    String inputBase = remArgs[0];
    String outputBase = remArgs[1];

    Job job = Job.getInstance(conf);

    job.setInputFormatClass(ByteBufferInputFormat.class);

    job.setMapOutputKeyClass(ByteWritable.class);
    job.setMapOutputValueClass(LongWritable.class);

    job.setMapperClass(ByteCountMapper.class);
    job.setReducerClass(ByteCountReducer.class);
    job.setCombinerClass(ByteCountReducer.class);

    job.setOutputKeyClass(ByteWritable.class);
    job.setOutputValueClass(LongWritable.class);

    FileInputFormat.addInputPath(job, new Path(inputBase));
    FileOutputFormat.setOutputPath(job, new Path(outputBase));

    job.setJarByClass(ByteCount.class);

    boolean success = job.waitForCompletion(true);

    Counters counters = job.getCounters();
    System.out.println("\tRead counters");
    printCounter(counters, READ_COUNTER.BYTES_READ);
    printCounter(counters, READ_COUNTER.LOCAL_BYTES_READ);
    printCounter(counters, READ_COUNTER.SCR_BYTES_READ);
    printCounter(counters, READ_COUNTER.ZCR_BYTES_READ);

    System.exit(success ? 0 : 1);
}

From source file: com.cloudera.sqoop.mapreduce.db.TestDataDrivenDBInputFormat.java

License: Apache License

public void testDateSplits() throws Exception {
    Statement s = connection.createStatement();
    final String DATE_TABLE = "datetable";
    final String COL = "foo";
    try {
        try {
            // delete the table if it already exists.
            s.executeUpdate("DROP TABLE " + DATE_TABLE);
        } catch (SQLException e) {
            // Ignored; proceed regardless of whether we deleted the table;
            // it may have simply not existed.
        }

        // Create the table.
        s.executeUpdate("CREATE TABLE " + DATE_TABLE + "(" + COL + " TIMESTAMP)");
        s.executeUpdate("INSERT INTO " + DATE_TABLE + " VALUES('2010-04-01')");
        s.executeUpdate("INSERT INTO " + DATE_TABLE + " VALUES('2010-04-02')");
        s.executeUpdate("INSERT INTO " + DATE_TABLE + " VALUES('2010-05-01')");
        s.executeUpdate("INSERT INTO " + DATE_TABLE + " VALUES('2011-04-01')");

        // commit this tx.
        connection.commit();

        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "file:///");
        FileSystem fs = FileSystem.getLocal(conf);
        fs.delete(new Path(OUT_DIR), true);

        // now do a dd import
        Job job = new Job(conf);
        job.setMapperClass(ValMapper.class);
        job.setReducerClass(Reducer.class);
        job.setMapOutputKeyClass(DateCol.class);
        job.setMapOutputValueClass(NullWritable.class);
        job.setOutputKeyClass(DateCol.class);
        job.setOutputValueClass(NullWritable.class);
        job.setNumReduceTasks(1);
        job.getConfiguration().setInt("mapreduce.map.tasks", 2);
        FileOutputFormat.setOutputPath(job, new Path(OUT_DIR));
        DBConfiguration.configureDB(job.getConfiguration(), DRIVER_CLASS, DB_URL, (String) null, (String) null);
        DataDrivenDBInputFormat.setInput(job, DateCol.class, DATE_TABLE, null, COL, COL);

        boolean ret = job.waitForCompletion(true);
        assertTrue("job failed", ret);

        // Check to see that we imported as much as we thought we did.
        assertEquals("Did not get all the records", 4, job.getCounters()
                .findCounter("org.apache.hadoop.mapred.Task$Counter", "REDUCE_OUTPUT_RECORDS").getValue());
    } finally {
        s.close();
    }
}

From source file: com.cloudera.sqoop.shims.Apache22HadoopShim.java

License: Apache License

@Override
public long getNumMapOutputRecords(Job job) throws IOException, InterruptedException {
    return job.getCounters().findCounter(TaskCounter.MAP_OUTPUT_RECORDS).getValue();
}

From source file: com.cloudera.sqoop.shims.Apache22HadoopShim.java

License: Apache License

@Override
public long getNumMapInputRecords(Job job) throws IOException, InterruptedException {
    return job.getCounters().findCounter(TaskCounter.MAP_INPUT_RECORDS).getValue();
}