Example usage for org.apache.hadoop.mapreduce Job getCounters

Introduction

On this page you can find examples of how org.apache.hadoop.mapreduce.Job#getCounters is used in real-world source files.

Prototype

public Counters getCounters() throws IOException 

Document

Gets the counters for this job.
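
Counters are only guaranteed to be complete once the job has finished, so the value is typically read after waitForCompletion returns. A minimal sketch of the call pattern, not taken from the sources below (assumes "job" is an already configured Job, and TaskCounter is org.apache.hadoop.mapreduce.TaskCounter):

// Read a built-in counter once the job completes.
if (job.waitForCompletion(true)) {
    Counters counters = job.getCounters();
    long mapInputRecords = counters.findCounter(TaskCounter.MAP_INPUT_RECORDS).getValue();
    System.out.println("Map input records: " + mapInputRecords);
}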

Usage

From source file:com.tfm.utad.reducerdata.ReducerDataPig.java

public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {

    SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd-HH-mm-ss"); // yyyy, not YYYY: YYYY is the week-based year
    Date date = new Date();

    Path inputPath = new Path("/home/jab/camus/reducer-data-pig");
    Path outputDir = new Path("/home/jab/camus/pigdata/" + sdf.format(date));

    // Create configuration
    Configuration conf = new Configuration(true);
    conf.set(FS_DEFAULT_FS, HDFS_LOCALHOST_LOCALDOMAIN);
    FileSystem fs = FileSystem.get(conf);
    Path filesPath = new Path(inputPath + "/*");
    FileStatus[] files = fs.globStatus(filesPath);

    // Create job
    Job job = new Job(conf, "ReducerDataPig");
    job.setJarByClass(ReducerDataPig.class);

    // Setup MapReduce
    job.setMapperClass(ReducerDataPigMapper.class);
    job.setReducerClass(ReducerDataPigReducer.class);
    job.setNumReduceTasks(1);

    // Specify key / value
    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(ReducerPigKey.class);

    // Input
    FileInputFormat.addInputPath(job, inputPath);
    job.setInputFormatClass(SequenceFileInputFormat.class);

    // Output
    FileOutputFormat.setOutputPath(job, outputDir);
    job.setOutputFormatClass(TextOutputFormat.class);

    // Delete output if exists
    if (fs.exists(outputDir)) {
        fs.delete(outputDir, true);
    }

    // Execute job
    int code = job.waitForCompletion(true) ? 0 : 1;
    if (code == 0) {
        Counters counters = job.getCounters();
        Counter malformedCounter = counters.findCounter(ReducerDataEnum.MALFORMED_DATA);
        LOG.info("Counter malformed data: " + malformedCounter.getValue());
        for (FileStatus fStatus : files) {
            LOG.info("File name:" + fStatus.getPath());
            if (fStatus.isFile()) {
                LOG.info("Removing file in path:" + fStatus.getPath());
                fs.delete(fStatus.getPath(), false);
            }
        }
    }
}
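
The MALFORMED_DATA value read above is a user-defined enum counter. The sources for ReducerDataEnum and the mapper are not shown here; a plausible sketch of the producing side, with the parse check purely hypothetical:

// Hypothetical sketch: ReducerDataEnum is assumed to be a plain enum
// visible to both the mapper and the driver.
public enum ReducerDataEnum { MALFORMED_DATA }

// Inside the mapper, count records that fail to parse:
if (!isWellFormed(value)) { // isWellFormed is a hypothetical helper
    context.getCounter(ReducerDataEnum.MALFORMED_DATA).increment(1);
    return;
}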

From source file:com.tfm.utad.reducerdata.ReducerDataVertica.java

public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {

    SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd-HH-mm-ss"); // yyyy, not YYYY: YYYY is the week-based year
    Date date = new Date();

    Path inputPath = new Path("/home/jab/camus/reducer-data-vertica");
    Path outputDir = new Path("/home/jab/camus/verticadb/" + sdf.format(date));

    // Create configuration
    Configuration conf = new Configuration(true);
    conf.set(FS_DEFAULT_FS, HDFS_LOCALHOST_LOCALDOMAIN);
    FileSystem fs = FileSystem.get(conf);
    Path filesPath = new Path(inputPath + "/*");
    FileStatus[] files = fs.globStatus(filesPath);

    // Create job
    Job job = new Job(conf, "ReducerDataVertica");
    job.setJarByClass(ReducerDataVertica.class);

    // Setup MapReduce
    job.setMapperClass(ReducerDataVerticaMapper.class);
    job.setReducerClass(ReducerDataVerticaReducer.class);
    job.setNumReduceTasks(1);

    // Specify key / value
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(ReducerVerticaValue.class);

    // Input
    FileInputFormat.addInputPath(job, inputPath);
    job.setInputFormatClass(SequenceFileInputFormat.class);

    // Output
    FileOutputFormat.setOutputPath(job, outputDir);
    job.setOutputFormatClass(TextOutputFormat.class);

    // Delete output if exists
    if (fs.exists(outputDir)) {
        fs.delete(outputDir, true);
    }

    // Execute job
    int code = job.waitForCompletion(true) ? 0 : 1;
    if (code == 0) {
        Counters counters = job.getCounters();
        Counter malformedCounter = counters.findCounter(ReducerDataEnum.MALFORMED_DATA);
        LOG.info("Counter malformed data: " + malformedCounter.getValue());
        for (FileStatus fStatus : files) {
            LOG.info("File name:" + fStatus.getPath());
            if (fStatus.isFile()) {
                LOG.info("Removing file in path:" + fStatus.getPath());
                fs.delete(fStatus.getPath(), false);
            }
        }
    }
}

From source file:com.tomslabs.grid.avro.AvroWordCountTest.java

License:Apache License

@Test
public void testMapReduce() throws Throwable {
    FileSystem fs = FileSystem.get(localConf);
    String inputFile = AvroIOUtils.createAvroInputFile(inputDir, "foo", "foo", "bar", "baz", "foo", "baz");
    AvroIOUtils.dumpAvroFiles(inputDir);
    Path input = localFileToPath(inputFile).getParent();
    Path countOutput = new Path(outputDir.getAbsolutePath());
    fs.delete(countOutput, true);

    Job countJob = AvroWordCount.createSubmitableJob(localConf, input, countOutput);
    assertTrue("count job failed", countJob.waitForCompletion(true));

    CounterGroup group = countJob.getCounters().getGroup("org.apache.hadoop.mapred.Task$Counter");
    assertEquals("Wrong number of mapper input records", 6, group.findCounter("MAP_INPUT_RECORDS").getValue());
    assertEquals("Wrong number of mapper output records", 6,
            group.findCounter("MAP_OUTPUT_RECORDS").getValue());
    assertEquals("Wrong number of reduce output records", 3,
            group.findCounter("REDUCE_OUTPUT_RECORDS").getValue());

    AvroIOUtils.dumpAvroFiles(outputDir);

    Map<String, Integer> res = readOutput(outputDir);
    assertEquals(3, res.size());

    assertTrue(res.containsKey("foo"));
    assertEquals(3, res.get("foo").intValue());
    assertTrue(res.containsKey("bar"));
    assertEquals(1, res.get("bar").intValue());
    assertTrue(res.containsKey("baz"));
    assertEquals(2, res.get("baz").intValue());

}
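
This test resolves the built-in task counters through the string group name "org.apache.hadoop.mapred.Task$Counter", which is tied to older Hadoop releases. On Hadoop 2 and later the same values are exposed through the org.apache.hadoop.mapreduce.TaskCounter enum; a sketch of the equivalent lookup:

// Equivalent enum-based lookup on newer Hadoop versions (a sketch,
// assuming Hadoop 2+ on the classpath):
long mapInputRecords = countJob.getCounters()
        .findCounter(TaskCounter.MAP_INPUT_RECORDS).getValue();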

From source file:com.twitter.algebra.nmf.ErrDMJ.java

License:Apache License

public static long run(Configuration conf, DistributedRowMatrix X, Vector xColSumVec, DistributedRowMatrix A,
        DistributedRowMatrix Yt, String label)
        throws IOException, InterruptedException, ClassNotFoundException {
    log.info("running " + ErrDMJ.class.getName());
    if (X.numRows() != A.numRows()) {
        throw new CardinalityException(X.numRows(), A.numRows()); // report both mismatched dimensions
    }
    if (A.numCols() != Yt.numCols()) {
        throw new CardinalityException(A.numCols(), Yt.numCols());
    }
    if (X.numCols() != Yt.numRows()) {
        throw new CardinalityException(X.numCols(), Yt.numRows());
    }
    Path outPath = new Path(A.getOutputTempPath(), label);
    FileSystem fs = FileSystem.get(outPath.toUri(), conf);
    ErrDMJ job = new ErrDMJ();
    long totalErr = -1;
    if (!fs.exists(outPath)) {
        Job hJob = job.run(conf, X.getRowPath(), A.getRowPath(), Yt.getRowPath(), outPath, A.numRows(),
                Yt.numRows(), Yt.numCols());
        Counters counters = hJob.getCounters();
        counters.findCounter("Result", "sumAbs").getValue();
        log.info("FINAL ERR is " + totalErr);
    } else {
        log.warn("----------- Skip already exists: " + outPath);
    }
    Vector sumErrVec = AlgebraCommon.mapDirToSparseVector(outPath, 1, X.numCols(), conf);
    double maxColErr = Double.MIN_VALUE;
    double sumColErr = 0;
    int cntColErr = 0;
    Iterator<Vector.Element> it = sumErrVec.nonZeroes().iterator();
    while (it.hasNext()) {
        Vector.Element el = it.next();
        double errP2 = el.get();
        double origP2 = xColSumVec.get(el.index());
        double colErr = Math.sqrt(errP2 / origP2);
        log.info("col: " + el.index() + " sum(err^2): " + errP2 + " sum(val^2): " + origP2 + " colErr: "
                + colErr);
        maxColErr = Math.max(colErr, maxColErr);
        sumColErr += colErr;
        cntColErr++;
    }
    log.info(" Max Col Err: " + maxColErr);
    log.info(" Avg Col Err: " + sumColErr / cntColErr);
    return totalErr;
}

From source file:com.twitter.algebra.nmf.ReindexerJob.java

License:Apache License

public static int index(Configuration conf, Path input, Path tmpPath, String label)
        throws IOException, InterruptedException, ClassNotFoundException {
    Path outputPath = new Path(tmpPath, label);
    FileSystem fs = FileSystem.get(outputPath.toUri(), conf);
    ReindexerJob job = new ReindexerJob();
    if (!fs.exists(outputPath)) {
        Job mrJob = job.run(conf, input, outputPath);
        long totalIndex = mrJob.getCounters().getGroup(TOTALINDEX_COUNTER_GROUP)
                .findCounter(TOTALINDEX_COUNTER_NAME).getValue();
        return (int) totalIndex;
    } else {
        log.warn("----------- Skip already exists: " + outputPath);
        return -1;
    }
}
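
Here the counter is addressed by string group and name rather than an enum; Counters.getGroup plus CounterGroup.findCounter, as above, and the two-argument findCounter(group, name) are interchangeable for this. A sketch of how the job's mapper might feed the counter (the increment site is an assumption; it does not appear in this source):

// Hypothetical mapper fragment: bump the string-named counter once per
// indexed record. getCounter(String, String) creates it on first use.
context.getCounter(TOTALINDEX_COUNTER_GROUP, TOTALINDEX_COUNTER_NAME).increment(1);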

From source file:com.yahoo.semsearch.fastlinking.io.WikipediaDocnoMappingBuilder.java

License:Apache License

@SuppressWarnings("static-access")
@Override
public int run(String[] args) throws Exception {
    Options options = new Options();
    options.addOption(
            OptionBuilder.withArgName("path").hasArg().withDescription("XML dump file").create(INPUT_OPTION));
    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("output file")
            .create(OUTPUT_FILE_OPTION));
    options.addOption(OptionBuilder.withArgName("en|sv|de|cs|es|zh|ar|tr|it").hasArg()
            .withDescription("two-letter language code").create(LANGUAGE_OPTION));
    options.addOption(KEEP_ALL_OPTION, false, "keep all pages");

    CommandLine cmdline;
    CommandLineParser parser = new GnuParser();
    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());
        return -1;
    }

    if (!cmdline.hasOption(INPUT_OPTION) || !cmdline.hasOption(OUTPUT_FILE_OPTION)) {
        HelpFormatter formatter = new HelpFormatter();
        formatter.printHelp(this.getClass().getName(), options);
        ToolRunner.printGenericCommandUsage(System.out);
        return -1;
    }

    String language = null;
    if (cmdline.hasOption(LANGUAGE_OPTION)) {
        language = cmdline.getOptionValue(LANGUAGE_OPTION);
        if (language.length() != 2) {
            System.err.println("Error: \"" + language + "\" unknown language!");
            return -1;
        }
    }

    String inputPath = cmdline.getOptionValue(INPUT_OPTION);
    String outputFile = cmdline.getOptionValue(OUTPUT_FILE_OPTION);
    boolean keepAll = cmdline.hasOption(KEEP_ALL_OPTION);

    String tmpPath = "tmp-" + WikipediaDocnoMappingBuilder.class.getSimpleName() + "-" + RANDOM.nextInt(10000);

    LOG.info("Tool name: " + this.getClass().getName());
    LOG.info(" - input: " + inputPath);
    LOG.info(" - output file: " + outputFile);
    LOG.info(" - keep all pages: " + keepAll);
    LOG.info(" - language: " + language);

    Job job = Job.getInstance(getConf());
    job.setJarByClass(WikipediaDocnoMappingBuilder.class);
    job.setJobName(String.format("BuildWikipediaDocnoMapping[%s: %s, %s: %s, %s: %s]", INPUT_OPTION, inputPath,
            OUTPUT_FILE_OPTION, outputFile, LANGUAGE_OPTION, language));

    job.getConfiguration().setBoolean(KEEP_ALL_OPTION, keepAll);
    if (language != null) {
        job.getConfiguration().set("wiki.language", language);
    }
    job.setNumReduceTasks(1);

    FileInputFormat.setInputPaths(job, new Path(inputPath));
    FileOutputFormat.setOutputPath(job, new Path(tmpPath));
    FileOutputFormat.setCompressOutput(job, false);

    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(IntWritable.class);
    job.setInputFormatClass(WikipediaPageInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    job.setMapperClass(MyMapper.class);
    job.setReducerClass(MyReducer.class);

    // Delete the output directory if it exists already.
    FileSystem.get(getConf()).delete(new Path(tmpPath), true);

    if (job.waitForCompletion(true)) {

        // Alternative: long cnt = keepAll ? job.getCounters().findCounter(PageTypes.TOTAL).getValue()
        //                                 : job.getCounters().findCounter(PageTypes.ARTICLE).getValue();
        long cnt = job.getCounters()
                .findCounter("org.apache.hadoop.mapred.Task$Counter", "REDUCE_OUTPUT_RECORDS").getValue();
        WikipediaDocnoMapping.writeDocnoMappingData(FileSystem.get(getConf()), tmpPath + "/part-r-00000",
                (int) cnt, outputFile);
        FileSystem.get(getConf()).delete(new Path(tmpPath), true);
        return 0;

    } else {
        return -1;
    }
}

From source file:connected.components.HashGreaterToMin.java

License:Open Source License

@Override
public int run(String[] args) throws Exception {
    int iterationCount = 0;
    long terminationValue = 1;
    Job job;
    while (terminationValue > 0) {
        job = jobConfig();

        if ((iterationCount + 1) % 3 == 0) {
            job.setMapperClass(MapHashGreaterToMin.class);
        } else {
            job.setMapperClass(MapHashMin.class);
        }

        String input, output;
        if (iterationCount == 0) { // the first iteration reads the original input
            input = args[0];
        } else { // later iterations read the previous iteration's output
            input = args[1] + iterationCount;
        }
        output = args[1] + (iterationCount + 1);
        System.out.println("Input:" + input);
        System.out.println("Output:" + output);
        FileInputFormat.setInputPaths(job, new Path(input)); // setting the input files for the job
        FileOutputFormat.setOutputPath(job, new Path(output)); // setting the output files for the job
        job.waitForCompletion(true); // wait for the job to complete
        Counters jobCntrs = job.getCounters();
        terminationValue = jobCntrs.findCounter(MoreIterations.numberOfIterations).getValue();
        System.out.println("\n Round " + iterationCount + " => #Communications : " + (numberOfComm - precomm));
        precomm = numberOfComm;
        iterationCount++;
    }
    System.out.println(
            " Number of MR rounds: " + iterationCount + "\n Number of Communications: " + numberOfComm);
    return 0;

}
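
The driver above keeps launching rounds until the numberOfIterations counter comes back as zero. The mapper/reducer side is not shown; a plausible sketch of the signal, with the change-detection condition assumed:

// Hypothetical reducer fragment: request another round whenever this
// iteration still changed a label.
if (labelChanged) { // labelChanged is an assumed flag
    context.getCounter(MoreIterations.numberOfIterations).increment(1);
}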

From source file:connected.components.HashToAlternate.java

@Override
public int run(String[] args) throws Exception {
    long startTime = System.nanoTime();
    args[0] = "/home/ro0t/Desktop/BTP/graph/input1.txt"; // note: hard-coded test path overrides the command-line argument
    Path inputPath = new Path(args[0]);
    Path basePath = new Path(args[1]);
    Path outputPath = null;
    FileSystem fs = FileSystem.get(getConf());
    fs.delete(basePath, true);
    int iterationCount = 0;
    long terminationValue = 1;
    Job job;
    while (terminationValue > 0) {
        job = jobConfig();
        if (iterationCount % 2 != 0) {
            job.setMapperClass(MapM.class);
        } else {
            job.setMapperClass(Map.class);
        }
        if (iterationCount != 0) { // after the first round, the input is the previous round's output
            if (iterationCount > 1) {
                fs.delete(inputPath, true);
            }
            inputPath = outputPath;
        }
        outputPath = new Path(basePath, iterationCount + "");
        FileInputFormat.setInputPaths(job, inputPath); // set this round's input path
        FileOutputFormat.setOutputPath(job, outputPath); // set this round's output path
        job.waitForCompletion(true); // wait for the job to complete
        Counters jobCntrs = job.getCounters();
        terminationValue = jobCntrs.findCounter(MRrounds.rounds).getValue();
        iterationCount++;
        long comm = jobCntrs.findCounter(MRrounds.numberOfComunications).getValue();
        long precom = jobCntrs.findCounter(MRrounds.precomm).getValue();
        System.out.println("\n Round " + iterationCount + " => #Communications : " + (comm - precom));
        jobCntrs.findCounter(MRrounds.precomm).setValue(comm);
    }
    long estimatedTime = System.nanoTime() - startTime;
    System.out.println(" \nNumber of MR rounds: " + iterationCount + " Time of Completion: "
            + estimatedTime / 1000000000 + "\n");
    return 0;

}

From source file:connected.components.HashToMin.java

License:Open Source License

@Override
public int run(String[] args) throws Exception {
    long precomm = 0;
    args[0] = "/home/ro0t/Desktop/BTP/graph/input1.txt"; // note: hard-coded test path overrides the command-line argument
    Path inputPath = new Path(args[0]);
    Path basePath = new Path(args[1]);
    Path outputPath = null;
    FileSystem fs = FileSystem.get(getConf());
    fs.delete(basePath, true);
    long startTime = System.nanoTime();
    int iterationCount = 0;
    long terminationValue = 1;
    Job job;
    while (terminationValue > 0) {
        job = jobConfig();
        if (iterationCount != 0) {
            if (iterationCount > 1) {
                fs.delete(inputPath, true);
            }
            inputPath = outputPath;
        }
        outputPath = new Path(basePath, iterationCount + "");
        FileInputFormat.setInputPaths(job, inputPath); // set this round's input path
        FileOutputFormat.setOutputPath(job, outputPath); // set this round's output path
        job.waitForCompletion(true); // wait for the job to complete
        Counters jobCntrs = job.getCounters();
        terminationValue = jobCntrs.findCounter(MRrounds.rounds).getValue();
        iterationCount++;
        System.out.println(
                "\n Round " + iterationCount + " => #Communications : " + (numberOfComunications - precomm));
        precomm = numberOfComunications;
    }
    long estimatedTime = System.nanoTime() - startTime;
    System.out.println(" \nNumber of MR rounds: " + iterationCount + " Number of Communications: "
            + numberOfComunications + " Time of Completion: " + estimatedTime / 1000000000 + "\n");

    return 0;

}

From source file:crunch.MaxTemperature.java

License:Apache License

@Override
    public int run(String[] args) throws Exception {
        if (args.length != 1) {
            JobBuilder.printUsage(this, "<job ID>");
            return -1;
        }
        String jobID = args[0];
        // vv NewMissingTemperatureFields
        Cluster cluster = new Cluster(getConf());
        Job job = cluster.getJob(JobID.forName(jobID));
        // ^^ NewMissingTemperatureFields
        if (job == null) {
            System.err.printf("No job with ID %s found.\n", jobID);
            return -1;
        }
        if (!job.isComplete()) {
            System.err.printf("Job %s is not complete.\n", jobID);
            return -1;
        }

        // vv NewMissingTemperatureFields
        Counters counters = job.getCounters();
        long missing = counters.findCounter(MaxTemperatureWithCounters.Temperature.MISSING).getValue();
        long total = counters.findCounter(TaskCounter.MAP_INPUT_RECORDS).getValue();
        // ^^ NewMissingTemperatureFields

        System.out.printf("Records with missing temperature fields: %.2f%%\n", 100.0 * missing / total);
        return 0;
    }