Example usage for org.apache.hadoop.mapreduce Job getCounters

List of usage examples for org.apache.hadoop.mapreduce Job getCounters

Introduction

On this page you can find example usage for org.apache.hadoop.mapreduce Job getCounters.

Prototype

public Counters getCounters() throws IOException 

Document

Gets the counters for this job.
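
Typical usage is to call getCounters() after the job has finished and then look up individual counters, either by enum (for built-in or custom counters) or by group name and counter name. The following is a minimal sketch of that pattern, not taken from the examples below; the class name is illustrative and the actual job configuration (mapper, reducer, input and output paths) is elided.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Counter;
import org.apache.hadoop.mapreduce.Counters;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.TaskCounter;

public class GetCountersSketch {

    public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "getCounters example");

        // ... set mapper, reducer, input and output paths here ...

        boolean succeeded = job.waitForCompletion(true);
        if (succeeded) {
            // Snapshot of all counters for this job.
            Counters counters = job.getCounters();

            // Built-in counters can be looked up by enum ...
            Counter mapInputRecords = counters.findCounter(TaskCounter.MAP_INPUT_RECORDS);
            System.out.println("Map input records: " + mapInputRecords.getValue());

            // ... or by group name and counter name.
            Counter launchedMaps = counters.findCounter("org.apache.hadoop.mapreduce.JobCounter",
                    "TOTAL_LAUNCHED_MAPS");
            System.out.println("Launched maps: " + launchedMaps.getValue());
        }
    }
}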

Usage

From source file:org.gpfvic.mahout.cf.taste.hadoop.als.ParallelALSFactorizationJob.java

License:Apache License

@Override
public int run(String[] args) throws Exception {

    addInputOption();
    addOutputOption();
    addOption("lambda", null, "regularization parameter", true);
    addOption("implicitFeedback", null, "data consists of implicit feedback?", String.valueOf(false));
    addOption("alpha", null, "confidence parameter (only used on implicit feedback)", String.valueOf(40));
    addOption("numFeatures", null, "dimension of the feature space", true);
    addOption("numIterations", null, "number of iterations", true);
    addOption("numThreadsPerSolver", null, "threads per solver mapper", String.valueOf(1));
    addOption("usesLongIDs", null, "input contains long IDs that need to be translated");

    Map<String, List<String>> parsedArgs = parseArguments(args);
    if (parsedArgs == null) {
        return -1;
    }

    numFeatures = Integer.parseInt(getOption("numFeatures"));
    numIterations = Integer.parseInt(getOption("numIterations"));
    lambda = Double.parseDouble(getOption("lambda"));
    alpha = Double.parseDouble(getOption("alpha"));
    implicitFeedback = Boolean.parseBoolean(getOption("implicitFeedback"));

    numThreadsPerSolver = Integer.parseInt(getOption("numThreadsPerSolver"));
    boolean usesLongIDs = Boolean.parseBoolean(getOption("usesLongIDs", String.valueOf(false)));

    /*
    * compute the factorization A = U M'
    *
    * where A (users x items) is the matrix of known ratings
    *           U (users x features) is the representation of users in the feature space
    *           M (items x features) is the representation of items in the feature space
    */

    if (usesLongIDs) {
        Job mapUsers = prepareJob(getInputPath(), getOutputPath("userIDIndex"), TextInputFormat.class,
                MapLongIDsMapper.class, VarIntWritable.class, VarLongWritable.class, IDMapReducer.class,
                VarIntWritable.class, VarLongWritable.class, SequenceFileOutputFormat.class);
        mapUsers.getConfiguration().set(TOKEN_POS, String.valueOf(TasteHadoopUtils.USER_ID_POS));
        mapUsers.waitForCompletion(true);

        Job mapItems = prepareJob(getInputPath(), getOutputPath("itemIDIndex"), TextInputFormat.class,
                MapLongIDsMapper.class, VarIntWritable.class, VarLongWritable.class, IDMapReducer.class,
                VarIntWritable.class, VarLongWritable.class, SequenceFileOutputFormat.class);
        mapItems.getConfiguration().set(TOKEN_POS, String.valueOf(TasteHadoopUtils.ITEM_ID_POS));
        mapItems.waitForCompletion(true);
    }

    /* create A' */
    Job itemRatings = prepareJob(getInputPath(), pathToItemRatings(), TextInputFormat.class,
            ItemRatingVectorsMapper.class, IntWritable.class, VectorWritable.class, VectorSumReducer.class,
            IntWritable.class, VectorWritable.class, SequenceFileOutputFormat.class);
    itemRatings.setCombinerClass(VectorSumCombiner.class);
    itemRatings.getConfiguration().set(USES_LONG_IDS, String.valueOf(usesLongIDs));
    boolean succeeded = itemRatings.waitForCompletion(true);
    if (!succeeded) {
        return -1;
    }

    /* create A */
    Job userRatings = prepareJob(pathToItemRatings(), pathToUserRatings(), TransposeMapper.class,
            IntWritable.class, VectorWritable.class, MergeUserVectorsReducer.class, IntWritable.class,
            VectorWritable.class);
    userRatings.setCombinerClass(MergeVectorsCombiner.class);
    succeeded = userRatings.waitForCompletion(true);
    if (!succeeded) {
        return -1;
    }

    //TODO this could be fiddled into one of the upper jobs
    Job averageItemRatings = prepareJob(pathToItemRatings(), getTempPath("averageRatings"),
            AverageRatingMapper.class, IntWritable.class, VectorWritable.class, MergeVectorsReducer.class,
            IntWritable.class, VectorWritable.class);
    averageItemRatings.setCombinerClass(MergeVectorsCombiner.class);
    succeeded = averageItemRatings.waitForCompletion(true);
    if (!succeeded) {
        return -1;
    }

    Vector averageRatings = ALS.readFirstRow(getTempPath("averageRatings"), getConf());

    int numItems = averageRatings.getNumNondefaultElements();
    int numUsers = (int) userRatings.getCounters().findCounter(Stats.NUM_USERS).getValue();

    log.info("Found {} users and {} items", numUsers, numItems);

    /* create an initial M */
    initializeM(averageRatings);

    for (int currentIteration = 0; currentIteration < numIterations; currentIteration++) {
        /* broadcast M, read A row-wise, recompute U row-wise */
        log.info("Recomputing U (iteration {}/{})", currentIteration, numIterations);
        runSolver(pathToUserRatings(), pathToU(currentIteration), pathToM(currentIteration - 1),
                currentIteration, "U", numItems);
        /* broadcast U, read A' row-wise, recompute M row-wise */
        log.info("Recomputing M (iteration {}/{})", currentIteration, numIterations);
        runSolver(pathToItemRatings(), pathToM(currentIteration), pathToU(currentIteration), currentIteration,
                "M", numUsers);
    }

    return 0;
}

From source file:org.gridgain.client.hadoop.GridHadoopClientProtocolSelfTest.java

License:Open Source License

/**
 * Tests job counters retrieval.
 *
 * @throws Exception If failed.
 */
public void testJobCounters() throws Exception {
    GridGgfs ggfs = grid(0).ggfs(GridHadoopAbstractSelfTest.ggfsName);

    ggfs.mkdirs(new GridGgfsPath(PATH_INPUT));

    try (BufferedWriter bw = new BufferedWriter(
            new OutputStreamWriter(ggfs.create(new GridGgfsPath(PATH_INPUT + "/test.file"), true)))) {

        bw.write("alpha\n" + "beta\n" + "gamma\n" + "alpha\n" + "beta\n" + "gamma\n" + "alpha\n" + "beta\n"
                + "gamma\n");
    }

    Configuration conf = config(GridHadoopAbstractSelfTest.REST_PORT);

    final Job job = Job.getInstance(conf);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    job.setMapperClass(TestCountingMapper.class);
    job.setReducerClass(TestCountingReducer.class);
    job.setCombinerClass(TestCountingCombiner.class);

    FileInputFormat.setInputPaths(job, new Path(PATH_INPUT));
    FileOutputFormat.setOutputPath(job, new Path(PATH_OUTPUT));

    job.submit();

    final Counter cntr = job.getCounters().findCounter(TestCounter.COUNTER1);

    assertEquals(0, cntr.getValue());

    cntr.increment(10);

    assertEquals(10, cntr.getValue());

    // Transferring to map phase.
    setupLockFile.delete();

    // Transferring to reduce phase.
    mapLockFile.delete();

    job.waitForCompletion(false);

    assertEquals("job must end successfully", JobStatus.State.SUCCEEDED, job.getStatus().getState());

    final Counters counters = job.getCounters();

    assertNotNull("counters cannot be null", counters);
    assertEquals("wrong counters count", 3, counters.countCounters());
    assertEquals("wrong counter value", 15, counters.findCounter(TestCounter.COUNTER1).getValue());
    assertEquals("wrong counter value", 3, counters.findCounter(TestCounter.COUNTER2).getValue());
    assertEquals("wrong counter value", 3, counters.findCounter(TestCounter.COUNTER3).getValue());
}

From source file:org.hedera.mapreduce.BasicComputeTermStats.java

License:Apache License

/**
 * Runs this tool.
 */
@Override
@SuppressWarnings("static-access")
public int run(String[] args) throws Exception {
    Options options = new Options();

    options.addOption(
            OptionBuilder.withArgName("path").hasArg().withDescription("input path").create(INPUT_OPTION));
    options.addOption(
            OptionBuilder.withArgName("path").hasArg().withDescription("output path").create(OUTPUT_OPTION));
    options.addOption(
            OptionBuilder.withArgName("num").hasArg().withDescription("minimum df").create(DF_MIN_OPTION));
    options.addOption(OptionBuilder.withArgName("string " + AnalyzerFactory.getOptions()).hasArg()
            .withDescription("preprocessing").create(PREPROCESSING));
    options.addOption(
            OptionBuilder.withArgName("num").hasArg().withDescription("begin time").create(BEGIN_TIME_OPTION));
    options.addOption(
            OptionBuilder.withArgName("num").hasArg().withDescription("end time").create(END_TIME_OPTION));
    options.addOption(
            OptionBuilder.withArgName("num").hasArg().withDescription("end time").create(REDUCE_OPTION));

    CommandLine cmdline;
    CommandLineParser parser = new GnuParser();
    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        HelpFormatter formatter = new HelpFormatter();
        formatter.printHelp(this.getClass().getName(), options);
        ToolRunner.printGenericCommandUsage(System.out);
        System.err.println("Error parsing command line: " + exp.getMessage());
        return -1;
    }

    if (!cmdline.hasOption(INPUT_OPTION) || !cmdline.hasOption(OUTPUT_OPTION)
            || !cmdline.hasOption(PREPROCESSING)) {
        HelpFormatter formatter = new HelpFormatter();
        formatter.printHelp(this.getClass().getName(), options);
        ToolRunner.printGenericCommandUsage(System.out);
        return -1;
    }

    String input = cmdline.getOptionValue(INPUT_OPTION);
    String output = cmdline.getOptionValue(OUTPUT_OPTION);
    String preprocessing = cmdline.getOptionValue(PREPROCESSING);
    int reduceNo;

    if (cmdline.hasOption(REDUCE_OPTION)) {
        String reduceNoStr = cmdline.getOptionValue(REDUCE_OPTION);
        try {
            reduceNo = Integer.parseInt(reduceNoStr);
        } catch (NumberFormatException e) {
            HelpFormatter formatter = new HelpFormatter();
            formatter.printHelp(this.getClass().getName(), options);
            ToolRunner.printGenericCommandUsage(System.out);
            System.err.println("Invalid reduce No. : " + reduceNoStr);
        }
    }

    long begin = 0, end = Long.MAX_VALUE;
    if (cmdline.hasOption(BEGIN_TIME_OPTION)) {
        String beginTs = cmdline.getOptionValue(BEGIN_TIME_OPTION);
        try {
            begin = TIME_FORMAT.parseMillis(beginTs);
        } catch (Exception e) {
            HelpFormatter formatter = new HelpFormatter();
            formatter.printHelp(this.getClass().getName(), options);
            ToolRunner.printGenericCommandUsage(System.out);
            System.err.println("Invalid time format: " + e.getMessage());
        }
    }

    if (cmdline.hasOption(END_TIME_OPTION)) {
        String endTs = cmdline.getOptionValue(END_TIME_OPTION);
        try {
            end = TIME_FORMAT.parseMillis(endTs);
        } catch (Exception e) {
            HelpFormatter formatter = new HelpFormatter();
            formatter.printHelp(this.getClass().getName(), options);
            ToolRunner.printGenericCommandUsage(System.out);
            System.err.println("Invalid time format: " + e.getMessage());
        }
    }

    LOG.info("Tool name: " + BasicComputeTermStats.class.getSimpleName());
    LOG.info(" - input: " + input);
    LOG.info(" - output: " + output);
    LOG.info(" - preprocessing: " + preprocessing);

    getConf().set(PREPROCESSING, preprocessing);

    setMapperSize("-Xmx5120m");

    // skip non-article
    getConf().setBoolean(WikiRevisionInputFormat.SKIP_NON_ARTICLES, true);

    // set up range
    getConf().setLong(REVISION_BEGIN_TIME, begin);
    getConf().setLong(REVISION_END_TIME, end);

    Job job = create(BasicComputeTermStats.class.getSimpleName() + ":" + input, BasicComputeTermStats.class);

    job.setNumReduceTasks(1);

    if (cmdline.hasOption(DF_MIN_OPTION)) {
        int dfMin = Integer.parseInt(cmdline.getOptionValue(DF_MIN_OPTION));
        LOG.info(" - dfMin: " + dfMin);
        job.getConfiguration().setInt(HADOOP_DF_MIN_OPTION, dfMin);
    }

    FileInputFormat.setInputPaths(job, input);
    FileOutputFormat.setOutputPath(job, new Path(output));

    job.setInputFormatClass(WikiFullRevisionJsonInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(PairOfIntLong.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(PairOfIntLong.class);

    job.setMapperClass(MyMapper.class);
    job.setCombinerClass(MyCombiner.class);
    job.setReducerClass(MyReducer.class);

    if (cmdline.hasOption(DF_MIN_OPTION)) {
        int dfMin = Integer.parseInt(cmdline.getOptionValue(DF_MIN_OPTION));
        LOG.info(" - dfMin: " + dfMin);
        job.getConfiguration().setInt(HADOOP_DF_MIN_OPTION, dfMin);
    }

    FileSystem.get(getConf()).delete(new Path(output), true);

    long startTime = System.currentTimeMillis();
    job.waitForCompletion(true);
    LOG.info("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds.");

    LOG.info("Map Reduce output reducers: " + job.getCounters().findCounter(Records.TERMS).getValue());
    return 0;
}

From source file:org.huahinframework.manager.rest.service.JobService.java

License:Apache License

/**
 * @param jobId
 * @return {@link JSONObject}
 * @throws IOException
 * @throws InterruptedException
 */
private Map<String, Object> getStatus(String jobId) throws IOException, InterruptedException {
    Map<String, Object> job = null;

    Cluster cluster = new Cluster(getJobConf());
    for (JobStatus jobStatus : cluster.getAllJobStatuses()) {
        if (jobStatus.getJobID().toString().equals(jobId)) {
            job = JobUtils.getJob(jobStatus);
            Job j = cluster.getJob(jobStatus.getJobID());
            if (j == null) {
                break;
            }

            Calendar finishTime = Calendar.getInstance();
            finishTime.setTimeInMillis(j.getFinishTime());
            job.put(Response.FINISH_TIME, finishTime.getTime().toString());

            Map<String, Map<String, Long>> groups = new HashMap<String, Map<String, Long>>();
            for (String s : j.getCounters().getGroupNames()) {
                CounterGroup counterGroup = j.getCounters().getGroup(s);
                Iterator<Counter> ite = counterGroup.iterator();

                Map<String, Long> counters = new HashMap<String, Long>();
                groups.put(counterGroup.getDisplayName(), counters);
                while (ite.hasNext()) {
                    Counter counter = (Counter) ite.next();
                    counters.put(counter.getDisplayName(), counter.getValue());
                }
            }

            job.put(Response.GROUPS, groups);
            break;
        }
    }

    return job;
}

From source file:org.janusgraph.hadoop.scan.HadoopScanRunner.java

License:Apache License

public static ScanMetrics runJob(org.apache.hadoop.conf.Configuration hadoopConf,
        Class<? extends InputFormat> inputFormat, String jobName, Class<? extends Mapper> mapperClass)
        throws IOException, InterruptedException, ClassNotFoundException {

    Job job = Job.getInstance(hadoopConf);

    //job.setJarByClass(HadoopScanMapper.class);
    job.setJarByClass(mapperClass);
    //job.setJobName(HadoopScanMapper.class.getSimpleName() + "[" + scanJob + "]");
    job.setJobName(jobName);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(NullWritable.class);
    job.setMapOutputKeyClass(NullWritable.class);
    job.setMapOutputValueClass(NullWritable.class);
    job.setNumReduceTasks(0);
    //job.setMapperClass(HadoopScanMapper.class);
    job.setMapperClass(mapperClass);
    job.setOutputFormatClass(NullOutputFormat.class);
    job.setInputFormatClass(inputFormat);

    boolean success = job.waitForCompletion(true);

    if (!success) {
        String f;
        try {
            // Just in case one of Job's methods throws an exception
            f = String.format("MapReduce JobID %s terminated abnormally: %s", job.getJobID().toString(),
                    HadoopCompatLoader.DEFAULT_COMPAT.getJobFailureString(job));
        } catch (RuntimeException e) {
            f = "Job failed (unable to read job status programmatically -- see MapReduce logs for information)";
        }
        throw new IOException(f);
    } else {
        return DEFAULT_COMPAT.getMetrics(job.getCounters());
    }
}

From source file:org.kiji.mapreduce.framework.JobHistoryKijiTable.java

License:Apache License

/**
 * Helper method to write individual counters to job history table's counter family.
 *
 * @param writer The {@link KijiTableWriter} for the job history table.
 * @param job The {@link Job} whose counters we are recording.
 * @throws IOException If there is an error writing to the table.
 */
private void writeIndividualCounters(KijiTableWriter writer, Job job) throws IOException {
    EntityId jobEntity = mKijiTable.getEntityId(job.getJobID().toString());
    Counters counters = job.getCounters();
    for (String grpName : counters.getGroupNames()) {
        Iterator<Counter> counterIterator = counters.getGroup(grpName).iterator();
        while (counterIterator.hasNext()) {
            Counter ctr = counterIterator.next();
            writer.put(jobEntity, JOB_HISTORY_COUNTERS_FAMILY, grpName + ":" + ctr.getName(), ctr.getValue());
        }
    }
}

From source file:org.kiji.mapreduce.framework.JobHistoryKijiTable.java

License:Apache License

/**
 * Writes a job into the JobHistoryKijiTable.
 *
 * @param job The job to save.
 * @param startTime The time the job began, in milliseconds.
 * @param endTime The time the job ended, in milliseconds
 * @throws IOException If there is an error writing to the table.
 */
public void recordJob(Job job, long startTime, long endTime) throws IOException {
    KijiTableWriter writer = mKijiTable.openTableWriter();
    EntityId jobEntity = mKijiTable.getEntityId(job.getJobID().toString());
    try {
        ByteArrayOutputStream baos = new ByteArrayOutputStream();
        writer.put(jobEntity, JOB_HISTORY_FAMILY, JOB_HISTORY_ID_QUALIFIER, startTime,
                job.getJobID().toString());
        writer.put(jobEntity, JOB_HISTORY_FAMILY, JOB_HISTORY_NAME_QUALIFIER, startTime, job.getJobName());
        writer.put(jobEntity, JOB_HISTORY_FAMILY, JOB_HISTORY_START_TIME_QUALIFIER, startTime, startTime);
        writer.put(jobEntity, JOB_HISTORY_FAMILY, JOB_HISTORY_END_TIME_QUALIFIER, startTime, endTime);
        writer.put(jobEntity, JOB_HISTORY_FAMILY, JOB_HISTORY_END_STATUS_QUALIFIER, startTime,
                job.isSuccessful() ? "SUCCEEDED" : "FAILED");
        writer.put(jobEntity, JOB_HISTORY_FAMILY, JOB_HISTORY_COUNTERS_QUALIFIER, startTime,
                job.getCounters().toString());
        job.getConfiguration().writeXml(baos);
        writer.put(jobEntity, JOB_HISTORY_FAMILY, JOB_HISTORY_CONFIGURATION_QUALIFIER, startTime,
                baos.toString("UTF-8"));
        writeIndividualCounters(writer, job);
    } finally {
        ResourceUtils.closeOrLog(writer);
    }
}

From source file:org.kiji.mapreduce.JobHistoryKijiTable.java

License:Apache License

/**
 * Writes a job into the JobHistoryKijiTable.
 *
 * @param job The job to save.
 * @param startTime The time the job began, in milliseconds.
 * @param endTime The time the job ended, in milliseconds
 * @throws IOException If there is an error writing to the table.
 */
public void recordJob(Job job, long startTime, long endTime) throws IOException {
    KijiTableWriter writer = mKijiTable.openTableWriter();
    EntityId jobEntity = mKijiTable.getEntityId(job.getJobID().toString());
    try {
        ByteArrayOutputStream baos = new ByteArrayOutputStream();
        writer.put(jobEntity, "info", "jobId", startTime, job.getJobID().toString());
        writer.put(jobEntity, "info", "jobName", startTime, job.getJobName());
        writer.put(jobEntity, "info", "startTime", startTime, startTime);
        writer.put(jobEntity, "info", "endTime", startTime, endTime);
        writer.put(jobEntity, "info", "counters", startTime, job.getCounters().toString());
        job.getConfiguration().writeXml(baos);
        writer.put(jobEntity, "info", "configuration", startTime, baos.toString("UTF-8"));
    } finally {
        IOUtils.closeQuietly(writer);
    }
}

From source file:org.kududb.mapreduce.TestInputFormatJob.java

License:Apache License

private void createAndTestJob(Configuration conf, List<ColumnRangePredicate> predicates, int expectedCount)
        throws Exception {
    String jobName = TestInputFormatJob.class.getName();
    Job job = new Job(conf, jobName);

    Class<TestMapperTableInput> mapperClass = TestMapperTableInput.class;
    job.setJarByClass(mapperClass);
    job.setMapperClass(mapperClass);
    job.setNumReduceTasks(0);
    job.setOutputFormatClass(NullOutputFormat.class);
    KuduTableMapReduceUtil.TableInputFormatConfigurator configurator = new KuduTableMapReduceUtil.TableInputFormatConfigurator(
            job, TABLE_NAME, "*", getMasterAddresses()).operationTimeoutMs(DEFAULT_SLEEP).addDependencies(false)
                    .cacheBlocks(false);
    for (ColumnRangePredicate predicate : predicates) {
        configurator.addColumnRangePredicate(predicate);
    }
    configurator.configure();

    assertTrue("Test job did not end properly", job.waitForCompletion(true));

    assertEquals(expectedCount, job.getCounters().findCounter(Counters.ROWS).getValue());
}

From source file:org.lilyproject.mapreduce.test.MapReduceTest.java

License:Apache License

private long getTotalLaunchedMaps(Job job) throws IOException {
    return job.getCounters().findCounter("org.apache.hadoop.mapreduce.JobCounter", "TOTAL_LAUNCHED_MAPS")
            .getValue();
}