Example usage for org.apache.hadoop.mapreduce Job getCounters

Introduction

On this page you can find example usages of org.apache.hadoop.mapreduce.Job#getCounters, collected from several open-source projects.

Prototype

public Counters getCounters() throws IOException 

Document

Gets the counters for this job.
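
Before the project examples below, here is a minimal sketch of the typical pattern (the job name, configuration, and counter chosen here are illustrative, not taken from any of the projects): configure and run a job, then call getCounters() once the job has completed and read a counter value from the returned Counters object.

    // Minimal illustrative sketch; assumes imports for Configuration, Job, Counters and
    // TaskCounter from the org.apache.hadoop packages.
    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf, "counter example");
    // ... set mapper/reducer classes, input and output paths here ...
    job.waitForCompletion(true);

    // getCounters() may contact the cluster, hence the declared IOException.
    Counters counters = job.getCounters();
    long mapInputRecords = counters.findCounter(TaskCounter.MAP_INPUT_RECORDS).getValue();
    System.out.println("Map input records: " + mapInputRecords);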

Usage

From source file:org.apache.gobblin.compaction.action.CompactionCompleteFileOperationAction.java

License:Apache License

/**
 * Replace or append the destination folder with new files from the map-reduce job.
 * Create a record count file containing the number of records that have been processed.
 */
public void onCompactionJobComplete(FileSystemDataset dataset) throws IOException {
    if (configurator != null && configurator.isJobCreated()) {
        CompactionPathParser.CompactionParserResult result = new CompactionPathParser(state).parse(dataset);
        Path tmpPath = configurator.getMrOutputPath();
        Path dstPath = new Path(result.getDstAbsoluteDir());

        // This is append-delta mode because the compaction rename-source-dir mode is enabled.
        boolean appendDeltaOutput = this.state.getPropAsBoolean(
                MRCompactor.COMPACTION_RENAME_SOURCE_DIR_ENABLED,
                MRCompactor.DEFAULT_COMPACTION_RENAME_SOURCE_DIR_ENABLED);

        Job job = this.configurator.getConfiguredJob();

        long newTotalRecords = 0;
        long oldTotalRecords = helper.readRecordCount(new Path(result.getDstAbsoluteDir()));
        long executeCount = helper.readExecutionCount(new Path(result.getDstAbsoluteDir()));

        List<Path> goodPaths = CompactionJobConfigurator.getGoodFiles(job, tmpPath, this.fs,
                ImmutableList.of(configurator.getFileExtension()));

        if (appendDeltaOutput) {
            FsPermission permission = HadoopUtils.deserializeFsPermission(this.state,
                    MRCompactorJobRunner.COMPACTION_JOB_OUTPUT_DIR_PERMISSION, FsPermission.getDefault());
            WriterUtils.mkdirsWithRecursivePermission(this.fs, dstPath, permission);
            // append files under mr output to destination
            for (Path filePath : goodPaths) {
                String fileName = filePath.getName();
                log.info(String.format("Adding %s to %s", filePath.toString(), dstPath));
                Path outPath = new Path(dstPath, fileName);

                if (!this.fs.rename(filePath, outPath)) {
                    throw new IOException(
                            String.format("Unable to move %s to %s", filePath.toString(), outPath.toString()));
                }
            }

            // Obtain the record count from the input file names.
            // We don't get the record count from the map-reduce counter because, in the next run, the threshold
            // (delta record) calculation is based on the input file names. By pre-defining which input folders are
            // involved in the MR execution, it is easy to track how many files have been involved in MR so far,
            // which makes it possible to calculate the total number of records (all previous runs plus the current run).
            newTotalRecords = this.configurator.getFileNameRecordCount();
        } else {
            this.fs.delete(dstPath, true);
            FsPermission permission = HadoopUtils.deserializeFsPermission(this.state,
                    MRCompactorJobRunner.COMPACTION_JOB_OUTPUT_DIR_PERMISSION, FsPermission.getDefault());

            WriterUtils.mkdirsWithRecursivePermission(this.fs, dstPath.getParent(), permission);
            if (!this.fs.rename(tmpPath, dstPath)) {
                throw new IOException(String.format("Unable to move %s to %s", tmpPath, dstPath));
            }

            // Obtain the record count from the map-reduce job counter.
            // We don't get the record count from file names because tracking which files are actually involved in the
            // MR execution can be hard: new minutely data is rolled up into hourly folders, and from the daily
            // compaction perspective we cannot tell which files are newly added (because we simply pass all hourly
            // folders to the MR job instead of individual files).
            Counter counter = job.getCounters().findCounter(RecordKeyMapperBase.EVENT_COUNTER.RECORD_COUNT);
            newTotalRecords = counter.getValue();
        }

        State compactState = helper.loadState(new Path(result.getDstAbsoluteDir()));
        compactState.setProp(CompactionSlaEventHelper.RECORD_COUNT_TOTAL, Long.toString(newTotalRecords));
        compactState.setProp(CompactionSlaEventHelper.EXEC_COUNT_TOTAL, Long.toString(executeCount + 1));
        compactState.setProp(CompactionSlaEventHelper.MR_JOB_ID,
                this.configurator.getConfiguredJob().getJobID().toString());
        helper.saveState(new Path(result.getDstAbsoluteDir()), compactState);

        log.info("Updating record count from {} to {} in {} [{}]", oldTotalRecords, newTotalRecords, dstPath,
                executeCount + 1);

        // submit events for record count
        if (eventSubmitter != null) {
            Map<String, String> eventMetadataMap = ImmutableMap.of(CompactionSlaEventHelper.DATASET_URN,
                    dataset.datasetURN(), CompactionSlaEventHelper.RECORD_COUNT_TOTAL,
                    Long.toString(newTotalRecords), CompactionSlaEventHelper.PREV_RECORD_COUNT_TOTAL,
                    Long.toString(oldTotalRecords), CompactionSlaEventHelper.EXEC_COUNT_TOTAL,
                    Long.toString(executeCount + 1), CompactionSlaEventHelper.MR_JOB_ID,
                    this.configurator.getConfiguredJob().getJobID().toString());
            this.eventSubmitter.submit(CompactionSlaEventHelper.COMPACTION_RECORD_COUNT_EVENT,
                    eventMetadataMap);
        }
    }
}

From source file:org.apache.gora.examples.mapreduce.QueryCounter.java

License:Apache License

/**
 * Returns the number of results for the given Query.
 */
public long countQuery(DataStore<K, T> dataStore, Query<K, T> query) throws Exception {
    Job job = createJob(dataStore, query);
    job.waitForCompletion(true);
    assert job.isComplete();

    return job.getCounters().findCounter(COUNTER_GROUP, ROWS).getValue();
}

From source file:org.apache.gora.examples.mapreduce.QueryCounter.java

License:Apache License

/**
 * Returns the number of results for the Query obtained by the
 * {@link #getQuery(DataStore)} method.
 */
public long countQuery(DataStore<K, T> dataStore) throws Exception {
    Query<K, T> query = getQuery(dataStore);

    Job job = createJob(dataStore, query);
    job.waitForCompletion(true);
    assert job.isComplete();

    return job.getCounters().findCounter(COUNTER_GROUP, ROWS).getValue();
}

From source file:org.apache.hadoop.examples.WordMedian.java

License:Apache License

@Override
public int run(String[] args) throws Exception {
    if (args.length != 2) {
        System.err.println("Usage: wordmedian <in> <out>");
        return 0;
    }

    setConf(new Configuration());
    Configuration conf = getConf();

    Job job = Job.getInstance(conf, "word median");
    job.setJarByClass(WordMedian.class);
    job.setMapperClass(WordMedianMapper.class);
    job.setCombinerClass(WordMedianReducer.class);
    job.setReducerClass(WordMedianReducer.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    boolean result = job.waitForCompletion(true);

    // Wait for JOB 1 -- get middle value to check for Median

    long totalWords = job.getCounters().getGroup(TaskCounter.class.getCanonicalName())
            .findCounter("MAP_OUTPUT_RECORDS", "Map output records").getValue();
    int medianIndex1 = (int) Math.ceil((totalWords / 2.0));
    int medianIndex2 = (int) Math.floor((totalWords / 2.0));

    median = readAndFindMedian(args[1], medianIndex1, medianIndex2, conf);

    return (result ? 0 : 1);
}

From source file:org.apache.hcatalog.mapreduce.HCatMapReduceTest.java

License:Apache License

/**
 * Run a local map-reduce job to load data from in-memory records into an HCatalog table.
 * @param partitionValues key/value pairs identifying the target partition, or null for an unpartitioned write
 * @param partitionColumns schema of the partition columns
 * @param records data to be written to the HCatalog table
 * @param writeCount number of records to write
 * @param assertWrite whether to assert that all records were written by the mappers
 * @param asSingleMapTask whether to use a single input path (and hence a single map task)
 * @return the completed Job
 * @throws Exception
 */
Job runMRCreate(Map<String, String> partitionValues, List<HCatFieldSchema> partitionColumns,
        List<HCatRecord> records, int writeCount, boolean assertWrite, boolean asSingleMapTask)
        throws Exception {

    writeRecords = records;
    MapCreate.writeCount = 0;

    Configuration conf = new Configuration();
    Job job = new Job(conf, "hcat mapreduce write test");
    job.setJarByClass(this.getClass());
    job.setMapperClass(HCatMapReduceTest.MapCreate.class);

    // input/output settings
    job.setInputFormatClass(TextInputFormat.class);

    if (asSingleMapTask) {
        // One input path would mean only one map task
        Path path = new Path(fs.getWorkingDirectory(), "mapred/testHCatMapReduceInput");
        createInputFile(path, writeCount);
        TextInputFormat.setInputPaths(job, path);
    } else {
        // Create two input paths so that two map tasks get triggered. There could be other ways
        // to trigger two map tasks.
        Path path = new Path(fs.getWorkingDirectory(), "mapred/testHCatMapReduceInput");
        createInputFile(path, writeCount / 2);

        Path path2 = new Path(fs.getWorkingDirectory(), "mapred/testHCatMapReduceInput2");
        createInputFile(path2, (writeCount - writeCount / 2));

        TextInputFormat.setInputPaths(job, path, path2);
    }

    job.setOutputFormatClass(HCatOutputFormat.class);

    OutputJobInfo outputJobInfo = OutputJobInfo.create(dbName, tableName, partitionValues);
    HCatOutputFormat.setOutput(job, outputJobInfo);

    job.setMapOutputKeyClass(BytesWritable.class);
    job.setMapOutputValueClass(DefaultHCatRecord.class);

    job.setNumReduceTasks(0);

    HCatOutputFormat.setSchema(job, new HCatSchema(partitionColumns));

    boolean success = job.waitForCompletion(true);

    // Ensure counters are set when data has actually been read.
    if (partitionValues != null) {
        assertTrue(
                job.getCounters().getGroup("FileSystemCounters").findCounter("FILE_BYTES_READ").getValue() > 0);
    }

    if (!HCatUtil.isHadoop23()) {
        // Local mode outputcommitter hook is not invoked in Hadoop 1.x
        if (success) {
            new FileOutputCommitterContainer(job, null).commitJob(job);
        } else {
            new FileOutputCommitterContainer(job, null).abortJob(job, JobStatus.State.FAILED);
        }
    }
    if (assertWrite) {
        // Assert the write count only when the caller asked for it.
        Assert.assertEquals(writeCount, MapCreate.writeCount);
    }

    return job;
}

From source file:org.apache.hive.hcatalog.mapreduce.HCatMapReduceTest.java

License:Apache License

/**
 * Run a local map-reduce job to load data from in-memory records into an HCatalog table.
 * @param partitionValues key/value pairs identifying the target partition, or null for an unpartitioned write
 * @param partitionColumns schema of the partition columns
 * @param records data to be written to the HCatalog table
 * @param writeCount number of records to write
 * @param assertWrite whether to assert that all records were written by the mappers
 * @param asSingleMapTask whether to use a single input path (and hence a single map task)
 * @param customDynamicPathPattern custom dynamic-partitioning path pattern, set on the job configuration when non-null
 * @return the completed Job
 * @throws Exception
 */
Job runMRCreate(Map<String, String> partitionValues, List<HCatFieldSchema> partitionColumns,
        List<HCatRecord> records, int writeCount, boolean assertWrite, boolean asSingleMapTask,
        String customDynamicPathPattern) throws Exception {

    writeRecords = records;
    MapCreate.writeCount = 0;

    Configuration conf = new Configuration();
    Job job = new Job(conf, "hcat mapreduce write test");
    job.setJarByClass(this.getClass());
    job.setMapperClass(HCatMapReduceTest.MapCreate.class);

    // input/output settings
    job.setInputFormatClass(TextInputFormat.class);

    if (asSingleMapTask) {
        // One input path would mean only one map task
        Path path = new Path(fs.getWorkingDirectory(), "mapred/testHCatMapReduceInput");
        createInputFile(path, writeCount);
        TextInputFormat.setInputPaths(job, path);
    } else {
        // Create two input paths so that two map tasks get triggered. There could be other ways
        // to trigger two map tasks.
        Path path = new Path(fs.getWorkingDirectory(), "mapred/testHCatMapReduceInput");
        createInputFile(path, writeCount / 2);

        Path path2 = new Path(fs.getWorkingDirectory(), "mapred/testHCatMapReduceInput2");
        createInputFile(path2, (writeCount - writeCount / 2));

        TextInputFormat.setInputPaths(job, path, path2);
    }

    job.setOutputFormatClass(HCatOutputFormat.class);

    OutputJobInfo outputJobInfo = OutputJobInfo.create(dbName, tableName, partitionValues);
    if (customDynamicPathPattern != null) {
        job.getConfiguration().set(HCatConstants.HCAT_DYNAMIC_CUSTOM_PATTERN, customDynamicPathPattern);
    }
    HCatOutputFormat.setOutput(job, outputJobInfo);

    job.setMapOutputKeyClass(BytesWritable.class);
    job.setMapOutputValueClass(DefaultHCatRecord.class);

    job.setNumReduceTasks(0);

    HCatOutputFormat.setSchema(job, new HCatSchema(partitionColumns));

    boolean success = job.waitForCompletion(true);

    // Ensure counters are set when data has actually been read.
    if (partitionValues != null) {
        assertTrue(
                job.getCounters().getGroup("FileSystemCounters").findCounter("FILE_BYTES_READ").getValue() > 0);
    }

    if (!HCatUtil.isHadoop23()) {
        // Local mode outputcommitter hook is not invoked in Hadoop 1.x
        if (success) {
            new FileOutputCommitterContainer(job, null).commitJob(job);
        } else {
            new FileOutputCommitterContainer(job, null).abortJob(job, JobStatus.State.FAILED);
        }
    }
    if (assertWrite) {
        // Assert the write count only when the caller asked for it.
        Assert.assertEquals(writeCount, MapCreate.writeCount);
    }

    if (isTableExternal()) {
        externalTableLocation = outputJobInfo.getTableInfo().getTableLocation();
    }

    return job;
}

From source file:org.apache.ignite.client.hadoop.GridHadoopClientProtocolSelfTest.java

License:Apache License

/**
 * Tests job counters retrieval.
 *
 * @throws Exception If failed.
 */
public void testJobCounters() throws Exception {
    IgniteFs igfs = grid(0).fileSystem(GridHadoopAbstractSelfTest.igfsName);

    igfs.mkdirs(new IgfsPath(PATH_INPUT));

    try (BufferedWriter bw = new BufferedWriter(
            new OutputStreamWriter(igfs.create(new IgfsPath(PATH_INPUT + "/test.file"), true)))) {

        bw.write("alpha\n" + "beta\n" + "gamma\n" + "alpha\n" + "beta\n" + "gamma\n" + "alpha\n" + "beta\n"
                + "gamma\n");
    }

    Configuration conf = config(GridHadoopAbstractSelfTest.REST_PORT);

    final Job job = Job.getInstance(conf);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    job.setMapperClass(TestCountingMapper.class);
    job.setReducerClass(TestCountingReducer.class);
    job.setCombinerClass(TestCountingCombiner.class);

    FileInputFormat.setInputPaths(job, new Path(PATH_INPUT));
    FileOutputFormat.setOutputPath(job, new Path(PATH_OUTPUT));

    job.submit();

    final Counter cntr = job.getCounters().findCounter(TestCounter.COUNTER1);

    assertEquals(0, cntr.getValue());

    cntr.increment(10);

    assertEquals(10, cntr.getValue());

    // Transferring to map phase.
    setupLockFile.delete();

    // Transferring to reduce phase.
    mapLockFile.delete();

    job.waitForCompletion(false);

    assertEquals("job must end successfully", JobStatus.State.SUCCEEDED, job.getStatus().getState());

    final Counters counters = job.getCounters();

    assertNotNull("counters cannot be null", counters);
    assertEquals("wrong counters count", 3, counters.countCounters());
    assertEquals("wrong counter value", 15, counters.findCounter(TestCounter.COUNTER1).getValue());
    assertEquals("wrong counter value", 3, counters.findCounter(TestCounter.COUNTER2).getValue());
    assertEquals("wrong counter value", 3, counters.findCounter(TestCounter.COUNTER3).getValue());
}

From source file:org.apache.ignite.client.hadoop.HadoopClientProtocolSelfTest.java

License:Apache License

/**
 * Tests job counters retrieval.
 *
 * @throws Exception If failed.
 */
public void testJobCounters() throws Exception {
    IgniteFileSystem igfs = grid(0).fileSystem(HadoopAbstractSelfTest.igfsName);

    igfs.mkdirs(new IgfsPath(PATH_INPUT));

    try (BufferedWriter bw = new BufferedWriter(
            new OutputStreamWriter(igfs.create(new IgfsPath(PATH_INPUT + "/test.file"), true)))) {

        bw.write("alpha\n" + "beta\n" + "gamma\n" + "alpha\n" + "beta\n" + "gamma\n" + "alpha\n" + "beta\n"
                + "gamma\n");
    }

    Configuration conf = config(HadoopAbstractSelfTest.REST_PORT);

    final Job job = Job.getInstance(conf);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    job.setMapperClass(TestCountingMapper.class);
    job.setReducerClass(TestCountingReducer.class);
    job.setCombinerClass(TestCountingCombiner.class);

    FileInputFormat.setInputPaths(job, new Path(PATH_INPUT));
    FileOutputFormat.setOutputPath(job, new Path(PATH_OUTPUT));

    job.submit();

    final Counter cntr = job.getCounters().findCounter(TestCounter.COUNTER1);

    assertEquals(0, cntr.getValue());

    cntr.increment(10);

    assertEquals(10, cntr.getValue());

    // Transferring to map phase.
    setupLockFile.delete();

    // Transferring to reduce phase.
    mapLockFile.delete();

    job.waitForCompletion(false);

    assertEquals("job must end successfully", JobStatus.State.SUCCEEDED, job.getStatus().getState());

    final Counters counters = job.getCounters();

    assertNotNull("counters cannot be null", counters);
    assertEquals("wrong counters count", 3, counters.countCounters());
    assertEquals("wrong counter value", 15, counters.findCounter(TestCounter.COUNTER1).getValue());
    assertEquals("wrong counter value", 3, counters.findCounter(TestCounter.COUNTER2).getValue());
    assertEquals("wrong counter value", 3, counters.findCounter(TestCounter.COUNTER3).getValue());
}

From source file:org.apache.ignite.internal.processors.hadoop.impl.client.HadoopClientProtocolSelfTest.java

License:Apache License

/**
 * Tests job counters retrieval.
 *
 * @throws Exception If failed.
 */
public void testJobCounters() throws Exception {
    IgniteFileSystem igfs = grid(0).fileSystem(HadoopAbstractSelfTest.igfsName);

    igfs.mkdirs(new IgfsPath(PATH_INPUT));

    try (BufferedWriter bw = new BufferedWriter(
            new OutputStreamWriter(igfs.create(new IgfsPath(PATH_INPUT + "/test.file"), true)))) {

        bw.write("alpha\n" + "beta\n" + "gamma\n" + "alpha\n" + "beta\n" + "gamma\n" + "alpha\n" + "beta\n"
                + "gamma\n");
    }

    Configuration conf = config(HadoopAbstractSelfTest.REST_PORT);

    final Job job = Job.getInstance(conf);

    try {
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        job.setMapperClass(TestCountingMapper.class);
        job.setReducerClass(TestCountingReducer.class);
        job.setCombinerClass(TestCountingCombiner.class);

        FileInputFormat.setInputPaths(job, new Path("igfs://" + igfsName + "@" + PATH_INPUT));
        FileOutputFormat.setOutputPath(job, new Path("igfs://" + igfsName + "@" + PATH_OUTPUT));

        job.submit();

        final Counter cntr = job.getCounters().findCounter(TestCounter.COUNTER1);

        assertEquals(0, cntr.getValue());

        cntr.increment(10);

        assertEquals(10, cntr.getValue());

        // Transferring to map phase.
        setupLockFile.delete();

        // Transferring to reduce phase.
        mapLockFile.delete();

        job.waitForCompletion(false);

        assertEquals("job must end successfully", JobStatus.State.SUCCEEDED, job.getStatus().getState());

        final Counters counters = job.getCounters();

        assertNotNull("counters cannot be null", counters);
        assertEquals("wrong counters count", 3, counters.countCounters());
        assertEquals("wrong counter value", 15, counters.findCounter(TestCounter.COUNTER1).getValue());
        assertEquals("wrong counter value", 3, counters.findCounter(TestCounter.COUNTER2).getValue());
        assertEquals("wrong counter value", 3, counters.findCounter(TestCounter.COUNTER3).getValue());
    } catch (Throwable t) {
        log.error("Unexpected exception", t);
    } finally {
        job.getCluster().close();
    }
}

From source file:org.apache.kudu.mapreduce.ITInputFormatJob.java

License:Apache License

private void createAndTestJob(Configuration conf, List<KuduPredicate> predicates, int expectedCount)
        throws Exception {
    String jobName = ITInputFormatJob.class.getName();
    Job job = new Job(conf, jobName);

    Class<TestMapperTableInput> mapperClass = TestMapperTableInput.class;
    job.setJarByClass(mapperClass);
    job.setMapperClass(mapperClass);
    job.setNumReduceTasks(0);
    job.setOutputFormatClass(NullOutputFormat.class);
    KuduTableMapReduceUtil.TableInputFormatConfigurator configurator = new KuduTableMapReduceUtil.TableInputFormatConfigurator(
            job, TABLE_NAME, "*", getMasterAddresses()).operationTimeoutMs(DEFAULT_SLEEP).addDependencies(false)
                    .cacheBlocks(false);
    for (KuduPredicate predicate : predicates) {
        configurator.addPredicate(predicate);
    }
    configurator.configure();

    assertTrue("Test job did not end properly", job.waitForCompletion(true));

    assertEquals(expectedCount, job.getCounters().findCounter(Counters.ROWS).getValue());
}