List of usage examples for org.apache.hadoop.mapreduce.Job#getCounters
public Counters getCounters() throws IOException
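Typical usage is to run the job to completion and then query the returned Counters object. A minimal sketch, assuming the job configuration (mapper, reducer, input/output paths) is done elsewhere; the built-in TaskCounter enum is used for illustration:

  // Minimal sketch: run a job, then read a built-in counter.
  // Assumes conf, mapper/reducer classes and input/output paths are set up elsewhere.
  Job job = Job.getInstance(conf, "counter demo");
  // ... job.setMapperClass(...), FileInputFormat.addInputPath(...), etc. ...
  if (job.waitForCompletion(true)) {
    long mapOutputRecords = job.getCounters()
        .findCounter(TaskCounter.MAP_OUTPUT_RECORDS).getValue();
    System.out.println("Map output records: " + mapOutputRecords);
  }

Note that getCounters() can only be called once the job has been submitted. The examples below show the call as it appears in real projects.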
From source file:org.apache.gobblin.compaction.action.CompactionCompleteFileOperationAction.java
License:Apache License
/**
 * Replace or append the destination folder with new files from the map-reduce job.
 * Create a record count file containing the number of records that have been processed.
 */
public void onCompactionJobComplete(FileSystemDataset dataset) throws IOException {
  if (configurator != null && configurator.isJobCreated()) {
    CompactionPathParser.CompactionParserResult result = new CompactionPathParser(state).parse(dataset);
    Path tmpPath = configurator.getMrOutputPath();
    Path dstPath = new Path(result.getDstAbsoluteDir());

    // this is append delta mode due to the compaction rename source dir mode being enabled
    boolean appendDeltaOutput = this.state.getPropAsBoolean(
        MRCompactor.COMPACTION_RENAME_SOURCE_DIR_ENABLED,
        MRCompactor.DEFAULT_COMPACTION_RENAME_SOURCE_DIR_ENABLED);

    Job job = this.configurator.getConfiguredJob();
    long newTotalRecords = 0;
    long oldTotalRecords = helper.readRecordCount(new Path(result.getDstAbsoluteDir()));
    long executeCount = helper.readExecutionCount(new Path(result.getDstAbsoluteDir()));

    List<Path> goodPaths = CompactionJobConfigurator.getGoodFiles(job, tmpPath, this.fs,
        ImmutableList.of(configurator.getFileExtension()));

    if (appendDeltaOutput) {
      FsPermission permission = HadoopUtils.deserializeFsPermission(this.state,
          MRCompactorJobRunner.COMPACTION_JOB_OUTPUT_DIR_PERMISSION, FsPermission.getDefault());
      WriterUtils.mkdirsWithRecursivePermission(this.fs, dstPath, permission);
      // append files under mr output to destination
      for (Path filePath : goodPaths) {
        String fileName = filePath.getName();
        log.info(String.format("Adding %s to %s", filePath.toString(), dstPath));
        Path outPath = new Path(dstPath, fileName);
        if (!this.fs.rename(filePath, outPath)) {
          throw new IOException(
              String.format("Unable to move %s to %s", filePath.toString(), outPath.toString()));
        }
      }
      // Obtain record count from input file names.
      // We don't get the record count from the map-reduce counter because in the next run, the
      // threshold (delta record) calculation is based on the input file names. By pre-defining which
      // input folders are involved in the MR execution, it is easy to track how many files have been
      // involved in MR so far, which makes it possible to calculate the total number of records
      // (all previous runs plus the current run).
      newTotalRecords = this.configurator.getFileNameRecordCount();
    } else {
      this.fs.delete(dstPath, true);
      FsPermission permission = HadoopUtils.deserializeFsPermission(this.state,
          MRCompactorJobRunner.COMPACTION_JOB_OUTPUT_DIR_PERMISSION, FsPermission.getDefault());
      WriterUtils.mkdirsWithRecursivePermission(this.fs, dstPath.getParent(), permission);
      if (!this.fs.rename(tmpPath, dstPath)) {
        throw new IOException(String.format("Unable to move %s to %s", tmpPath, dstPath));
      }
      // Obtain record count from the map-reduce job counter.
      // We don't get the record count from file names because tracking which files are actually
      // involved in the MR execution can be hard: newly arrived minutely data is rolled up into
      // hourly folders, so from the daily compaction's perspective we cannot tell which files were
      // newly added (we simply pass all hourly folders to the MR job instead of individual files).
      Counter counter = job.getCounters().findCounter(RecordKeyMapperBase.EVENT_COUNTER.RECORD_COUNT);
      newTotalRecords = counter.getValue();
    }

    State compactState = helper.loadState(new Path(result.getDstAbsoluteDir()));
    compactState.setProp(CompactionSlaEventHelper.RECORD_COUNT_TOTAL, Long.toString(newTotalRecords));
    compactState.setProp(CompactionSlaEventHelper.EXEC_COUNT_TOTAL, Long.toString(executeCount + 1));
    compactState.setProp(CompactionSlaEventHelper.MR_JOB_ID,
        this.configurator.getConfiguredJob().getJobID().toString());
    helper.saveState(new Path(result.getDstAbsoluteDir()), compactState);

    log.info("Updating record count from {} to {} in {} [{}]", oldTotalRecords, newTotalRecords, dstPath,
        executeCount + 1);

    // submit events for record count
    if (eventSubmitter != null) {
      Map<String, String> eventMetadataMap = ImmutableMap.of(CompactionSlaEventHelper.DATASET_URN,
          dataset.datasetURN(), CompactionSlaEventHelper.RECORD_COUNT_TOTAL,
          Long.toString(newTotalRecords), CompactionSlaEventHelper.PREV_RECORD_COUNT_TOTAL,
          Long.toString(oldTotalRecords), CompactionSlaEventHelper.EXEC_COUNT_TOTAL,
          Long.toString(executeCount + 1), CompactionSlaEventHelper.MR_JOB_ID,
          this.configurator.getConfiguredJob().getJobID().toString());
      this.eventSubmitter.submit(CompactionSlaEventHelper.COMPACTION_RECORD_COUNT_EVENT, eventMetadataMap);
    }
  }
}
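The RECORD_COUNT counter read above is populated on the task side. A hypothetical mapper-side counterpart (class and enum names are illustrative, not Gobblin's actual implementation) would increment it once per record through the task context:

  // Illustrative only: a custom counter enum incremented per record in a mapper,
  // which job.getCounters() can then read once the job completes.
  public class RecordCountingMapper extends Mapper<LongWritable, Text, LongWritable, Text> {
    public enum EVENT_COUNTER { RECORD_COUNT }

    @Override
    protected void map(LongWritable key, Text value, Context context)
        throws IOException, InterruptedException {
      context.getCounter(EVENT_COUNTER.RECORD_COUNT).increment(1);
      context.write(key, value); // pass the record through unchanged
    }
  }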
From source file:org.apache.gora.examples.mapreduce.QueryCounter.java
License:Apache License
/**
 * Returns the number of results to the Query.
 */
public long countQuery(DataStore<K, T> dataStore, Query<K, T> query) throws Exception {
  Job job = createJob(dataStore, query);
  job.waitForCompletion(true);
  assert (job.isComplete() == true);

  return job.getCounters().findCounter(COUNTER_GROUP, ROWS).getValue();
}
From source file:org.apache.gora.examples.mapreduce.QueryCounter.java
License:Apache License
/**
 * Returns the number of results to the Query obtained by the
 * {@link #getQuery(DataStore)} method.
 */
public long countQuery(DataStore<K, T> dataStore) throws Exception {
  Query<K, T> query = getQuery(dataStore);

  Job job = createJob(dataStore, query);
  job.waitForCompletion(true);
  assert (job.isComplete() == true);

  return job.getCounters().findCounter(COUNTER_GROUP, ROWS).getValue();
}
From source file:org.apache.hadoop.examples.WordMedian.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
  if (args.length != 2) {
    System.err.println("Usage: wordmedian <in> <out>");
    return 0;
  }

  setConf(new Configuration());
  Configuration conf = getConf();

  Job job = Job.getInstance(conf, "word median");
  job.setJarByClass(WordMedian.class);
  job.setMapperClass(WordMedianMapper.class);
  job.setCombinerClass(WordMedianReducer.class);
  job.setReducerClass(WordMedianReducer.class);
  job.setOutputKeyClass(IntWritable.class);
  job.setOutputValueClass(IntWritable.class);
  FileInputFormat.addInputPath(job, new Path(args[0]));
  FileOutputFormat.setOutputPath(job, new Path(args[1]));
  boolean result = job.waitForCompletion(true);

  // Wait for JOB 1 -- get middle value to check for Median
  long totalWords = job.getCounters().getGroup(TaskCounter.class.getCanonicalName())
      .findCounter("MAP_OUTPUT_RECORDS", "Map output records").getValue();
  int medianIndex1 = (int) Math.ceil((totalWords / 2.0));
  int medianIndex2 = (int) Math.floor((totalWords / 2.0));

  median = readAndFindMedian(args[1], medianIndex1, medianIndex2, conf);

  return (result ? 0 : 1);
}
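The lookup above keys the group on TaskCounter's canonical class name and passes a display name alongside the counter name. The same value can be read more directly through the enum overload of findCounter; a one-line sketch:

  long totalWords = job.getCounters().findCounter(TaskCounter.MAP_OUTPUT_RECORDS).getValue();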
From source file:org.apache.hcatalog.mapreduce.HCatMapReduceTest.java
License:Apache License
/**
 * Run a local map-reduce job to load data from in-memory records into an HCatalog table.
 * @param partitionValues
 * @param partitionColumns
 * @param records data to be written to HCatalog table
 * @param writeCount
 * @param assertWrite
 * @param asSingleMapTask
 * @return
 * @throws Exception
 */
Job runMRCreate(Map<String, String> partitionValues, List<HCatFieldSchema> partitionColumns,
    List<HCatRecord> records, int writeCount, boolean assertWrite, boolean asSingleMapTask) throws Exception {
  writeRecords = records;
  MapCreate.writeCount = 0;

  Configuration conf = new Configuration();
  Job job = new Job(conf, "hcat mapreduce write test");
  job.setJarByClass(this.getClass());
  job.setMapperClass(HCatMapReduceTest.MapCreate.class);

  // input/output settings
  job.setInputFormatClass(TextInputFormat.class);

  if (asSingleMapTask) {
    // One input path would mean only one map task
    Path path = new Path(fs.getWorkingDirectory(), "mapred/testHCatMapReduceInput");
    createInputFile(path, writeCount);
    TextInputFormat.setInputPaths(job, path);
  } else {
    // Create two input paths so that two map tasks get triggered. There could be other ways
    // to trigger two map tasks.
    Path path = new Path(fs.getWorkingDirectory(), "mapred/testHCatMapReduceInput");
    createInputFile(path, writeCount / 2);
    Path path2 = new Path(fs.getWorkingDirectory(), "mapred/testHCatMapReduceInput2");
    createInputFile(path2, (writeCount - writeCount / 2));
    TextInputFormat.setInputPaths(job, path, path2);
  }

  job.setOutputFormatClass(HCatOutputFormat.class);

  OutputJobInfo outputJobInfo = OutputJobInfo.create(dbName, tableName, partitionValues);
  HCatOutputFormat.setOutput(job, outputJobInfo);

  job.setMapOutputKeyClass(BytesWritable.class);
  job.setMapOutputValueClass(DefaultHCatRecord.class);

  job.setNumReduceTasks(0);

  HCatOutputFormat.setSchema(job, new HCatSchema(partitionColumns));

  boolean success = job.waitForCompletion(true);

  // Ensure counters are set when data has actually been read.
  if (partitionValues != null) {
    assertTrue(
        job.getCounters().getGroup("FileSystemCounters").findCounter("FILE_BYTES_READ").getValue() > 0);
  }

  if (!HCatUtil.isHadoop23()) {
    // Local mode output committer hook is not invoked in Hadoop 1.x
    if (success) {
      new FileOutputCommitterContainer(job, null).commitJob(job);
    } else {
      new FileOutputCommitterContainer(job, null).abortJob(job, JobStatus.State.FAILED);
    }
  }
  if (assertWrite) {
    // we assert only if we expected to assert with this call.
    Assert.assertEquals(writeCount, MapCreate.writeCount);
  }

  return job;
}
From source file:org.apache.hive.hcatalog.mapreduce.HCatMapReduceTest.java
License:Apache License
/**
 * Run a local map-reduce job to load data from in-memory records into an HCatalog table.
 * @param partitionValues
 * @param partitionColumns
 * @param records data to be written to HCatalog table
 * @param writeCount
 * @param assertWrite
 * @param asSingleMapTask
 * @return
 * @throws Exception
 */
Job runMRCreate(Map<String, String> partitionValues, List<HCatFieldSchema> partitionColumns,
    List<HCatRecord> records, int writeCount, boolean assertWrite, boolean asSingleMapTask,
    String customDynamicPathPattern) throws Exception {
  writeRecords = records;
  MapCreate.writeCount = 0;

  Configuration conf = new Configuration();
  Job job = new Job(conf, "hcat mapreduce write test");
  job.setJarByClass(this.getClass());
  job.setMapperClass(HCatMapReduceTest.MapCreate.class);

  // input/output settings
  job.setInputFormatClass(TextInputFormat.class);

  if (asSingleMapTask) {
    // One input path would mean only one map task
    Path path = new Path(fs.getWorkingDirectory(), "mapred/testHCatMapReduceInput");
    createInputFile(path, writeCount);
    TextInputFormat.setInputPaths(job, path);
  } else {
    // Create two input paths so that two map tasks get triggered. There could be other ways
    // to trigger two map tasks.
    Path path = new Path(fs.getWorkingDirectory(), "mapred/testHCatMapReduceInput");
    createInputFile(path, writeCount / 2);
    Path path2 = new Path(fs.getWorkingDirectory(), "mapred/testHCatMapReduceInput2");
    createInputFile(path2, (writeCount - writeCount / 2));
    TextInputFormat.setInputPaths(job, path, path2);
  }

  job.setOutputFormatClass(HCatOutputFormat.class);

  OutputJobInfo outputJobInfo = OutputJobInfo.create(dbName, tableName, partitionValues);
  if (customDynamicPathPattern != null) {
    job.getConfiguration().set(HCatConstants.HCAT_DYNAMIC_CUSTOM_PATTERN, customDynamicPathPattern);
  }
  HCatOutputFormat.setOutput(job, outputJobInfo);

  job.setMapOutputKeyClass(BytesWritable.class);
  job.setMapOutputValueClass(DefaultHCatRecord.class);

  job.setNumReduceTasks(0);

  HCatOutputFormat.setSchema(job, new HCatSchema(partitionColumns));

  boolean success = job.waitForCompletion(true);

  // Ensure counters are set when data has actually been read.
  if (partitionValues != null) {
    assertTrue(
        job.getCounters().getGroup("FileSystemCounters").findCounter("FILE_BYTES_READ").getValue() > 0);
  }

  if (!HCatUtil.isHadoop23()) {
    // Local mode output committer hook is not invoked in Hadoop 1.x
    if (success) {
      new FileOutputCommitterContainer(job, null).commitJob(job);
    } else {
      new FileOutputCommitterContainer(job, null).abortJob(job, JobStatus.State.FAILED);
    }
  }
  if (assertWrite) {
    // we assert only if we expected to assert with this call.
    Assert.assertEquals(writeCount, MapCreate.writeCount);
  }

  if (isTableExternal()) {
    externalTableLocation = outputJobInfo.getTableInfo().getTableLocation();
  }

  return job;
}
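The assertion above looks up the "FileSystemCounters" group and the "FILE_BYTES_READ" counter by name. When the interesting group and counter names are not known up front, both Counters and CounterGroup are Iterable, so everything the job recorded can be enumerated; a small sketch:

  // Dump every counter the job recorded: Counters iterates over its
  // CounterGroups, and each CounterGroup iterates over its Counters.
  for (CounterGroup group : job.getCounters()) {
    for (Counter counter : group) {
      System.out.println(group.getDisplayName() + "\t" + counter.getName() + "=" + counter.getValue());
    }
  }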
From source file:org.apache.ignite.client.hadoop.GridHadoopClientProtocolSelfTest.java
License:Apache License
/**
 * Tests job counters retrieval.
 *
 * @throws Exception If failed.
 */
public void testJobCounters() throws Exception {
  IgniteFs igfs = grid(0).fileSystem(GridHadoopAbstractSelfTest.igfsName);

  igfs.mkdirs(new IgfsPath(PATH_INPUT));

  try (BufferedWriter bw = new BufferedWriter(
      new OutputStreamWriter(igfs.create(new IgfsPath(PATH_INPUT + "/test.file"), true)))) {
    bw.write("alpha\n" + "beta\n" + "gamma\n" + "alpha\n" + "beta\n" + "gamma\n" + "alpha\n" + "beta\n"
        + "gamma\n");
  }

  Configuration conf = config(GridHadoopAbstractSelfTest.REST_PORT);

  final Job job = Job.getInstance(conf);

  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(IntWritable.class);

  job.setMapperClass(TestCountingMapper.class);
  job.setReducerClass(TestCountingReducer.class);
  job.setCombinerClass(TestCountingCombiner.class);

  FileInputFormat.setInputPaths(job, new Path(PATH_INPUT));
  FileOutputFormat.setOutputPath(job, new Path(PATH_OUTPUT));

  job.submit();

  final Counter cntr = job.getCounters().findCounter(TestCounter.COUNTER1);

  assertEquals(0, cntr.getValue());

  cntr.increment(10);

  assertEquals(10, cntr.getValue());

  // Transferring to map phase.
  setupLockFile.delete();

  // Transferring to reduce phase.
  mapLockFile.delete();

  job.waitForCompletion(false);

  assertEquals("job must end successfully", JobStatus.State.SUCCEEDED, job.getStatus().getState());

  final Counters counters = job.getCounters();

  assertNotNull("counters cannot be null", counters);
  assertEquals("wrong counters count", 3, counters.countCounters());
  assertEquals("wrong counter value", 15, counters.findCounter(TestCounter.COUNTER1).getValue());
  assertEquals("wrong counter value", 3, counters.findCounter(TestCounter.COUNTER2).getValue());
  assertEquals("wrong counter value", 3, counters.findCounter(TestCounter.COUNTER3).getValue());
}
From source file:org.apache.ignite.client.hadoop.HadoopClientProtocolSelfTest.java
License:Apache License
/**
 * Tests job counters retrieval.
 *
 * @throws Exception If failed.
 */
public void testJobCounters() throws Exception {
  IgniteFileSystem igfs = grid(0).fileSystem(HadoopAbstractSelfTest.igfsName);

  igfs.mkdirs(new IgfsPath(PATH_INPUT));

  try (BufferedWriter bw = new BufferedWriter(
      new OutputStreamWriter(igfs.create(new IgfsPath(PATH_INPUT + "/test.file"), true)))) {
    bw.write("alpha\n" + "beta\n" + "gamma\n" + "alpha\n" + "beta\n" + "gamma\n" + "alpha\n" + "beta\n"
        + "gamma\n");
  }

  Configuration conf = config(HadoopAbstractSelfTest.REST_PORT);

  final Job job = Job.getInstance(conf);

  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(IntWritable.class);

  job.setMapperClass(TestCountingMapper.class);
  job.setReducerClass(TestCountingReducer.class);
  job.setCombinerClass(TestCountingCombiner.class);

  FileInputFormat.setInputPaths(job, new Path(PATH_INPUT));
  FileOutputFormat.setOutputPath(job, new Path(PATH_OUTPUT));

  job.submit();

  final Counter cntr = job.getCounters().findCounter(TestCounter.COUNTER1);

  assertEquals(0, cntr.getValue());

  cntr.increment(10);

  assertEquals(10, cntr.getValue());

  // Transferring to map phase.
  setupLockFile.delete();

  // Transferring to reduce phase.
  mapLockFile.delete();

  job.waitForCompletion(false);

  assertEquals("job must end successfully", JobStatus.State.SUCCEEDED, job.getStatus().getState());

  final Counters counters = job.getCounters();

  assertNotNull("counters cannot be null", counters);
  assertEquals("wrong counters count", 3, counters.countCounters());
  assertEquals("wrong counter value", 15, counters.findCounter(TestCounter.COUNTER1).getValue());
  assertEquals("wrong counter value", 3, counters.findCounter(TestCounter.COUNTER2).getValue());
  assertEquals("wrong counter value", 3, counters.findCounter(TestCounter.COUNTER3).getValue());
}
From source file:org.apache.ignite.internal.processors.hadoop.impl.client.HadoopClientProtocolSelfTest.java
License:Apache License
/**
 * Tests job counters retrieval.
 *
 * @throws Exception If failed.
 */
public void testJobCounters() throws Exception {
  IgniteFileSystem igfs = grid(0).fileSystem(HadoopAbstractSelfTest.igfsName);

  igfs.mkdirs(new IgfsPath(PATH_INPUT));

  try (BufferedWriter bw = new BufferedWriter(
      new OutputStreamWriter(igfs.create(new IgfsPath(PATH_INPUT + "/test.file"), true)))) {
    bw.write("alpha\n" + "beta\n" + "gamma\n" + "alpha\n" + "beta\n" + "gamma\n" + "alpha\n" + "beta\n"
        + "gamma\n");
  }

  Configuration conf = config(HadoopAbstractSelfTest.REST_PORT);

  final Job job = Job.getInstance(conf);

  try {
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    job.setMapperClass(TestCountingMapper.class);
    job.setReducerClass(TestCountingReducer.class);
    job.setCombinerClass(TestCountingCombiner.class);

    FileInputFormat.setInputPaths(job, new Path("igfs://" + igfsName + "@" + PATH_INPUT));
    FileOutputFormat.setOutputPath(job, new Path("igfs://" + igfsName + "@" + PATH_OUTPUT));

    job.submit();

    final Counter cntr = job.getCounters().findCounter(TestCounter.COUNTER1);

    assertEquals(0, cntr.getValue());

    cntr.increment(10);

    assertEquals(10, cntr.getValue());

    // Transferring to map phase.
    setupLockFile.delete();

    // Transferring to reduce phase.
    mapLockFile.delete();

    job.waitForCompletion(false);

    assertEquals("job must end successfully", JobStatus.State.SUCCEEDED, job.getStatus().getState());

    final Counters counters = job.getCounters();

    assertNotNull("counters cannot be null", counters);
    assertEquals("wrong counters count", 3, counters.countCounters());
    assertEquals("wrong counter value", 15, counters.findCounter(TestCounter.COUNTER1).getValue());
    assertEquals("wrong counter value", 3, counters.findCounter(TestCounter.COUNTER2).getValue());
    assertEquals("wrong counter value", 3, counters.findCounter(TestCounter.COUNTER3).getValue());
  } catch (Throwable t) {
    log.error("Unexpected exception", t);
  } finally {
    job.getCluster().close();
  }
}
From source file:org.apache.kudu.mapreduce.ITInputFormatJob.java
License:Apache License
private void createAndTestJob(Configuration conf, List<KuduPredicate> predicates, int expectedCount)
    throws Exception {
  String jobName = ITInputFormatJob.class.getName();
  Job job = new Job(conf, jobName);

  Class<TestMapperTableInput> mapperClass = TestMapperTableInput.class;
  job.setJarByClass(mapperClass);
  job.setMapperClass(mapperClass);
  job.setNumReduceTasks(0);
  job.setOutputFormatClass(NullOutputFormat.class);
  KuduTableMapReduceUtil.TableInputFormatConfigurator configurator =
      new KuduTableMapReduceUtil.TableInputFormatConfigurator(
          job, TABLE_NAME, "*", getMasterAddresses())
      .operationTimeoutMs(DEFAULT_SLEEP)
      .addDependencies(false)
      .cacheBlocks(false);
  for (KuduPredicate predicate : predicates) {
    configurator.addPredicate(predicate);
  }
  configurator.configure();
  assertTrue("Test job did not end properly", job.waitForCompletion(true));

  assertEquals(expectedCount, job.getCounters().findCounter(Counters.ROWS).getValue());
}
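One caveat that applies to all of the examples above: the Job javadoc notes that getCounters() may return null once a completed job has been retired from the job history store, so long-lived clients should guard the lookup. A defensive sketch (the group and counter names here are illustrative, and Counters refers to org.apache.hadoop.mapreduce.Counters):

  // getCounters() can return null for a retired job; check before dereferencing.
  Counters counters = job.getCounters();
  if (counters != null) {
    long rows = counters.findCounter("my.group", "ROWS").getValue();
    System.out.println("rows=" + rows);
  }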