List of usage examples for org.apache.hadoop.mapreduce Job getCounters
public Counters getCounters() throws IOException
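getCounters() returns the aggregate Counters for a job and may need to contact the cluster or the job history server, which is why it declares IOException. Below is a minimal driver-side sketch of the most common pattern: run a job to completion, then read one of its counters. The class name, paths, and the use of the default identity mapper and reducer are illustrative assumptions, not taken from any of the sources below.

// Minimal, self-contained sketch: run a job, then read its counters.
// Uses the default identity Mapper/Reducer so only input/output paths are needed.
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Counters;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.TaskCounter;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class GetCountersExample {
    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration(), "getCounters-example");
        job.setJarByClass(GetCountersExample.class);
        // Default TextInputFormat produces LongWritable/Text pairs,
        // which the identity mapper and reducer pass straight through.
        job.setOutputKeyClass(LongWritable.class);
        job.setOutputValueClass(Text.class);
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        boolean succeeded = job.waitForCompletion(true);

        // getCounters() may contact the cluster or history server,
        // which is why it declares IOException.
        Counters counters = job.getCounters();
        long mapInputRecords = counters.findCounter(TaskCounter.MAP_INPUT_RECORDS).getValue();
        System.out.println("Map input records: " + mapInputRecords);
        System.exit(succeeded ? 0 : 1);
    }
}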
From source file:org.gpfvic.mahout.cf.taste.hadoop.als.ParallelALSFactorizationJob.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    addInputOption();
    addOutputOption();
    addOption("lambda", null, "regularization parameter", true);
    addOption("implicitFeedback", null, "data consists of implicit feedback?", String.valueOf(false));
    addOption("alpha", null, "confidence parameter (only used on implicit feedback)", String.valueOf(40));
    addOption("numFeatures", null, "dimension of the feature space", true);
    addOption("numIterations", null, "number of iterations", true);
    addOption("numThreadsPerSolver", null, "threads per solver mapper", String.valueOf(1));
    addOption("usesLongIDs", null, "input contains long IDs that need to be translated");

    Map<String, List<String>> parsedArgs = parseArguments(args);
    if (parsedArgs == null) {
        return -1;
    }

    numFeatures = Integer.parseInt(getOption("numFeatures"));
    numIterations = Integer.parseInt(getOption("numIterations"));
    lambda = Double.parseDouble(getOption("lambda"));
    alpha = Double.parseDouble(getOption("alpha"));
    implicitFeedback = Boolean.parseBoolean(getOption("implicitFeedback"));
    numThreadsPerSolver = Integer.parseInt(getOption("numThreadsPerSolver"));
    boolean usesLongIDs = Boolean.parseBoolean(getOption("usesLongIDs", String.valueOf(false)));

    /*
     * compute the factorization A = U M'
     *
     * where A (users x items) is the matrix of known ratings
     *       U (users x features) is the representation of users in the feature space
     *       M (items x features) is the representation of items in the feature space
     */

    if (usesLongIDs) {
        Job mapUsers = prepareJob(getInputPath(), getOutputPath("userIDIndex"), TextInputFormat.class,
                MapLongIDsMapper.class, VarIntWritable.class, VarLongWritable.class, IDMapReducer.class,
                VarIntWritable.class, VarLongWritable.class, SequenceFileOutputFormat.class);
        mapUsers.getConfiguration().set(TOKEN_POS, String.valueOf(TasteHadoopUtils.USER_ID_POS));
        mapUsers.waitForCompletion(true);

        Job mapItems = prepareJob(getInputPath(), getOutputPath("itemIDIndex"), TextInputFormat.class,
                MapLongIDsMapper.class, VarIntWritable.class, VarLongWritable.class, IDMapReducer.class,
                VarIntWritable.class, VarLongWritable.class, SequenceFileOutputFormat.class);
        mapItems.getConfiguration().set(TOKEN_POS, String.valueOf(TasteHadoopUtils.ITEM_ID_POS));
        mapItems.waitForCompletion(true);
    }

    /* create A' */
    Job itemRatings = prepareJob(getInputPath(), pathToItemRatings(), TextInputFormat.class,
            ItemRatingVectorsMapper.class, IntWritable.class, VectorWritable.class, VectorSumReducer.class,
            IntWritable.class, VectorWritable.class, SequenceFileOutputFormat.class);
    itemRatings.setCombinerClass(VectorSumCombiner.class);
    itemRatings.getConfiguration().set(USES_LONG_IDS, String.valueOf(usesLongIDs));
    boolean succeeded = itemRatings.waitForCompletion(true);
    if (!succeeded) {
        return -1;
    }

    /* create A */
    Job userRatings = prepareJob(pathToItemRatings(), pathToUserRatings(), TransposeMapper.class,
            IntWritable.class, VectorWritable.class, MergeUserVectorsReducer.class, IntWritable.class,
            VectorWritable.class);
    userRatings.setCombinerClass(MergeVectorsCombiner.class);
    succeeded = userRatings.waitForCompletion(true);
    if (!succeeded) {
        return -1;
    }

    // TODO this could be fiddled into one of the upper jobs
    Job averageItemRatings = prepareJob(pathToItemRatings(), getTempPath("averageRatings"),
            AverageRatingMapper.class, IntWritable.class, VectorWritable.class, MergeVectorsReducer.class,
            IntWritable.class, VectorWritable.class);
    averageItemRatings.setCombinerClass(MergeVectorsCombiner.class);
    succeeded = averageItemRatings.waitForCompletion(true);
    if (!succeeded) {
        return -1;
    }

    Vector averageRatings = ALS.readFirstRow(getTempPath("averageRatings"), getConf());

    int numItems = averageRatings.getNumNondefaultElements();
    int numUsers = (int) userRatings.getCounters().findCounter(Stats.NUM_USERS).getValue();

    log.info("Found {} users and {} items", numUsers, numItems);

    /* create an initial M */
    initializeM(averageRatings);

    for (int currentIteration = 0; currentIteration < numIterations; currentIteration++) {
        /* broadcast M, read A row-wise, recompute U row-wise */
        log.info("Recomputing U (iteration {}/{})", currentIteration, numIterations);
        runSolver(pathToUserRatings(), pathToU(currentIteration), pathToM(currentIteration - 1),
                currentIteration, "U", numItems);
        /* broadcast U, read A' row-wise, recompute M row-wise */
        log.info("Recomputing M (iteration {}/{})", currentIteration, numIterations);
        runSolver(pathToItemRatings(), pathToM(currentIteration), pathToU(currentIteration),
                currentIteration, "M", numUsers);
    }

    return 0;
}
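The numUsers value read above comes from a custom counter (Stats.NUM_USERS) that is incremented on the task side while the userRatings job runs. As a minimal sketch of that general pattern (not Mahout's actual MergeUserVectorsReducer; the Stats enum and surrounding types here are assumptions for illustration), a reducer can bump an enum counter once per key and let the driver read the job-wide total through job.getCounters():

// Hypothetical sketch of the task-side half of the pattern: each reduce group
// corresponds to one user, so the reducer increments a custom enum counter once
// per key. The driver later reads the aggregate via
// job.getCounters().findCounter(Stats.NUM_USERS).getValue().
import java.io.IOException;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

public class UserCountingReducer extends Reducer<Text, IntWritable, Text, IntWritable> {

    /** Counter enum shared between tasks and driver; name and contents are assumed. */
    public enum Stats { NUM_USERS }

    @Override
    protected void reduce(Text userId, Iterable<IntWritable> values, Context context)
            throws IOException, InterruptedException {
        int sum = 0;
        for (IntWritable value : values) {
            sum += value.get();
        }
        context.write(userId, new IntWritable(sum));
        // One reduce group per user, so count it exactly once here.
        context.getCounter(Stats.NUM_USERS).increment(1);
    }
}

Hadoop aggregates the increments from all successful task attempts, so the driver sees a single job-wide total once waitForCompletion() returns.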
From source file:org.gridgain.client.hadoop.GridHadoopClientProtocolSelfTest.java
License:Open Source License
/**
 * Tests job counters retrieval.
 *
 * @throws Exception If failed.
 */
public void testJobCounters() throws Exception {
    GridGgfs ggfs = grid(0).ggfs(GridHadoopAbstractSelfTest.ggfsName);

    ggfs.mkdirs(new GridGgfsPath(PATH_INPUT));

    try (BufferedWriter bw = new BufferedWriter(
            new OutputStreamWriter(ggfs.create(new GridGgfsPath(PATH_INPUT + "/test.file"), true)))) {
        bw.write("alpha\n" + "beta\n" + "gamma\n" + "alpha\n" + "beta\n" + "gamma\n" + "alpha\n" + "beta\n"
                + "gamma\n");
    }

    Configuration conf = config(GridHadoopAbstractSelfTest.REST_PORT);

    final Job job = Job.getInstance(conf);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    job.setMapperClass(TestCountingMapper.class);
    job.setReducerClass(TestCountingReducer.class);
    job.setCombinerClass(TestCountingCombiner.class);

    FileInputFormat.setInputPaths(job, new Path(PATH_INPUT));
    FileOutputFormat.setOutputPath(job, new Path(PATH_OUTPUT));

    job.submit();

    final Counter cntr = job.getCounters().findCounter(TestCounter.COUNTER1);

    assertEquals(0, cntr.getValue());

    cntr.increment(10);

    assertEquals(10, cntr.getValue());

    // Transferring to map phase.
    setupLockFile.delete();

    // Transferring to reduce phase.
    mapLockFile.delete();

    job.waitForCompletion(false);

    assertEquals("job must end successfully", JobStatus.State.SUCCEEDED, job.getStatus().getState());

    final Counters counters = job.getCounters();

    assertNotNull("counters cannot be null", counters);
    assertEquals("wrong counters count", 3, counters.countCounters());
    assertEquals("wrong counter value", 15, counters.findCounter(TestCounter.COUNTER1).getValue());
    assertEquals("wrong counter value", 3, counters.findCounter(TestCounter.COUNTER2).getValue());
    assertEquals("wrong counter value", 3, counters.findCounter(TestCounter.COUNTER3).getValue());
}
From source file:org.hedera.mapreduce.BasicComputeTermStats.java
License:Apache License
/**
 * Runs this tool.
 */
@Override
@SuppressWarnings("static-access")
public int run(String[] args) throws Exception {
    Options options = new Options();

    options.addOption(
            OptionBuilder.withArgName("path").hasArg().withDescription("input path").create(INPUT_OPTION));
    options.addOption(
            OptionBuilder.withArgName("path").hasArg().withDescription("output path").create(OUTPUT_OPTION));
    options.addOption(
            OptionBuilder.withArgName("num").hasArg().withDescription("minimum df").create(DF_MIN_OPTION));
    options.addOption(OptionBuilder.withArgName("string " + AnalyzerFactory.getOptions()).hasArg()
            .withDescription("preprocessing").create(PREPROCESSING));
    options.addOption(
            OptionBuilder.withArgName("num").hasArg().withDescription("begin time").create(BEGIN_TIME_OPTION));
    options.addOption(
            OptionBuilder.withArgName("num").hasArg().withDescription("end time").create(END_TIME_OPTION));
    options.addOption(OptionBuilder.withArgName("num").hasArg().withDescription("number of reducers")
            .create(REDUCE_OPTION));

    CommandLine cmdline;
    CommandLineParser parser = new GnuParser();
    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        HelpFormatter formatter = new HelpFormatter();
        formatter.printHelp(this.getClass().getName(), options);
        ToolRunner.printGenericCommandUsage(System.out);
        System.err.println("Error parsing command line: " + exp.getMessage());
        return -1;
    }

    if (!cmdline.hasOption(INPUT_OPTION) || !cmdline.hasOption(OUTPUT_OPTION)
            || !cmdline.hasOption(PREPROCESSING)) {
        HelpFormatter formatter = new HelpFormatter();
        formatter.printHelp(this.getClass().getName(), options);
        ToolRunner.printGenericCommandUsage(System.out);
        return -1;
    }

    String input = cmdline.getOptionValue(INPUT_OPTION);
    String output = cmdline.getOptionValue(OUTPUT_OPTION);
    String preprocessing = cmdline.getOptionValue(PREPROCESSING);

    int reduceNo;
    if (cmdline.hasOption(REDUCE_OPTION)) {
        String reduceNoStr = cmdline.getOptionValue(REDUCE_OPTION);
        try {
            reduceNo = Integer.parseInt(reduceNoStr);
        } catch (NumberFormatException e) {
            HelpFormatter formatter = new HelpFormatter();
            formatter.printHelp(this.getClass().getName(), options);
            ToolRunner.printGenericCommandUsage(System.out);
            System.err.println("Invalid reduce No.: " + reduceNoStr);
        }
    }

    long begin = 0, end = Long.MAX_VALUE;
    if (cmdline.hasOption(BEGIN_TIME_OPTION)) {
        String beginTs = cmdline.getOptionValue(BEGIN_TIME_OPTION);
        try {
            begin = TIME_FORMAT.parseMillis(beginTs);
        } catch (Exception e) {
            HelpFormatter formatter = new HelpFormatter();
            formatter.printHelp(this.getClass().getName(), options);
            ToolRunner.printGenericCommandUsage(System.out);
            System.err.println("Invalid time format: " + e.getMessage());
        }
    }

    if (cmdline.hasOption(END_TIME_OPTION)) {
        String endTs = cmdline.getOptionValue(END_TIME_OPTION);
        try {
            end = TIME_FORMAT.parseMillis(endTs);
        } catch (Exception e) {
            HelpFormatter formatter = new HelpFormatter();
            formatter.printHelp(this.getClass().getName(), options);
            ToolRunner.printGenericCommandUsage(System.out);
            System.err.println("Invalid time format: " + e.getMessage());
        }
    }

    LOG.info("Tool name: " + BasicComputeTermStats.class.getSimpleName());
    LOG.info(" - input: " + input);
    LOG.info(" - output: " + output);
    LOG.info(" - preprocessing: " + preprocessing);

    getConf().set(PREPROCESSING, preprocessing);

    setMapperSize("-Xmx5120m");

    // skip non-article
    getConf().setBoolean(WikiRevisionInputFormat.SKIP_NON_ARTICLES, true);

    // set up range
    getConf().setLong(REVISION_BEGIN_TIME, begin);
    getConf().setLong(REVISION_END_TIME, end);

    Job job = create(BasicComputeTermStats.class.getSimpleName() + ":" + input, BasicComputeTermStats.class);

    job.setNumReduceTasks(1);

    if (cmdline.hasOption(DF_MIN_OPTION)) {
        int dfMin = Integer.parseInt(cmdline.getOptionValue(DF_MIN_OPTION));
        LOG.info(" - dfMin: " + dfMin);
        job.getConfiguration().setInt(HADOOP_DF_MIN_OPTION, dfMin);
    }

    FileInputFormat.setInputPaths(job, input);
    FileOutputFormat.setOutputPath(job, new Path(output));

    job.setInputFormatClass(WikiFullRevisionJsonInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(PairOfIntLong.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(PairOfIntLong.class);

    job.setMapperClass(MyMapper.class);
    job.setCombinerClass(MyCombiner.class);
    job.setReducerClass(MyReducer.class);

    FileSystem.get(getConf()).delete(new Path(output), true);

    long startTime = System.currentTimeMillis();
    job.waitForCompletion(true);
    LOG.info("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds.");

    LOG.info("Map Reduce output reducers: " + job.getCounters().findCounter(Records.TERMS).getValue());

    return 0;
}
From source file:org.huahinframework.manager.rest.service.JobService.java
License:Apache License
/**
 * @param jobId
 * @return {@link JSONObject}
 * @throws IOException
 * @throws InterruptedException
 */
private Map<String, Object> getStatus(String jobId) throws IOException, InterruptedException {
    Map<String, Object> job = null;

    Cluster cluster = new Cluster(getJobConf());
    for (JobStatus jobStatus : cluster.getAllJobStatuses()) {
        if (jobStatus.getJobID().toString().equals(jobId)) {
            job = JobUtils.getJob(jobStatus);
            Job j = cluster.getJob(jobStatus.getJobID());
            if (j == null) {
                break;
            }

            Calendar finishTime = Calendar.getInstance();
            finishTime.setTimeInMillis(j.getFinishTime());
            job.put(Response.FINISH_TIME, finishTime.getTime().toString());

            Map<String, Map<String, Long>> groups = new HashMap<String, Map<String, Long>>();
            for (String s : j.getCounters().getGroupNames()) {
                CounterGroup counterGroup = j.getCounters().getGroup(s);
                Iterator<Counter> ite = counterGroup.iterator();

                Map<String, Long> counters = new HashMap<String, Long>();
                groups.put(counterGroup.getDisplayName(), counters);
                while (ite.hasNext()) {
                    Counter counter = (Counter) ite.next();
                    counters.put(counter.getDisplayName(), counter.getValue());
                }
            }

            job.put(Response.GROUPS, groups);
            break;
        }
    }

    return job;
}
From source file:org.janusgraph.hadoop.scan.HadoopScanRunner.java
License:Apache License
public static ScanMetrics runJob(org.apache.hadoop.conf.Configuration hadoopConf,
        Class<? extends InputFormat> inputFormat, String jobName, Class<? extends Mapper> mapperClass)
        throws IOException, InterruptedException, ClassNotFoundException {
    Job job = Job.getInstance(hadoopConf);

    //job.setJarByClass(HadoopScanMapper.class);
    job.setJarByClass(mapperClass);
    //job.setJobName(HadoopScanMapper.class.getSimpleName() + "[" + scanJob + "]");
    job.setJobName(jobName);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(NullWritable.class);
    job.setMapOutputKeyClass(NullWritable.class);
    job.setMapOutputValueClass(NullWritable.class);
    job.setNumReduceTasks(0);
    //job.setMapperClass(HadoopScanMapper.class);
    job.setMapperClass(mapperClass);
    job.setOutputFormatClass(NullOutputFormat.class);
    job.setInputFormatClass(inputFormat);

    boolean success = job.waitForCompletion(true);

    if (!success) {
        String f;
        try {
            // Just in case one of Job's methods throws an exception
            f = String.format("MapReduce JobID %s terminated abnormally: %s", job.getJobID().toString(),
                    HadoopCompatLoader.DEFAULT_COMPAT.getJobFailureString(job));
        } catch (RuntimeException e) {
            f = "Job failed (unable to read job status programmatically -- see MapReduce logs for information)";
        }

        throw new IOException(f);
    } else {
        return DEFAULT_COMPAT.getMetrics(job.getCounters());
    }
}
From source file:org.kiji.mapreduce.framework.JobHistoryKijiTable.java
License:Apache License
/**
 * Helper method to write individual counters to job history table's counter family.
 *
 * @param writer The {@link KijiTableWriter} for the job history table.
 * @param job The {@link Job} whose counters we are recording.
 * @throws IOException If there is an error writing to the table.
 */
private void writeIndividualCounters(KijiTableWriter writer, Job job) throws IOException {
    EntityId jobEntity = mKijiTable.getEntityId(job.getJobID().toString());
    Counters counters = job.getCounters();
    for (String grpName : counters.getGroupNames()) {
        Iterator<Counter> counterIterator = counters.getGroup(grpName).iterator();
        while (counterIterator.hasNext()) {
            Counter ctr = counterIterator.next();
            writer.put(jobEntity, JOB_HISTORY_COUNTERS_FAMILY, grpName + ":" + ctr.getName(), ctr.getValue());
        }
    }
}
From source file:org.kiji.mapreduce.framework.JobHistoryKijiTable.java
License:Apache License
/**
 * Writes a job into the JobHistoryKijiTable.
 *
 * @param job The job to save.
 * @param startTime The time the job began, in milliseconds.
 * @param endTime The time the job ended, in milliseconds.
 * @throws IOException If there is an error writing to the table.
 */
public void recordJob(Job job, long startTime, long endTime) throws IOException {
    KijiTableWriter writer = mKijiTable.openTableWriter();
    EntityId jobEntity = mKijiTable.getEntityId(job.getJobID().toString());
    try {
        ByteArrayOutputStream baos = new ByteArrayOutputStream();
        writer.put(jobEntity, JOB_HISTORY_FAMILY, JOB_HISTORY_ID_QUALIFIER, startTime,
                job.getJobID().toString());
        writer.put(jobEntity, JOB_HISTORY_FAMILY, JOB_HISTORY_NAME_QUALIFIER, startTime, job.getJobName());
        writer.put(jobEntity, JOB_HISTORY_FAMILY, JOB_HISTORY_START_TIME_QUALIFIER, startTime, startTime);
        writer.put(jobEntity, JOB_HISTORY_FAMILY, JOB_HISTORY_END_TIME_QUALIFIER, startTime, endTime);
        writer.put(jobEntity, JOB_HISTORY_FAMILY, JOB_HISTORY_END_STATUS_QUALIFIER, startTime,
                job.isSuccessful() ? "SUCCEEDED" : "FAILED");
        writer.put(jobEntity, JOB_HISTORY_FAMILY, JOB_HISTORY_COUNTERS_QUALIFIER, startTime,
                job.getCounters().toString());
        job.getConfiguration().writeXml(baos);
        writer.put(jobEntity, JOB_HISTORY_FAMILY, JOB_HISTORY_CONFIGURATION_QUALIFIER, startTime,
                baos.toString("UTF-8"));
        writeIndividualCounters(writer, job);
    } finally {
        ResourceUtils.closeOrLog(writer);
    }
}
From source file:org.kiji.mapreduce.JobHistoryKijiTable.java
License:Apache License
/**
 * Writes a job into the JobHistoryKijiTable.
 *
 * @param job The job to save.
 * @param startTime The time the job began, in milliseconds.
 * @param endTime The time the job ended, in milliseconds.
 * @throws IOException If there is an error writing to the table.
 */
public void recordJob(Job job, long startTime, long endTime) throws IOException {
    KijiTableWriter writer = mKijiTable.openTableWriter();
    EntityId jobEntity = mKijiTable.getEntityId(job.getJobID().toString());
    try {
        ByteArrayOutputStream baos = new ByteArrayOutputStream();
        writer.put(jobEntity, "info", "jobId", startTime, job.getJobID().toString());
        writer.put(jobEntity, "info", "jobName", startTime, job.getJobName());
        writer.put(jobEntity, "info", "startTime", startTime, startTime);
        writer.put(jobEntity, "info", "endTime", startTime, endTime);
        writer.put(jobEntity, "info", "counters", startTime, job.getCounters().toString());
        job.getConfiguration().writeXml(baos);
        writer.put(jobEntity, "info", "configuration", startTime, baos.toString("UTF-8"));
    } finally {
        IOUtils.closeQuietly(writer);
    }
}
From source file:org.kududb.mapreduce.TestInputFormatJob.java
License:Apache License
private void createAndTestJob(Configuration conf, List<ColumnRangePredicate> predicates, int expectedCount)
        throws Exception {
    String jobName = TestInputFormatJob.class.getName();
    Job job = new Job(conf, jobName);

    Class<TestMapperTableInput> mapperClass = TestMapperTableInput.class;
    job.setJarByClass(mapperClass);
    job.setMapperClass(mapperClass);
    job.setNumReduceTasks(0);
    job.setOutputFormatClass(NullOutputFormat.class);
    KuduTableMapReduceUtil.TableInputFormatConfigurator configurator =
            new KuduTableMapReduceUtil.TableInputFormatConfigurator(job, TABLE_NAME, "*", getMasterAddresses())
                    .operationTimeoutMs(DEFAULT_SLEEP).addDependencies(false).cacheBlocks(false);
    for (ColumnRangePredicate predicate : predicates) {
        configurator.addColumnRangePredicate(predicate);
    }
    configurator.configure();
    assertTrue("Test job did not end properly", job.waitForCompletion(true));

    assertEquals(expectedCount, job.getCounters().findCounter(Counters.ROWS).getValue());
}
From source file:org.lilyproject.mapreduce.test.MapReduceTest.java
License:Apache License
private long getTotalLaunchedMaps(Job job) throws IOException {
    return job.getCounters().findCounter("org.apache.hadoop.mapreduce.JobCounter", "TOTAL_LAUNCHED_MAPS")
            .getValue();
}
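The string-based lookup above resolves the same framework counter that is exposed through the org.apache.hadoop.mapreduce.JobCounter enum. A type-safe variant (a sketch, assuming a Hadoop version that ships this enum) would be:

// Equivalent lookup via the framework enum instead of string group/counter names.
// Requires: import org.apache.hadoop.mapreduce.JobCounter;
private long getTotalLaunchedMaps(Job job) throws IOException {
    return job.getCounters().findCounter(JobCounter.TOTAL_LAUNCHED_MAPS).getValue();
}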