List of usage examples for org.apache.hadoop.mapreduce.Job#getCounters
public Counters getCounters() throws IOException
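All of the examples below share the same pattern: run the job to completion (typically via waitForCompletion), then call getCounters() and look up individual counters either by enum constant (findCounter(Enum)) or by group and counter name (getGroup(...).findCounter(...)). Before the full examples, here is a minimal, self-contained sketch of that pattern; the ParseErrors enum, the job name, and the argument paths are illustrative placeholders, not taken from any of the sources below:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Counter;
import org.apache.hadoop.mapreduce.Counters;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.TaskCounter;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class CounterUsageSketch {
    // Hypothetical application counter; any enum constant can serve as a counter key.
    enum ParseErrors { MALFORMED_RECORD }

    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration(), "counter-usage-sketch");
        FileInputFormat.addInputPath(job, new Path(args[0]));   // placeholder input path
        FileOutputFormat.setOutputPath(job, new Path(args[1])); // placeholder output path

        if (job.waitForCompletion(true)) {
            // getCounters() is only meaningful once the job has finished.
            Counters counters = job.getCounters();
            // Lookup by enum: the group is the enum's class name, the name is the constant.
            Counter malformed = counters.findCounter(ParseErrors.MALFORMED_RECORD);
            // Lookup of a built-in framework counter via the TaskCounter enum.
            Counter mapIn = counters.findCounter(TaskCounter.MAP_INPUT_RECORDS);
            System.out.printf("malformed=%d, map input records=%d%n",
                    malformed.getValue(), mapIn.getValue());
        }
    }
}

Note that on current Hadoop releases the TaskCounter enum is the preferred way to reach the framework counters that some of the examples below address through the legacy string group "org.apache.hadoop.mapred.Task$Counter".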
From source file:com.tfm.utad.reducerdata.ReducerDataPig.java
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
    // yyyy (calendar year), not YYYY (week year), to avoid wrong dates around New Year
    SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd-HH-mm-ss");
    Date date = new Date();
    Path inputPath = new Path("/home/jab/camus/reducer-data-pig");
    Path outputDir = new Path("/home/jab/camus/pigdata/" + sdf.format(date));
    // Create configuration
    Configuration conf = new Configuration(true);
    conf.set(FS_DEFAULT_FS, HDFS_LOCALHOST_LOCALDOMAIN);
    FileSystem fs = FileSystem.get(conf);
    Path filesPath = new Path(inputPath + "/*");
    FileStatus[] files = fs.globStatus(filesPath);
    // Create job
    Job job = new Job(conf, "ReducerDataPig");
    job.setJarByClass(ReducerDataPig.class);
    // Setup MapReduce
    job.setMapperClass(ReducerDataPigMapper.class);
    job.setReducerClass(ReducerDataPigReducer.class);
    job.setNumReduceTasks(1);
    // Specify key / value
    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(ReducerPigKey.class);
    // Input
    FileInputFormat.addInputPath(job, inputPath);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    // Output
    FileOutputFormat.setOutputPath(job, outputDir);
    job.setOutputFormatClass(TextOutputFormat.class);
    // Delete output if exists
    if (fs.exists(outputDir)) {
        fs.delete(outputDir, true);
    }
    // Execute job
    int code = job.waitForCompletion(true) ? 0 : 1;
    if (code == 0) {
        Counters counters = job.getCounters();
        Counter malformedCounter = counters.findCounter(ReducerDataEnum.MALFORMED_DATA);
        LOG.info("Counter malformed data: " + malformedCounter.getValue());
        for (FileStatus fStatus : files) {
            LOG.info("File name:" + fStatus.getPath());
            if (fStatus.isFile()) {
                LOG.info("Removing file in path:" + fStatus.getPath());
                fs.delete(fStatus.getPath(), false);
            }
        }
    }
}
From source file:com.tfm.utad.reducerdata.ReducerDataVertica.java
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
    // yyyy (calendar year), not YYYY (week year), to avoid wrong dates around New Year
    SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd-HH-mm-ss");
    Date date = new Date();
    Path inputPath = new Path("/home/jab/camus/reducer-data-vertica");
    Path outputDir = new Path("/home/jab/camus/verticadb/" + sdf.format(date));
    // Create configuration
    Configuration conf = new Configuration(true);
    conf.set(FS_DEFAULT_FS, HDFS_LOCALHOST_LOCALDOMAIN);
    FileSystem fs = FileSystem.get(conf);
    Path filesPath = new Path(inputPath + "/*");
    FileStatus[] files = fs.globStatus(filesPath);
    // Create job
    Job job = new Job(conf, "ReducerDataVertica");
    job.setJarByClass(ReducerDataVertica.class);
    // Setup MapReduce
    job.setMapperClass(ReducerDataVerticaMapper.class);
    job.setReducerClass(ReducerDataVerticaReducer.class);
    job.setNumReduceTasks(1);
    // Specify key / value
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(ReducerVerticaValue.class);
    // Input
    FileInputFormat.addInputPath(job, inputPath);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    // Output
    FileOutputFormat.setOutputPath(job, outputDir);
    job.setOutputFormatClass(TextOutputFormat.class);
    // Delete output if exists
    if (fs.exists(outputDir)) {
        fs.delete(outputDir, true);
    }
    // Execute job
    int code = job.waitForCompletion(true) ? 0 : 1;
    if (code == 0) {
        Counters counters = job.getCounters();
        Counter malformedCounter = counters.findCounter(ReducerDataEnum.MALFORMED_DATA);
        LOG.info("Counter malformed data: " + malformedCounter.getValue());
        for (FileStatus fStatus : files) {
            LOG.info("File name:" + fStatus.getPath());
            if (fStatus.isFile()) {
                LOG.info("Removing file in path:" + fStatus.getPath());
                fs.delete(fStatus.getPath(), false);
            }
        }
    }
}
From source file:com.tomslabs.grid.avro.AvroWordCountTest.java
License:Apache License
@Test
public void testMapReduce() throws Throwable {
    FileSystem fs = FileSystem.get(localConf);
    String inputFile = AvroIOUtils.createAvroInputFile(inputDir, "foo", "foo", "bar", "baz", "foo", "baz");
    AvroIOUtils.dumpAvroFiles(inputDir);
    Path input = localFileToPath(inputFile).getParent();
    Path countOutput = new Path(outputDir.getAbsolutePath());
    fs.delete(countOutput, true);
    Job countJob = AvroWordCount.createSubmitableJob(localConf, input, countOutput);
    assertTrue("count job failed", countJob.waitForCompletion(true));
    CounterGroup group = countJob.getCounters().getGroup("org.apache.hadoop.mapred.Task$Counter");
    assertEquals("Wrong number of mapper input records", 6, group.findCounter("MAP_INPUT_RECORDS").getValue());
    assertEquals("Wrong number of mapper output records", 6, group.findCounter("MAP_OUTPUT_RECORDS").getValue());
    assertEquals("Wrong number of reduce output records", 3,
            group.findCounter("REDUCE_OUTPUT_RECORDS").getValue());
    AvroIOUtils.dumpAvroFiles(outputDir);
    Map<String, Integer> res = readOutput(outputDir);
    assertEquals(3, res.size());
    assertTrue(res.containsKey("foo"));
    assertEquals(3, res.get("foo").intValue());
    assertTrue(res.containsKey("bar"));
    assertEquals(1, res.get("bar").intValue());
    assertTrue(res.containsKey("baz"));
    assertEquals(2, res.get("baz").intValue());
}
From source file:com.twitter.algebra.nmf.ErrDMJ.java
License:Apache License
public static long run(Configuration conf, DistributedRowMatrix X, Vector xColSumVec, DistributedRowMatrix A,
        DistributedRowMatrix Yt, String label) throws IOException, InterruptedException, ClassNotFoundException {
    log.info("running " + ErrDMJ.class.getName());
    if (X.numRows() != A.numRows()) {
        // report the two mismatched dimensions (the original passed A.numRows() twice)
        throw new CardinalityException(X.numRows(), A.numRows());
    }
    if (A.numCols() != Yt.numCols()) {
        throw new CardinalityException(A.numCols(), Yt.numCols());
    }
    if (X.numCols() != Yt.numRows()) {
        throw new CardinalityException(X.numCols(), Yt.numRows());
    }
    Path outPath = new Path(A.getOutputTempPath(), label);
    FileSystem fs = FileSystem.get(outPath.toUri(), conf);
    ErrDMJ job = new ErrDMJ();
    long totalErr = -1;
    if (!fs.exists(outPath)) {
        Job hJob = job.run(conf, X.getRowPath(), A.getRowPath(), Yt.getRowPath(), outPath, A.numRows(),
                Yt.numRows(), Yt.numCols());
        Counters counters = hJob.getCounters();
        // capture the counter value; the original discarded it, leaving totalErr at -1
        totalErr = counters.findCounter("Result", "sumAbs").getValue();
        log.info("FINAL ERR is " + totalErr);
    } else {
        log.warn("----------- Skip already exists: " + outPath);
    }
    Vector sumErrVec = AlgebraCommon.mapDirToSparseVector(outPath, 1, X.numCols(), conf);
    double maxColErr = Double.MIN_VALUE;
    double sumColErr = 0;
    int cntColErr = 0;
    Iterator<Vector.Element> it = sumErrVec.nonZeroes().iterator();
    while (it.hasNext()) {
        Vector.Element el = it.next();
        double errP2 = el.get();
        double origP2 = xColSumVec.get(el.index());
        double colErr = Math.sqrt(errP2 / origP2);
        log.info("col: " + el.index() + " sum(err^2): " + errP2 + " sum(val^2): " + origP2 + " colErr: "
                + colErr);
        maxColErr = Math.max(colErr, maxColErr);
        sumColErr += colErr;
        cntColErr++;
    }
    log.info(" Max Col Err: " + maxColErr);
    log.info(" Avg Col Err: " + sumColErr / cntColErr);
    return totalErr;
}
From source file:com.twitter.algebra.nmf.ReindexerJob.java
License:Apache License
public static int index(Configuration conf, Path input, Path tmpPath, String label)
        throws IOException, InterruptedException, ClassNotFoundException {
    Path outputPath = new Path(tmpPath, label);
    FileSystem fs = FileSystem.get(outputPath.toUri(), conf);
    ReindexerJob job = new ReindexerJob();
    if (!fs.exists(outputPath)) {
        Job mrJob = job.run(conf, input, outputPath);
        long totalIndex = mrJob.getCounters().getGroup(TOTALINDEX_COUNTER_GROUP)
                .findCounter(TOTALINDEX_COUNTER_NAME).getValue();
        return (int) totalIndex;
    } else {
        log.warn("----------- Skip already exists: " + outputPath);
        return -1;
    }
}
From source file:com.yahoo.semsearch.fastlinking.io.WikipediaDocnoMappingBuilder.java
License:Apache License
@SuppressWarnings("static-access")
@Override
public int run(String[] args) throws Exception {
    Options options = new Options();
    options.addOption(
            OptionBuilder.withArgName("path").hasArg().withDescription("XML dump file").create(INPUT_OPTION));
    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("output file")
            .create(OUTPUT_FILE_OPTION));
    options.addOption(OptionBuilder.withArgName("en|sv|de|cs|es|zh|ar|tr|it").hasArg()
            .withDescription("two-letter language code").create(LANGUAGE_OPTION));
    options.addOption(KEEP_ALL_OPTION, false, "keep all pages");

    CommandLine cmdline;
    CommandLineParser parser = new GnuParser();
    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());
        return -1;
    }

    if (!cmdline.hasOption(INPUT_OPTION) || !cmdline.hasOption(OUTPUT_FILE_OPTION)) {
        HelpFormatter formatter = new HelpFormatter();
        formatter.printHelp(this.getClass().getName(), options);
        ToolRunner.printGenericCommandUsage(System.out);
        return -1;
    }

    String language = null;
    if (cmdline.hasOption(LANGUAGE_OPTION)) {
        language = cmdline.getOptionValue(LANGUAGE_OPTION);
        if (language.length() != 2) {
            System.err.println("Error: \"" + language + "\" unknown language!");
            return -1;
        }
    }

    String inputPath = cmdline.getOptionValue(INPUT_OPTION);
    String outputFile = cmdline.getOptionValue(OUTPUT_FILE_OPTION);
    boolean keepAll = cmdline.hasOption(KEEP_ALL_OPTION);

    String tmpPath = "tmp-" + WikipediaDocnoMappingBuilder.class.getSimpleName() + "-" + RANDOM.nextInt(10000);

    LOG.info("Tool name: " + this.getClass().getName());
    LOG.info(" - input: " + inputPath);
    LOG.info(" - output file: " + outputFile);
    LOG.info(" - keep all pages: " + keepAll);
    LOG.info(" - language: " + language);

    Job job = Job.getInstance(getConf());
    job.setJarByClass(WikipediaDocnoMappingBuilder.class);
    job.setJobName(String.format("BuildWikipediaDocnoMapping[%s: %s, %s: %s, %s: %s]", INPUT_OPTION, inputPath,
            OUTPUT_FILE_OPTION, outputFile, LANGUAGE_OPTION, language));

    job.getConfiguration().setBoolean(KEEP_ALL_OPTION, keepAll);
    if (language != null) {
        job.getConfiguration().set("wiki.language", language);
    }
    job.setNumReduceTasks(1);

    FileInputFormat.setInputPaths(job, new Path(inputPath));
    FileOutputFormat.setOutputPath(job, new Path(tmpPath));
    FileOutputFormat.setCompressOutput(job, false);

    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(IntWritable.class);
    job.setInputFormatClass(WikipediaPageInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    job.setMapperClass(MyMapper.class);
    job.setReducerClass(MyReducer.class);

    // Delete the output directory if it exists already.
    FileSystem.get(getConf()).delete(new Path(tmpPath), true);

    if (job.waitForCompletion(true)) {
        // long cnt = keepAll ? job.getCounters().findCounter(PageTypes.TOTAL).getValue()
        //         : job.getCounters().findCounter(PageTypes.ARTICLE).getValue();
        long cnt = job.getCounters()
                .findCounter("org.apache.hadoop.mapred.Task$Counter", "REDUCE_OUTPUT_RECORDS").getValue();
        WikipediaDocnoMapping.writeDocnoMappingData(FileSystem.get(getConf()), tmpPath + "/part-r-00000",
                (int) cnt, outputFile);
        FileSystem.get(getConf()).delete(new Path(tmpPath), true);
        return 0;
    } else {
        return -1;
    }
}
From source file:connected.components.HashGreaterToMin.java
License:Open Source License
@Override
public int run(String[] args) throws Exception {
    int iterationCount = 0;
    long terminationValue = 1;
    Job job;
    while (terminationValue > 0) {
        job = jobConfig();
        if ((iterationCount + 1) % 3 == 0) {
            job.setMapperClass(MapHashGreaterToMin.class);
        } else {
            job.setMapperClass(MapHashMin.class);
        }
        String input, output;
        if (iterationCount == 0) {
            // for the first iteration the input will be the first input argument
            input = args[0];
        } else {
            // for the remaining iterations, the input will be the output of the previous iteration
            input = args[1] + iterationCount;
        }
        output = args[1] + (iterationCount + 1);
        System.out.println("Input:" + input);
        System.out.println("Output:" + output);
        FileInputFormat.setInputPaths(job, new Path(input)); // setting the input files for the job
        FileOutputFormat.setOutputPath(job, new Path(output)); // setting the output files for the job
        job.waitForCompletion(true); // wait for the job to complete
        Counters jobCntrs = job.getCounters();
        terminationValue = jobCntrs.findCounter(MoreIterations.numberOfIterations).getValue();
        System.out.println("\n Round " + iterationCount + " => #Communications : " + (numberOfComm - precomm));
        precomm = numberOfComm;
        iterationCount++;
    }
    System.out.println(
            " Number of MR rounds: " + iterationCount + "\n Number of Communications: " + numberOfComm);
    return 0;
}
From source file:connected.components.HashToAlternate.java
@Override
public int run(String[] args) throws Exception {
    long startTime = System.nanoTime();
    args[0] = "/home/ro0t/Desktop/BTP/graph/input1.txt";
    Path inputPath = new Path(args[0]);
    Path basePath = new Path(args[1]);
    Path outputPath = null;
    FileSystem fs = FileSystem.get(getConf());
    fs.delete(basePath, true);
    int iterationCount = 0;
    long terminationValue = 1;
    Job job;
    while (terminationValue > 0) {
        job = jobConfig();
        if (iterationCount % 2 != 0) {
            job.setMapperClass(MapM.class);
        } else {
            job.setMapperClass(Map.class);
        }
        // for the first iteration the input will be the first input argument
        if (iterationCount != 0) {
            if (iterationCount > 1) {
                fs.delete(inputPath, true);
            }
            inputPath = outputPath;
        }
        outputPath = new Path(basePath, iterationCount + "");
        FileInputFormat.setInputPaths(job, inputPath); // setting the input files for the job
        FileOutputFormat.setOutputPath(job, outputPath); // setting the output path for the job
        job.waitForCompletion(true); // wait for the job to complete
        Counters jobCntrs = job.getCounters();
        terminationValue = jobCntrs.findCounter(MRrounds.rounds).getValue();
        iterationCount++;
        long comm = jobCntrs.findCounter(MRrounds.numberOfComunications).getValue();
        long precom = jobCntrs.findCounter(MRrounds.precomm).getValue();
        System.out.println("\n Round " + iterationCount + " => #Communications : " + (comm - precom));
        jobCntrs.findCounter(MRrounds.precomm).setValue(comm);
    }
    long estimatedTime = System.nanoTime() - startTime;
    System.out.println(" \nNumber of MR rounds: " + iterationCount + " Time of Completion: "
            + estimatedTime / 1000000000 + "\n");
    return 0;
}
From source file:connected.components.HashToMin.java
License:Open Source License
@Override
public int run(String[] args) throws Exception {
    long precomm = 0;
    args[0] = "/home/ro0t/Desktop/BTP/graph/input1.txt";
    Path inputPath = new Path(args[0]);
    Path basePath = new Path(args[1]);
    Path outputPath = null;
    FileSystem fs = FileSystem.get(getConf());
    fs.delete(basePath, true);
    long startTime = System.nanoTime();
    int iterationCount = 0;
    long terminationValue = 1;
    Job job;
    while (terminationValue > 0) {
        job = jobConfig();
        if (iterationCount != 0) {
            if (iterationCount > 1) {
                fs.delete(inputPath, true);
            }
            inputPath = outputPath;
        }
        outputPath = new Path(basePath, iterationCount + "");
        FileInputFormat.setInputPaths(job, inputPath); // setting the input files for the job
        FileOutputFormat.setOutputPath(job, outputPath); // setting the output path for the job
        job.waitForCompletion(true); // wait for the job to complete
        Counters jobCntrs = job.getCounters();
        terminationValue = jobCntrs.findCounter(MRrounds.rounds).getValue();
        iterationCount++;
        System.out.println(
                "\n Round " + iterationCount + " => #Communications : " + (numberOfComunications - precomm));
        precomm = numberOfComunications;
    }
    long estimatedTime = System.nanoTime() - startTime;
    System.out.println(" \nNumber of MR rounds: " + iterationCount + " Number of Communications: "
            + numberOfComunications + " Time of Completion: " + estimatedTime / 1000000000 + "\n");
    return 0;
}
From source file:crunch.MaxTemperature.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    if (args.length != 1) {
        JobBuilder.printUsage(this, "<job ID>");
        return -1;
    }
    String jobID = args[0];
    // vv NewMissingTemperatureFields
    Cluster cluster = new Cluster(getConf());
    Job job = cluster.getJob(JobID.forName(jobID));
    // ^^ NewMissingTemperatureFields
    if (job == null) {
        System.err.printf("No job with ID %s found.\n", jobID);
        return -1;
    }
    if (!job.isComplete()) {
        System.err.printf("Job %s is not complete.\n", jobID);
        return -1;
    }
    // vv NewMissingTemperatureFields
    Counters counters = job.getCounters();
    long missing = counters.findCounter(MaxTemperatureWithCounters.Temperature.MISSING).getValue();
    long total = counters.findCounter(TaskCounter.MAP_INPUT_RECORDS).getValue();
    // ^^ NewMissingTemperatureFields
    System.out.printf("Records with missing temperature fields: %.2f%%\n", 100.0 * missing / total);
    return 0;
}