List of usage examples for org.apache.hadoop.mapreduce.Job.getCounters()
public Counters getCounters() throws IOException
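
All of the examples below follow the same pattern: run a Job to completion, then call getCounters() and look up a specific counter. Here is a minimal, self-contained sketch of that pattern; the class name, the identity map/reduce job, and the choice of the built-in TaskCounter.MAP_INPUT_RECORDS counter are illustrative assumptions, not taken from any example on this page.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Counters;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.TaskCounter;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class GetCountersExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();

        // Identity map/reduce job: copies the text input to the output unchanged.
        Job job = Job.getInstance(conf, "getCounters example");
        job.setJarByClass(GetCountersExample.class);
        job.setOutputKeyClass(LongWritable.class);
        job.setOutputValueClass(Text.class);
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        // Counters are only meaningful once the job has finished.
        boolean ok = job.waitForCompletion(true);

        // Fetch the aggregated counters for the whole job and read a built-in one.
        Counters counters = job.getCounters();
        long mapInputRecords = counters.findCounter(TaskCounter.MAP_INPUT_RECORDS).getValue();
        System.out.println("Map input records: " + mapInputRecords);

        System.exit(ok ? 0 : 1);
    }
}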
From source file:pl.edu.icm.coansys.heeut.TestHBaseMapReduce.java
License:Apache License
@Test(timeout = 1800000)
public void testRowCounter() throws Exception {
    String tableInitName = getCurrentDateAppended("testRowCounter");
    createAndPopulateDefaultTable(tableInitName, TEST_ROW_COUNT);

    Job job = RowCounter.createSubmittableJob(UTIL.getConfiguration(), new String[] { tableInitName });
    job.waitForCompletion(true);

    long count = job.getCounters()
            .findCounter("org.apache.hadoop.hbase.mapreduce.RowCounter$RowCounterMapper$Counters", "ROWS")
            .getValue();
    Assert.assertEquals(TEST_ROW_COUNT, count);

    dropTable(tableInitName);
}
From source file:pl.edu.icm.coansys.richimporttsv.jobs.mapreduce.TestRichImportTsv.java
License:Apache License
public void testRowCounter() throws Exception {
    String tableInitName = getCurrentDateAppended("testRowCounter");
    createAndPopulateDefaultTable(tableInitName, TEST_ROW_COUNT);

    Job job = RowCounter.createSubmittableJob(UTIL.getConfiguration(), new String[] { tableInitName });
    job.waitForCompletion(true);

    long count = job.getCounters()
            .findCounter("org.apache.hadoop.hbase.mapreduce.RowCounter$RowCounterMapper$Counters", "ROWS")
            .getValue();
    Assert.assertEquals(TEST_ROW_COUNT, count);

    dropTable(tableInitName);
}
From source file:sample.WordMedian.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    if (args.length != 2) {
        System.err.println("Usage: wordmedian <in> <out>");
        return 0;
    }

    setConf(new Configuration());
    Configuration conf = getConf();

    @SuppressWarnings("deprecation")
    Job job = new Job(conf, "word median");
    job.setJarByClass(WordMedian.class);
    job.setMapperClass(WordMedianMapper.class);
    job.setCombinerClass(WordMedianReducer.class);
    job.setReducerClass(WordMedianReducer.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    boolean result = job.waitForCompletion(true);

    // Wait for JOB 1 -- get middle value to check for Median
    long totalWords = job.getCounters().getGroup(TaskCounter.class.getCanonicalName())
            .findCounter("MAP_OUTPUT_RECORDS", "Map output records").getValue();
    System.out.println("TotalWords: " + totalWords);

    int medianIndex1 = (int) Math.ceil(totalWords / 2.0);
    int medianIndex2 = (int) Math.floor(totalWords / 2.0);

    median = readAndFindMedian(args[1], medianIndex1, medianIndex2, conf);

    return (result ? 0 : 1);
}
From source file:sculptor.framework.HClient.java
License:Apache License
/**
 * Row count using mapreduce.
 *
 * @param kv entity
 * @param ops filter conditions
 * @return row count if job completed successfully, -1 if failed.
 * @throws Exception
 */
public long countMR(D kv, Map<String, HCompareOp> ops) throws Exception {
    String jobName = "Count " + tableName;
    Job job = new Job(this.getConfig(), jobName);
    job.setJarByClass(HClient.class);

    // scan setting
    Scan scan = getRawScan(kv, ops);
    scan.setCacheBlocks(false);

    // initialize the mapper
    TableMapReduceUtil.initTableMapperJob(getTableName(), scan, CountMapper.class,
            ImmutableBytesWritable.class, Result.class, job, false);
    job.setNumReduceTasks(0);
    job.setOutputFormatClass(NullOutputFormat.class);

    boolean jobResult = job.waitForCompletion(true);
    if (!jobResult) {
        return -1;
    }

    Counters counters = job.getCounters();
    Counter rowCounter = counters.findCounter(CountMapper.Counters.ROWS);
    return rowCounter.getValue();
}
From source file:SecondarySort.HashToAlternateWithSS.java
@Override
public int run(String[] args) throws Exception {
    long startTime = System.nanoTime();
    args[0] = "/home/ro0t/Desktop/BTP/graph/utube";
    Path inputPath = new Path(args[0]);
    Path basePath = new Path(args[1]);
    Path outputPath = null;
    FileSystem fs = FileSystem.get(getConf());
    fs.delete(basePath, true);

    int iterationCount = 0;
    long terminationValue = 1;
    Job job;
    while (terminationValue > 0) {
        job = jobConfig();
        if (iterationCount % 2 != 0) {
            job.setMapperClass(MapMSS.class);
        } else {
            job.setMapperClass(MapSS.class);
        }
        if (iterationCount != 0) {
            // for the first iteration the input will be the first input argument
            if (iterationCount > 1) {
                fs.delete(inputPath, true);
            }
            inputPath = outputPath;
        }
        outputPath = new Path(basePath, iterationCount + "");
        FileInputFormat.setInputPaths(job, inputPath);   // set the input path
        FileOutputFormat.setOutputPath(job, outputPath); // set the output path
        job.waitForCompletion(true); // wait for the job to complete

        Counters jobCntrs = job.getCounters();
        terminationValue = jobCntrs.findCounter(MRrounds.rounds).getValue();
        iterationCount++;
        long comm = jobCntrs.findCounter(MRrounds.numberOfComunications).getValue();
        long precom = jobCntrs.findCounter(MRrounds.precomm).getValue();
        System.out.println("\n Round " + iterationCount + " => #Communications : " + (comm - precom));
        jobCntrs.findCounter(MRrounds.precomm).setValue(comm);
    }
    if (iterationCount > 1) {
        fs.delete(inputPath, true);
    }

    long estimatedTime = System.nanoTime() - startTime;
    System.out.println(" \nNumber of MR rounds: " + iterationCount + " Time of Completion: "
            + estimatedTime / 1000000000 + "\n");
    return 0;
}
From source file:SecondarySort.HashToMinWithSS.java
@Override
public int run(String[] args) throws Exception {
    long startTime = System.nanoTime();
    Path inputPath = new Path(args[0]);
    Path basePath = new Path(args[1]);
    Path outputPath = null;
    FileSystem fs = FileSystem.get(getConf());
    fs.delete(basePath, true);

    int iterationCount = 0;
    long terminationValue = 1;
    Job job;
    while (terminationValue > 0) {
        job = jobConfig();
        if (iterationCount % 2 != 0) {
            job.setMapperClass(MapMSS.class);
        } else {
            job.setMapperClass(MapMSS.class);
        }
        if (iterationCount != 0) {
            // for the first iteration the input will be the first input argument
            if (iterationCount > 1) {
                fs.delete(inputPath, true);
            }
            inputPath = outputPath;
        }
        outputPath = new Path(basePath, iterationCount + "");
        FileInputFormat.setInputPaths(job, inputPath);   // set the input path
        FileOutputFormat.setOutputPath(job, outputPath); // set the output path
        job.waitForCompletion(true); // wait for the job to complete

        Counters jobCntrs = job.getCounters();
        terminationValue = jobCntrs.findCounter(MRrounds.rounds).getValue();
        iterationCount++;
        long comm = jobCntrs.findCounter(MRrounds.numberOfComunications).getValue();
        numberOfComunications = comm;
        long precom = jobCntrs.findCounter(MRrounds.precomm).getValue();
        System.out.println("\n Round " + iterationCount + " => #Communications : " + (comm - precom));
        jobCntrs.findCounter(MRrounds.precomm).setValue(comm);
    }
    if (iterationCount > 1) {
        fs.delete(inputPath, true);
    }

    long estimatedTime = System.nanoTime() - startTime;
    System.out.println(" \nNumber of MR rounds: " + iterationCount + " Time of Completion: "
            + estimatedTime / 1000000000 + "\n");
    return 0;
}
From source file:SecondarySort.inputMaker.java
License:Open Source License
@Override
public int run(String[] args) throws Exception {
    args[0] = "/home/ro0t/Desktop/BTP/graph/input1.txt";
    Path inputPath = new Path(args[0]);
    Path outputPath = new Path(args[1]);
    FileSystem fs = FileSystem.get(getConf());
    fs.delete(outputPath, true);

    Job job;
    job = jobConfig();
    FileInputFormat.setInputPaths(job, inputPath);   // set the input path
    FileOutputFormat.setOutputPath(job, outputPath); // set the output path
    job.waitForCompletion(true); // wait for the job to complete

    Counters jobCntrs = job.getCounters();
    long e = jobCntrs.findCounter(MRrounds.edges).getValue();
    long n = jobCntrs.findCounter(MRrounds.nodes).getValue();
    System.out.println("\n Nodes:" + n + "\tEdges " + e);

    fs.delete(new Path(args[1] + "-final"), true);
    job = jobConfig();
    FileInputFormat.setInputPaths(job, outputPath);
    FileOutputFormat.setOutputPath(job, new Path(args[1] + "-final"));
    job.waitForCompletion(true); // wait for the job to complete
    fs.delete(outputPath, true);
    return 0;
}
From source file:seoeun.hadoop.multipleoutputs.TestMRMultipleOutputs.java
License:Apache License
protected void _testMultipleOutputs(boolean withCounters) throws Exception {
    String input = "a\nb\nc\nd\ne\nc\nd\ne";

    //Configuration conf = createJobConf();
    Configuration conf = new Configuration();
    Job job = MapReduceTestUtil.createJob(conf, IN_DIR, OUT_DIR, 2, 1, input);
    job.setJobName("mo");
    MultipleOutputs.addNamedOutput(job, TEXT, TextOutputFormat.class, LongWritable.class, Text.class);
    MultipleOutputs.addNamedOutput(job, SEQUENCE, SequenceFileOutputFormat.class, IntWritable.class, Text.class);
    MultipleOutputs.setCountersEnabled(job, withCounters);
    job.setMapperClass(MOMap.class);
    job.setReducerClass(MOReduce.class);
    job.waitForCompletion(true);

    // assert number of named output part files
    int namedOutputCount = 0;
    int valueBasedOutputCount = 0;
    FileSystem fs = OUT_DIR.getFileSystem(conf);
    FileStatus[] statuses = fs.listStatus(OUT_DIR);
    for (FileStatus status : statuses) {
        String fileName = status.getPath().getName();
        if (fileName.equals("text-m-00000") || fileName.equals("text-m-00001")
                || fileName.equals("text-r-00000") || fileName.equals("sequence_A-m-00000")
                || fileName.equals("sequence_A-m-00001") || fileName.equals("sequence_B-m-00000")
                || fileName.equals("sequence_B-m-00001") || fileName.equals("sequence_B-r-00000")
                || fileName.equals("sequence_C-r-00000")) {
            namedOutputCount++;
        } else if (fileName.equals("a-r-00000") || fileName.equals("b-r-00000")
                || fileName.equals("c-r-00000") || fileName.equals("d-r-00000")
                || fileName.equals("e-r-00000")) {
            valueBasedOutputCount++;
        }
    }
    //assertEquals(9, namedOutputCount);
    //assertEquals(5, valueBasedOutputCount);

    // assert TextOutputFormat files correctness
    BufferedReader reader = new BufferedReader(
            new InputStreamReader(fs.open(new Path(FileOutputFormat.getOutputPath(job), "text-r-00000"))));
    int count = 0;
    String line = reader.readLine();
    while (line != null) {
        assertTrue(line.endsWith(TEXT));
        line = reader.readLine();
        count++;
    }
    reader.close();
    assertFalse(count == 0);

    // assert SequenceOutputFormat files correctness
    SequenceFile.Reader seqReader = new SequenceFile.Reader(fs,
            new Path(FileOutputFormat.getOutputPath(job), "sequence_B-r-00000"), conf);
    assertEquals(IntWritable.class, seqReader.getKeyClass());
    assertEquals(Text.class, seqReader.getValueClass());
    count = 0;
    IntWritable key = new IntWritable();
    Text value = new Text();
    while (seqReader.next(key, value)) {
        assertEquals(SEQUENCE, value.toString());
        count++;
    }
    seqReader.close();
    assertFalse(count == 0);

    if (withCounters) {
        CounterGroup counters = job.getCounters().getGroup(MultipleOutputs.class.getName());
        assertEquals(9, counters.size());
        assertEquals(4, counters.findCounter(TEXT).getValue());
        assertEquals(2, counters.findCounter(SEQUENCE + "_A").getValue());
        assertEquals(4, counters.findCounter(SEQUENCE + "_B").getValue());
        assertEquals(2, counters.findCounter(SEQUENCE + "_C").getValue());
        assertEquals(2, counters.findCounter("a").getValue());
        assertEquals(2, counters.findCounter("b").getValue());
        assertEquals(4, counters.findCounter("c").getValue());
        assertEquals(4, counters.findCounter("d").getValue());
        assertEquals(4, counters.findCounter("e").getValue());
    }
}
From source file:simsql.runtime.MRLoader.java
License:Apache License
public long run(String inputPath, String outputPath, short typeCode, Relation r, int sortAtt) {

    // make a directory for the relation
    Configuration conf = new Configuration();
    FileSystem dfs = null;
    try {
        dfs = FileSystem.get(conf);
    } catch (Exception e) {
        throw new RuntimeException("Cannot access HDFS!", e);
    }

    try {
        // if it exists, destroy it.
        Path path = new Path(outputPath);
        if (dfs.exists(path)) {
            dfs.delete(path, true);
        }
    } catch (Exception e) {
        throw new RuntimeException("Could not create the file to bulk load to!", e);
    }

    // find a file name
    String tempPath = null;
    if (inputPath.startsWith("hdfs:")) {
        tempPath = inputPath.replace("hdfs:", "");
    } else {
        tempPath = "/tempDataFile_" + r.getName();
        try {
            dfs.delete(new Path(tempPath), true);
        } catch (Exception e) {
            // ignore this.
        }

        // upload the text file
        try {
            dfs.copyFromLocalFile(false, true, new Path(inputPath), new Path(tempPath));
            dfs.deleteOnExit(new Path(tempPath));
        } catch (Exception e) {
            throw new RuntimeException("Failed to upload text file " + inputPath + " to HDFS!", e);
        }
    }

    // set up the new job's parameters.
    conf.setBoolean("mapred.compress.map.output", true);
    conf.set("mapred.map.output.compression.codec", RecordCompression.getCodecClass());
    conf.set("io.serializations",
            "simsql.runtime.RecordSerialization,simsql.runtime.RecordKeySerialization,org.apache.hadoop.io.serializer.WritableSerialization");
    conf.setInt("simsql.loader.numAtts", r.getAttributes().size());
    conf.setInt("simsql.loader.typeCode", (int) typeCode);
    conf.setInt("simsql.loader.sortAtt", sortAtt);

    String[] myStrings = new String[r.getAttributes().size()];
    int j = 0;
    for (simsql.compiler.Attribute a : r.getAttributes()) {
        myStrings[j++] = a.getPhysicalRealization().getClass().getName();
    }
    conf.setStrings("simsql.loader.types", myStrings);

    // create a job
    Job job;
    try {
        job = new Job(conf);
    } catch (Exception e) {
        throw new RuntimeException("Unable to create bulk loading job!", e);
    }

    // set the split size (number of mappers)
    long fSize = 0;
    if (inputPath.startsWith("hdfs")) {
        fSize = RelOp.getPathsTotalSize(new String[] { tempPath });
    } else {
        fSize = new File(inputPath).length();
    }
    FileInputFormat.setMinInputSplitSize(job, fSize / (long) numTasks);
    FileInputFormat.setMaxInputSplitSize(job, fSize / (long) numTasks);

    // and the number of reducers
    job.setNumReduceTasks(numTasks);

    // the mapper/reducer/jar
    job.setMapperClass(MRLoaderMapper.class);
    job.setReducerClass(MRLoaderReducer.class);
    job.setJarByClass(MRLoader.class);

    // I/O settings.
    job.setOutputFormatClass(RecordOutputFormat.class);
    job.setMapOutputKeyClass(RecordKey.class);
    job.setMapOutputValueClass(RecordWrapper.class);
    job.setOutputKeyClass(Nothing.class);
    job.setOutputValueClass(Record.class);

    try {
        FileInputFormat.setInputPaths(job, new Path(tempPath));
        FileOutputFormat.setOutputPath(job, new Path(outputPath));
    } catch (Exception e) {
        throw new RuntimeException("Could not set job inputs/outputs", e);
    }
    job.setGroupingComparatorClass(RecordKeyGroupingComparator.class);
    job.setPartitionerClass(RecordPartitioner.class);
    job.setSortComparatorClass(RecordKeySortComparator.class);
    job.setJobName("MRLoader: " + inputPath + " ==> " + outputPath);

    // run it
    Counters counters;
    try {
        job.waitForCompletion(true);
        counters = job.getCounters();
    } catch (Exception e) {
        throw new RuntimeException("Could not set up bulk loader job!", e);
    }

    // now, delete all the empty part files
    try {
        // get a filesystem
        FileSystem ddfs = FileSystem.get(conf);
        Path outPath = new Path(outputPath);
        if (ddfs.exists(outPath) && ddfs.isDirectory(outPath)) {
            FileStatus fstatus[] = ddfs.listStatus(outPath, new TableFileFilter());
            for (FileStatus ff : fstatus) {
                if (ddfs.getContentSummary(ff.getPath()).getLength() <= 4) { // snappy leaves 4-byte long files around...
                    ddfs.delete(ff.getPath(), true);
                }
            }
        }
    } catch (Exception e) {
        // this isn't disastrous
    }

    // get the counter for the output of the mapper.
    Counter bytesCounter = counters.findCounter(OutputFileSerializer.Counters.BYTES_WRITTEN);
    return bytesCounter.getValue();
}
From source file:simsql.runtime.RelOp.java
License:Apache License
public boolean run(RuntimeParameter params, boolean verbose) {

    ExampleRuntimeParameter pp = (ExampleRuntimeParameter) params;

    // build the jar.
    String jarFile = buildJarFile(params);

    // Get the default configuration object
    Configuration conf = new Configuration();

    // set quiet mode on/off
    conf.setQuietMode(!verbose);

    /***
    conf.setBoolean("mapred.task.profile", true);
    conf.set("mapred.task.profile.params", "-agentlib:hprof=cpu=samples,"
            + "heap=sites,depth=8,force=n,thread=y,verbose=n,file=%s");
    ***/

    // tell it how to serialize and deserialize records and recordkeys
    conf.set("io.serializations", getSerializations());
    conf.setBoolean("mapred.compress.map.output", true);

    int ioSortMB = conf.getInt("io.sort.mb", 256);
    conf.set("mapred.map.child.java.opts",
            "-Xmx" + (getMemPerMapper(params) + ioSortMB) + "m -Xms" + (getMemPerMapper(params))
                    + "m -Duser.timezone='America/Chicago' -Djava.net.preferIPv4Stack=true -XX:CompileThreshold=10000 -XX:+DoEscapeAnalysis -XX:+UseNUMA -XX:-EliminateLocks -XX:+UseBiasedLocking -XX:+OptimizeStringConcat -XX:+UseFastAccessorMethods -XX:+UseConcMarkSweepGC -XX:+CMSIncrementalMode -XX:+CMSIncrementalPacing -XX:CMSIncrementalDutyCycleMin=0 -XX:+UseCompressedOops -XX:+AggressiveOpts -XX:-UseStringCache -XX:ErrorFile=/tmp/hs_err_pid%p.log");

    conf.set("mapred.reduce.child.java.opts",
            "-Xmx" + (getMemPerReducer(params) + ioSortMB) + "m -Xms" + (getMemPerMapper(params))
                    + "m -Duser.timezone='America/Chicago' -Djava.net.preferIPv4Stack=true -XX:CompileThreshold=10000 -XX:+DoEscapeAnalysis -XX:+UseNUMA -XX:-EliminateLocks -XX:+UseBiasedLocking -XX:+OptimizeStringConcat -XX:+UseFastAccessorMethods -XX:+UseConcMarkSweepGC -XX:+CMSIncrementalMode -XX:+CMSIncrementalPacing -XX:CMSIncrementalDutyCycleMin=0 -XX:+UseCompressedOops -XX:+AggressiveOpts -XX:-UseStringCache -XX:ErrorFile=/tmp/hs_err_pid%p.log");

    conf.setInt("simsql.input.numSplits", pp.getNumCPUs());
    conf.setInt("mapred.job.reuse.jvm.num.tasks", 1);
    // conf.setBoolean ("mapred.map.tasks.speculative.execution", false);
    // conf.setBoolean ("mapred.reduce.tasks.speculative.execution", false);

    // tell it to use the jar that we just created
    conf.set("mapred.jar", jarFile);

    // conf.set("tmpjars", "file:///usr/lib/hadoop-mapreduce/hadoop-mapreduce-client-core.jar");

    conf.setBoolean("mapred.output.compress", true);
    conf.setStrings("mapred.output.compression.type", new String[] { "RECORD" });

    // use snappy for the intermediate stuff
    conf.set("mapred.map.output.compression.codec", RecordCompression.getCodecClass());

    // do some additional operator-specific configurations
    setConfigurations(conf, params);

    // collect statistics for final relations always
    conf.setBoolean("simsql.collectStats", isFinal || collectStats);

    // figure out what file to map
    String[] inDirs = myInputNetwork.getPipelinedInputFiles();
    inDirs = excludeAnyWhoWillNotBeMapped(inDirs);
    String inSingleString = inDirs[0];
    conf.set("simsql.fileToMap", inSingleString);
    for (int i = 1; i < inDirs.length; i++) {
        inSingleString += "," + inDirs[i];
    }

    // create and name the job
    Job job;
    try {
        job = new Job(conf);
    } catch (Exception e) {
        throw new RuntimeException("Unable to create a new job!", e);
    }
    job.setJobName(getJobName());

    // set the map-reduce input and output types
    job.setMapOutputKeyClass(getMapOutputKeyClass());
    job.setMapOutputValueClass(getMapOutputValueClass());
    job.setOutputKeyClass(getOutputKeyClass());
    job.setOutputValueClass(getOutputValueClass());

    int numReducers = getNumReducers(params);

    job.setMapperClass(getMapperClass());
    job.setReducerClass(getReducerClass());

    // set the number of reducers
    job.setNumReduceTasks(numReducers);

    // set the input and the output formats... these extend FileInputFormat and FileOutputFormat
    job.setInputFormatClass(getInputFormatClass());
    job.setOutputFormatClass(getOutputFormatClass());

    // set the input and output paths
    try {
        System.out.println("input file: " + inSingleString);
        FileInputFormat.setInputPaths(job, inSingleString);
        FileInputFormat.setInputPathFilter(job, TableFileFilter.class);
        FileOutputFormat.setOutputPath(job, new Path(getOutput()));
    } catch (Exception e) {
        throw new RuntimeException("Unable to set up the input/output path for the job.", e);
    }

    // set the split size
    FileInputFormat.setMinInputSplitSize(job, getSplitSize(params));
    FileInputFormat.setMaxInputSplitSize(job, getSplitSize(params));

    // set the various sorting/grouping/mapping classes
    job.setGroupingComparatorClass(getGroupingComparatorClass());
    job.setPartitionerClass(getPartitionerClass());
    job.setSortComparatorClass(getSortComparatorClass());

    // and now, submit the job and wait for things to finish
    int exitCode;
    try {
        exitCode = job.waitForCompletion(verbose) ? 0 : 1;

        // get the output bytes counter.
        Counters c = job.getCounters();
        Counter mx = c.findCounter(OutputFileSerializer.Counters.BYTES_WRITTEN);

        // and use them to set the size of the output relation.
        if (myDB != null) {
            myDB.setTableSize(myDB.getTableName(getOutput()), mx.getValue());
            myDB.setNumAtts(myDB.getTableName(getOutput()), getOutputAttNames().length);
        }
    } catch (Exception e) {
        e.printStackTrace();
        throw new RuntimeException("Unable to run the job", e);
    }

    // now, delete all the empty part files
    try {
        // get a filesystem
        FileSystem dfs = FileSystem.get(conf);
        Path outPath = new Path(getOutput());
        if (dfs.exists(outPath) && dfs.isDirectory(outPath)) {
            FileStatus fstatus[] = dfs.listStatus(outPath, new TableFileFilter());
            for (FileStatus ff : fstatus) {
                if (dfs.getContentSummary(ff.getPath()).getLength() <= 4) { // snappy leaves 4-byte long files around...
                    dfs.delete(ff.getPath(), true);
                }
            }
        }
    } catch (Exception e) {
        // this isn't disastrous
    }

    return (exitCode == 0);
}