List of usage examples for org.apache.hadoop.mapred Reporter incrCounter
public abstract void incrCounter(Enum<?> key, long amount);
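The snippets below come from real projects; as a starting point, here is a minimal, self-contained sketch of the usual pattern with the old mapred API: define a custom enum and increment it from map(). The WordLengthMapper class, the WordCounters enum, and the 10-character threshold are illustrative names chosen for this sketch, not taken from any of the examples below.

import java.io.IOException;
import java.util.StringTokenizer;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;

public class WordLengthMapper extends MapReduceBase
        implements Mapper<LongWritable, Text, Text, IntWritable> {

    // Custom counter group; each enum constant becomes one named counter.
    public enum WordCounters {
        TOTAL_WORDS, LONG_WORDS
    }

    private final Text word = new Text();
    private final IntWritable length = new IntWritable();

    @Override
    public void map(LongWritable key, Text value, OutputCollector<Text, IntWritable> output,
            Reporter reporter) throws IOException {
        StringTokenizer tokenizer = new StringTokenizer(value.toString());
        while (tokenizer.hasMoreTokens()) {
            String token = tokenizer.nextToken();
            word.set(token);
            length.set(token.length());
            output.collect(word, length);

            // Increment the enum-keyed counters; the framework aggregates them per job.
            reporter.incrCounter(WordCounters.TOTAL_WORDS, 1);
            if (token.length() > 10) {  // illustrative threshold
                reporter.incrCounter(WordCounters.LONG_WORDS, 1);
            }
        }
    }
}

After the job finishes, the aggregated totals show up in the job's counter summary and can be read from the driver, for example via RunningJob.getCounters().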
From source file:StreamWikiDumpInputFormat.java
License:Apache License
private static List<Long> getPageBytes(FileSplit split, FileSystem fs, CompressionCodecFactory compressionCodecs,
        Reporter reporter) throws IOException {
    SeekableInputStream in = null;
    try {
        in = SeekableInputStream.getInstance(split, fs, compressionCodecs);
        long start = split.getStart();
        long end = start + split.getLength();
        InputStream cin = null;
        if (cin != null) {
            // start = cin.getAdjustedStart();
            // end = cin.getAdjustedEnd() + 1;
        }
        ByteMatcher matcher = new ByteMatcher(in, in);
        List<Long> ret = new ArrayList<Long>();
        while (true) {
            if (matcher.getPos() >= end || !matcher.readUntilMatch(pageBeginPattern, null, end)) {
                break;
            }
            ret.add(matcher.getReadBytes() - pageBeginPattern.getBytes("UTF-8").length);
            if (matcher.getPos() >= end || !matcher.readUntilMatch(pageEndPattern, null, end)) {
                System.err.println("could not find " + pageEndPattern + ", page over a split? pos="
                        + matcher.getPos() + " bytes=" + matcher.getReadBytes());
                // ret.add(end);
                break;
            }
            ret.add(matcher.getReadBytes() - pageEndPattern.getBytes("UTF-8").length);
            String report = String.format(
                    "StreamWikiDumpInputFormat: find page %6d start=%d pos=%d end=%d bytes=%d",
                    ret.size(), start, matcher.getPos(), end, matcher.getReadBytes());
            reporter.setStatus(report);
            reporter.incrCounter(WikiDumpCounters.FOUND_PAGES, 1);
            LOG.info(report);
        }
        if (ret.size() % 2 == 0) {
            ret.add(matcher.getReadBytes());
        }
        // System.err.println("getPageBytes " + ret);
        return ret;
    } finally {
        if (in != null) {
            in.close();
        }
    }
}
From source file:BU.MET.CS755.SpeciesIterReducer2.java
public void reduce(WritableComparable key, Iterator values, OutputCollector output, Reporter reporter)
        throws IOException {
    double score = 0;
    String outLinks = "";
    double oldScore = 0;

    // Counting links
    reporter.incrCounter(BU.MET.CS755.SpeciesIterDriver2.ITERATION_COUNTER.TOTAL_LINKS, 1L);

    if (iterationNumber == 1) {
        return;
    }

    while (values.hasNext()) {
        String curr = ((Text) values.next()).toString();
        int colon = curr.indexOf(":");
        int space = curr.indexOf(" ");
        int oldrank = curr.indexOf("oldrank");

        if ((colon > -1)) {
            String presScore = curr.substring(0, colon);
            try {
                score += Double.parseDouble(presScore);
                oldScore = score;
                outLinks = curr.substring(colon + 1);
                continue;
            } catch (Exception e) {
            }
        }

        if (space > -1) {
            outLinks = curr;
        } else if (oldrank > -1) {
            oldScore = new Double(curr.substring(oldrank + 8));
        } else {
            score += Double.parseDouble(curr);
        }
    }

    String toEmit;
    if (outLinks.length() > 0) {
        toEmit = (new Double(score)).toString() + ":" + outLinks;
    } else {
        toEmit = (new Double(score)).toString();
    }

    // Output the new page rank
    output.collect(key, new Text(toEmit));

    double delta = oldScore - score;

    // Check how much the new page rank has changed. If the change is less
    // than two decimal places, treat it as a converged value. If not,
    // we need to re-calculate the rank with one more iteration; inform the
    // driver about that by incrementing the iterations-needed counter.
    if ((delta > 0.009) || (delta < -0.009)) {
        Counter myCounter2 = reporter
                .getCounter(BU.MET.CS755.SpeciesIterDriver2.ITERATION_COUNTER.ITERATIONS_NEEDED);
        if (myCounter2 != null) {
            reporter.incrCounter(BU.MET.CS755.SpeciesIterDriver2.ITERATION_COUNTER.ITERATIONS_NEEDED, 1L);
        }
    }
}
From source file:ca.etsmtl.logti.log792.mti830.RowCounter.java
License:Apache License
public void map(ImmutableBytesWritable row, RowResult value,
        OutputCollector<ImmutableBytesWritable, RowResult> output,
        @SuppressWarnings("unused") Reporter reporter) throws IOException {
    boolean content = false;
    for (Map.Entry<byte[], Cell> e : value.entrySet()) {
        Cell cell = e.getValue();
        if (cell != null && cell.getValue().length > 0) {
            content = true;
            break;
        }
    }
    if (!content) {
        return;
    }
    // Give out same value every time. We're only interested in the row/key
    reporter.incrCounter(Counters.ROWS, 1);
    output.collect(row, EMPTY_RESULT_VALUE);
}
From source file:com.chriscx.mapred.Map.java
public void map(LongWritable key, Text value, OutputCollector<Text, IntWritable> output, Reporter reporter)
        throws IOException {
    String line = (caseSensitive) ? value.toString() : value.toString().toLowerCase();

    for (String pattern : patternsToSkip) {
        line = line.replaceAll(pattern, "");
    }

    StringTokenizer tokenizer = new StringTokenizer(line);
    while (tokenizer.hasMoreTokens()) {
        word.set(tokenizer.nextToken());
        output.collect(word, one);
        reporter.incrCounter(Counters.INPUT_WORDS, 1);
    }

    if ((++numRecords % 100) == 0) {
        reporter.setStatus(
                "Finished processing " + numRecords + " records " + "from the input file: " + inputFile);
    }
}
From source file:com.hdfs.concat.crush.CrushReducer.java
License:Apache License
@Override
public void reduce(Text bucketId, Iterator<Text> values, OutputCollector<Text, Text> collector,
        Reporter reporter) throws IOException {
    String bucket = bucketId.toString();
    String dirName = bucket.substring(0, bucket.lastIndexOf('-'));

    int idx = findMatcher(dirName);
    String outputFileName = calculateOutputFile(idx, dirName);

    /*
     * Don't need to separate the paths because the output file name is already absolute.
     */
    valueOut.set(outDirPath + outputFileName);

    LOG.info(format("Crushing bucket '%s' to file '%s'", bucket, outputFileName));

    /*
     * Strip the leading slash to make the path relative. The output format will relativize it to the task
     * attempt work dir.
     */
    RecordWriter<Object, Object> sink = null;
    Exception rootCause = null;

    Object key = null;
    Object value = null;

    try {
        while (null == rootCause && values.hasNext()) {
            Text srcFile = values.next();
            Path inputPath = new Path(srcFile.toString());

            RecordReader<Object, Object> reader = createRecordReader(idx, inputPath, reporter);

            try {
                if (null == key) {
                    key = reader.createKey();
                    value = reader.createValue();

                    /*
                     * Set the key and value class in the conf, which the output format uses to get type
                     * information.
                     */
                    job.setOutputKeyClass(key.getClass());
                    job.setOutputValueClass(value.getClass());

                    /*
                     * Output file name is absolute so we can just add it to the crush prefix.
                     */
                    sink = createRecordWriter(idx, "crush" + outputFileName);
                } else {
                    Class<?> other = reader.createKey().getClass();
                    if (!(key.getClass().equals(other))) {
                        throw new IllegalArgumentException(format("Heterogeneous keys detected in %s: %s !- %s",
                                inputPath, key.getClass(), other));
                    }

                    other = reader.createValue().getClass();
                    if (!value.getClass().equals(other)) {
                        throw new IllegalArgumentException(format("Heterogeneous values detected in %s: %s !- %s",
                                inputPath, value.getClass(), other));
                    }
                }

                while (reader.next(key, value)) {
                    sink.write(key, value);
                    reporter.incrCounter(ReducerCounter.RECORDS_CRUSHED, 1);
                }
            } catch (Exception e) {
                rootCause = e;
            } finally {
                try {
                    reader.close();
                } catch (Exception e) {
                    if (null == rootCause) {
                        rootCause = e;
                    } else {
                        LOG.debug("Swallowing exception on close of " + inputPath, e);
                    }
                }
            }

            /*
             * Output of the reducer is the source file => crushed file (in the final output dir, not the task
             * attempt work dir).
             */
            collector.collect(srcFile, valueOut);
            reporter.incrCounter(ReducerCounter.FILES_CRUSHED, 1);

            recordNumber++;

            if (reportRecordNumber == recordNumber) {
                reportRecordNumber += reportRecordNumber;
                reporter.setStatus(format("Processed %,d files %s : %s", recordNumber, bucket, inputPath));
            }
        }
    } catch (Exception e) {
        rootCause = e;
    } finally {
        if (null != sink) {
            try {
                sink.close(reporter);
            } catch (Exception e) {
                if (null == rootCause) {
                    rootCause = e;
                } else {
                    LOG.error("Swallowing exception on close of " + outputFileName, e);
                }
            }
        }

        /*
         * Let the exception bubble up with a minimum of wrapping.
         */
        if (null != rootCause) {
            if (rootCause instanceof RuntimeException) {
                throw (RuntimeException) rootCause;
            }
            if (rootCause instanceof IOException) {
                throw (IOException) rootCause;
            }
            throw new RuntimeException(rootCause);
        }
    }
}
From source file:com.ibm.bi.dml.runtime.matrix.mapred.CSVReblockReducer.java
License:Open Source License
@Override
public void reduce(TaggedFirstSecondIndexes key, Iterator<BlockRow> values,
        OutputCollector<MatrixIndexes, MatrixBlock> out, Reporter reporter) throws IOException {
    long start = System.currentTimeMillis();

    commonSetup(reporter);

    cachedValues.reset();

    //process the reducer part of the reblock operation
    processCSVReblock(key, values, dimensions);

    //perform mixed operations
    processReducerInstructions();

    //output results
    outputResultsFromCachedValues(reporter);

    reporter.incrCounter(Counters.COMBINE_OR_REDUCE_TIME, System.currentTimeMillis() - start);
}
From source file:com.ibm.bi.dml.runtime.matrix.mapred.CSVWriteMapper.java
License:Open Source License
@Override @SuppressWarnings("unchecked") public void map(Writable rawKey, Writable rawValue, OutputCollector<TaggedFirstSecondIndexes, MatrixBlock> out, Reporter reporter) throws IOException { long start = System.currentTimeMillis(); //for each represenattive matrix, read the record and apply instructions for (int i = 0; i < representativeMatrixes.size(); i++) { //convert the record into the right format for the representatice matrix inputConverter.setBlockSize(brlens[i], bclens[i]); inputConverter.convert(rawKey, rawValue); byte thisMatrix = representativeMatrixes.get(i); //apply unary instructions on the converted indexes and values while (inputConverter.hasNext()) { Pair<MatrixIndexes, MatrixBlock> pair = inputConverter.next(); MatrixIndexes indexes = pair.getKey(); MatrixBlock value = pair.getValue(); outIndexes.setIndexes(indexes.getRowIndex(), indexes.getColumnIndex()); ArrayList<Byte> outputs = inputOutputMap.get(thisMatrix); for (byte output : outputs) { outIndexes.setTag(output); out.collect(outIndexes, value); //LOG.info("Mapper output: "+outIndexes+", "+value+", tag: "+output); }/*w w w.j a va 2 s .c o m*/ } } reporter.incrCounter(Counters.MAP_TIME, System.currentTimeMillis() - start); }
From source file:com.ibm.bi.dml.runtime.matrix.mapred.CSVWriteReducer.java
License:Open Source License
@Override
public void reduce(TaggedFirstSecondIndexes inkey, Iterator<MatrixBlock> inValue,
        OutputCollector<NullWritable, RowBlockForTextOutput> out, Reporter reporter) throws IOException {
    long begin = System.currentTimeMillis();

    cachedReporter = reporter;

    byte tag = inkey.getTag();
    zeroBlock.setFormatParameters(delims[tag], sparses[tag]);
    outValue.setFormatParameters(delims[tag], sparses[tag]);

    Situation sit = Situation.MIDDLE;
    if (rowIndexes[tag] == minRowIndexes[tag])
        sit = Situation.START;
    else if (rowIndexes[tag] != inkey.getFirstIndex())
        sit = Situation.NEWLINE;

    //check whether need to fill in missing values in previous rows
    if (sit == Situation.NEWLINE) {
        //if the previous row has not finished
        addEndingMissingValues(tag, reporter);
    }

    if (sit == Situation.NEWLINE || sit == Situation.START) {
        //if a row is completely missing
        sit = addMissingRows(tag, inkey.getFirstIndex(), sit, reporter);
    }

    //add missing value at the beginning of this row
    for (long col = colIndexes[tag] + 1; col < inkey.getSecondIndex(); col++) {
        zeroBlock.setNumColumns(colsPerBlock[tag]);
        zeroBlock.setSituation(sit);
        collectFinalMultipleOutputs.directOutput(nullKey, zeroBlock, tagToResultIndex[tag], reporter);
        sit = Situation.MIDDLE;
    }

    colIndexes[tag] = inkey.getSecondIndex();

    while (inValue.hasNext()) {
        MatrixBlock block = inValue.next();
        outValue.setData(block);
        outValue.setNumColumns(block.getNumColumns());
        outValue.setSituation(sit);

        collectFinalMultipleOutputs.directOutput(nullKey, outValue, tagToResultIndex[tag], reporter);
        resultsNonZeros[tagToResultIndex[tag]] += block.getNonZeros();
        sit = Situation.MIDDLE;
    }
    rowIndexes[tag] = inkey.getFirstIndex();

    reporter.incrCounter(Counters.COMBINE_OR_REDUCE_TIME, (System.currentTimeMillis() - begin));
}
From source file:com.ibm.bi.dml.runtime.matrix.mapred.DataGenMapper.java
License:Open Source License
@Override
//valueString has to be Text type
public void map(Writable key, Writable valueString, OutputCollector<Writable, Writable> out, Reporter reporter)
        throws IOException {
    cachedReporter = reporter;

    long start = System.currentTimeMillis();

    //for each representative matrix, read the record and apply instructions
    for (int i = 0; i < representativeMatrixes.size(); i++) {
        DataGenMRInstruction genInst = dataGen_instructions.get(i);

        if (genInst.getDataGenMethod() == DataGenMethod.RAND) {
            RandInstruction randInst = (RandInstruction) genInst;
            String[] params = valueString.toString().split(",");
            long blockRowNumber = Long.parseLong(params[0]);
            long blockColNumber = Long.parseLong(params[1]);
            int blockRowSize = Integer.parseInt(params[2]);
            int blockColSize = Integer.parseInt(params[3]);
            long blockNNZ = Integer.parseInt(params[4]);
            long seed = Long.parseLong(params[5]);
            double minValue = randInst.getMinValue();
            double maxValue = randInst.getMaxValue();
            double sparsity = randInst.getSparsity();
            String pdf = randInst.getProbabilityDensityFunction().toLowerCase();

            //rand data generation
            try {
                indexes[i].setIndexes(blockRowNumber, blockColNumber);

                RandomMatrixGenerator rgen = LibMatrixDatagen.createRandomMatrixGenerator(pdf, blockRowSize,
                        blockColSize, blockRowSize, blockColSize, sparsity, minValue, maxValue,
                        randInst.getPdfParams());

                block[i].randOperationsInPlace(rgen, new long[] { blockNNZ }, null, seed);
            } catch (DMLRuntimeException e) {
                throw new IOException(e);
            }
        } else if (genInst.getDataGenMethod() == DataGenMethod.SEQ) {
            String[] params = valueString.toString().split(",");
            long blockRowNumber = Long.parseLong(params[0]);
            long blockColNumber = Long.parseLong(params[1]);
            double from = Double.parseDouble(params[2]);
            double to = Double.parseDouble(params[3]);
            double incr = Double.parseDouble(params[4]);

            //sequence data generation
            try {
                indexes[i].setIndexes(blockRowNumber, blockColNumber);
                block[i].seqOperationsInPlace(from, to, incr);
            } catch (DMLRuntimeException e) {
                throw new IOException(e);
            }
        } else {
            throw new IOException("Unknown data generation instruction: " + genInst.toString());
        }

        //put the input in the cache
        cachedValues.reset();
        cachedValues.set(genInst.output, indexes[i], block[i]);

        //special operations for individual map type
        specialOperationsForActualMap(i, out, reporter);
    }

    reporter.incrCounter(Counters.MAP_TIME, System.currentTimeMillis() - start);
}
From source file:com.ibm.bi.dml.runtime.matrix.mapred.GMRCombiner.java
License:Open Source License
public void reduce(MatrixIndexes indexes, Iterator<TaggedMatrixValue> values,
        OutputCollector<MatrixIndexes, TaggedMatrixValue> out, Reporter report) throws IOException {
    long start = System.currentTimeMillis();

    cachedValues.reset();

    processAggregateInstructions(indexes, values, true);

    //output the matrices needed by the reducer
    outputInCombinerFromCachedValues(indexes, taggedbuffer, out);

    report.incrCounter(Counters.COMBINE_OR_REDUCE_TIME, System.currentTimeMillis() - start);
}