List of usage examples for org.apache.hadoop.mapred Reporter incrCounter
public abstract void incrCounter(Enum<?> key, long amount);
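The snippets below come from real projects; as a starting point, here is a minimal, self-contained sketch of the usual pattern with the old mapred API: define a custom enum and increment it from map(). The WordLengthMapper class, the WordCounters enum, and the 10-character threshold are illustrative names chosen for this sketch, not taken from any of the examples below.

import java.io.IOException;
import java.util.StringTokenizer;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;

public class WordLengthMapper extends MapReduceBase
        implements Mapper<LongWritable, Text, Text, IntWritable> {

    // Custom counter group; each enum constant becomes one named counter.
    public enum WordCounters {
        TOTAL_WORDS, LONG_WORDS
    }

    private final Text word = new Text();
    private final IntWritable length = new IntWritable();

    @Override
    public void map(LongWritable key, Text value, OutputCollector<Text, IntWritable> output,
            Reporter reporter) throws IOException {
        StringTokenizer tokenizer = new StringTokenizer(value.toString());
        while (tokenizer.hasMoreTokens()) {
            String token = tokenizer.nextToken();
            word.set(token);
            length.set(token.length());
            output.collect(word, length);

            // Increment the enum-keyed counters; the framework aggregates them per job.
            reporter.incrCounter(WordCounters.TOTAL_WORDS, 1);
            if (token.length() > 10) {  // illustrative threshold
                reporter.incrCounter(WordCounters.LONG_WORDS, 1);
            }
        }
    }
}

After the job finishes, the aggregated totals show up in the job's counter summary and can be read from the driver, for example via RunningJob.getCounters().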
From source file:StreamWikiDumpInputFormat.java
License:Apache License
private static List<Long> getPageBytes(FileSplit split, FileSystem fs, CompressionCodecFactory compressionCodecs,
        Reporter reporter) throws IOException {
    SeekableInputStream in = null;
    try {
        in = SeekableInputStream.getInstance(split, fs, compressionCodecs);
        long start = split.getStart();
        long end = start + split.getLength();
        InputStream cin = null;
        if (cin != null) {
            // start = cin.getAdjustedStart();
            // end = cin.getAdjustedEnd() + 1;
        }
        ByteMatcher matcher = new ByteMatcher(in, in);
        List<Long> ret = new ArrayList<Long>();
        while (true) {
            if (matcher.getPos() >= end || !matcher.readUntilMatch(pageBeginPattern, null, end)) {
                break;
            }
            ret.add(matcher.getReadBytes() - pageBeginPattern.getBytes("UTF-8").length);
            if (matcher.getPos() >= end || !matcher.readUntilMatch(pageEndPattern, null, end)) {
                System.err.println("could not find " + pageEndPattern + ", page over a split? pos="
                        + matcher.getPos() + " bytes=" + matcher.getReadBytes());
                // ret.add(end);
                break;
            }
            ret.add(matcher.getReadBytes() - pageEndPattern.getBytes("UTF-8").length);
            String report = String.format(
                    "StreamWikiDumpInputFormat: find page %6d start=%d pos=%d end=%d bytes=%d",
                    ret.size(), start, matcher.getPos(), end, matcher.getReadBytes());
            reporter.setStatus(report);
            reporter.incrCounter(WikiDumpCounters.FOUND_PAGES, 1);
            LOG.info(report);
        }
        if (ret.size() % 2 == 0) {
            ret.add(matcher.getReadBytes());
        }
        // System.err.println("getPageBytes " + ret);
        return ret;
    } finally {
        if (in != null) {
            in.close();
        }
    }
}
From source file:BU.MET.CS755.SpeciesIterReducer2.java
public void reduce(WritableComparable key, Iterator values, OutputCollector output, Reporter reporter)
        throws IOException {
    double score = 0;
    String outLinks = "";
    double oldScore = 0;

    // Counting links
    reporter.incrCounter(BU.MET.CS755.SpeciesIterDriver2.ITERATION_COUNTER.TOTAL_LINKS, 1L);

    if (iterationNumber == 1) {
        return;
    }

    while (values.hasNext()) {
        String curr = ((Text) values.next()).toString();
        int colon = curr.indexOf(":");
        int space = curr.indexOf(" ");
        int oldrank = curr.indexOf("oldrank");

        if ((colon > -1)) {
            String presScore = curr.substring(0, colon);
            try {
                score += Double.parseDouble(presScore);
                oldScore = score;
                outLinks = curr.substring(colon + 1);
                continue;
            } catch (Exception e) {
            }
        }

        if (space > -1) {
            outLinks = curr;
        } else if (oldrank > -1) {
            oldScore = new Double(curr.substring(oldrank + 8));
        } else {
            score += Double.parseDouble(curr);
        }
    }

    String toEmit;
    if (outLinks.length() > 0) {
        toEmit = (new Double(score)).toString() + ":" + outLinks;
    } else {
        toEmit = (new Double(score)).toString();
    }

    // Output the new page rank
    output.collect(key, new Text(toEmit));

    double delta = oldScore - score;

    // Check how much the new page rank has changed. If the change is less
    // than two decimal places, treat it as a converged value. If not,
    // we need to re-calculate the rank with one more iteration; inform the
    // driver about that by incrementing the iterations-needed counter.
    if ((delta > 0.009) || (delta < -0.009)) {
        Counter myCounter2 = reporter
                .getCounter(BU.MET.CS755.SpeciesIterDriver2.ITERATION_COUNTER.ITERATIONS_NEEDED);
        if (myCounter2 != null) {
            reporter.incrCounter(BU.MET.CS755.SpeciesIterDriver2.ITERATION_COUNTER.ITERATIONS_NEEDED, 1L);
        }
    }
}
From source file:ca.etsmtl.logti.log792.mti830.RowCounter.java
License:Apache License
public void map(ImmutableBytesWritable row, RowResult value,
        OutputCollector<ImmutableBytesWritable, RowResult> output,
        @SuppressWarnings("unused") Reporter reporter) throws IOException {
    boolean content = false;
    for (Map.Entry<byte[], Cell> e : value.entrySet()) {
        Cell cell = e.getValue();
        if (cell != null && cell.getValue().length > 0) {
            content = true;
            break;
        }
    }
    if (!content) {
        return;
    }
    // Give out same value every time. We're only interested in the row/key
    reporter.incrCounter(Counters.ROWS, 1);
    output.collect(row, EMPTY_RESULT_VALUE);
}
From source file:com.chriscx.mapred.Map.java
public void map(LongWritable key, Text value, OutputCollector<Text, IntWritable> output, Reporter reporter)
        throws IOException {
    String line = (caseSensitive) ? value.toString() : value.toString().toLowerCase();

    for (String pattern : patternsToSkip) {
        line = line.replaceAll(pattern, "");
    }

    StringTokenizer tokenizer = new StringTokenizer(line);
    while (tokenizer.hasMoreTokens()) {
        word.set(tokenizer.nextToken());
        output.collect(word, one);
        reporter.incrCounter(Counters.INPUT_WORDS, 1);
    }

    if ((++numRecords % 100) == 0) {
        reporter.setStatus(
                "Finished processing " + numRecords + " records " + "from the input file: " + inputFile);
    }
}
From source file:com.hdfs.concat.crush.CrushReducer.java
License:Apache License
@Override
public void reduce(Text bucketId, Iterator<Text> values, OutputCollector<Text, Text> collector,
        Reporter reporter) throws IOException {
    String bucket = bucketId.toString();
    String dirName = bucket.substring(0, bucket.lastIndexOf('-'));

    int idx = findMatcher(dirName);
    String outputFileName = calculateOutputFile(idx, dirName);

    /*
     * Don't need to separate the paths because the output file name is already absolute.
     */
    valueOut.set(outDirPath + outputFileName);

    LOG.info(format("Crushing bucket '%s' to file '%s'", bucket, outputFileName));

    /*
     * Strip the leading slash to make the path relative. The output format will relativize it to the task
     * attempt work dir.
     */
    RecordWriter<Object, Object> sink = null;
    Exception rootCause = null;

    Object key = null;
    Object value = null;

    try {
        while (null == rootCause && values.hasNext()) {
            Text srcFile = values.next();
            Path inputPath = new Path(srcFile.toString());

            RecordReader<Object, Object> reader = createRecordReader(idx, inputPath, reporter);

            try {
                if (null == key) {
                    key = reader.createKey();
                    value = reader.createValue();

                    /*
                     * Set the key and value class in the conf, which the output format uses to get type
                     * information.
                     */
                    job.setOutputKeyClass(key.getClass());
                    job.setOutputValueClass(value.getClass());

                    /*
                     * Output file name is absolute so we can just add it to the crush prefix.
                     */
                    sink = createRecordWriter(idx, "crush" + outputFileName);
                } else {
                    Class<?> other = reader.createKey().getClass();
                    if (!(key.getClass().equals(other))) {
                        throw new IllegalArgumentException(format("Heterogeneous keys detected in %s: %s !- %s",
                                inputPath, key.getClass(), other));
                    }

                    other = reader.createValue().getClass();
                    if (!value.getClass().equals(other)) {
                        throw new IllegalArgumentException(format("Heterogeneous values detected in %s: %s !- %s",
                                inputPath, value.getClass(), other));
                    }
                }

                while (reader.next(key, value)) {
                    sink.write(key, value);
                    reporter.incrCounter(ReducerCounter.RECORDS_CRUSHED, 1);
                }
            } catch (Exception e) {
                rootCause = e;
            } finally {
                try {
                    reader.close();
                } catch (Exception e) {
                    if (null == rootCause) {
                        rootCause = e;
                    } else {
                        LOG.debug("Swallowing exception on close of " + inputPath, e);
                    }
                }
            }

            /*
             * Output of the reducer is the source file => crushed file (in the final output dir, not the task
             * attempt work dir).
             */
            collector.collect(srcFile, valueOut);
            reporter.incrCounter(ReducerCounter.FILES_CRUSHED, 1);

            recordNumber++;

            if (reportRecordNumber == recordNumber) {
                reportRecordNumber += reportRecordNumber;
                reporter.setStatus(format("Processed %,d files %s : %s", recordNumber, bucket, inputPath));
            }
        }
    } catch (Exception e) {
        rootCause = e;
    } finally {
        if (null != sink) {
            try {
                sink.close(reporter);
            } catch (Exception e) {
                if (null == rootCause) {
                    rootCause = e;
                } else {
                    LOG.error("Swallowing exception on close of " + outputFileName, e);
                }
            }
        }

        /*
         * Let the exception bubble up with a minimum of wrapping.
         */
        if (null != rootCause) {
            if (rootCause instanceof RuntimeException) {
                throw (RuntimeException) rootCause;
            }
            if (rootCause instanceof IOException) {
                throw (IOException) rootCause;
            }
            throw new RuntimeException(rootCause);
        }
    }
}
From source file:com.ibm.bi.dml.runtime.matrix.mapred.CSVReblockReducer.java
License:Open Source License
@Override
public void reduce(TaggedFirstSecondIndexes key, Iterator<BlockRow> values,
        OutputCollector<MatrixIndexes, MatrixBlock> out, Reporter reporter) throws IOException {
    long start = System.currentTimeMillis();

    commonSetup(reporter);

    cachedValues.reset();

    //process the reducer part of the reblock operation
    processCSVReblock(key, values, dimensions);

    //perform mixed operations
    processReducerInstructions();

    //output results
    outputResultsFromCachedValues(reporter);

    reporter.incrCounter(Counters.COMBINE_OR_REDUCE_TIME, System.currentTimeMillis() - start);
}
From source file:com.ibm.bi.dml.runtime.matrix.mapred.CSVWriteMapper.java
License:Open Source License
@Override @SuppressWarnings("unchecked") public void map(Writable rawKey, Writable rawValue, OutputCollector<TaggedFirstSecondIndexes, MatrixBlock> out, Reporter reporter) throws IOException { long start = System.currentTimeMillis(); //for each represenattive matrix, read the record and apply instructions for (int i = 0; i < representativeMatrixes.size(); i++) { //convert the record into the right format for the representatice matrix inputConverter.setBlockSize(brlens[i], bclens[i]); inputConverter.convert(rawKey, rawValue); byte thisMatrix = representativeMatrixes.get(i); //apply unary instructions on the converted indexes and values while (inputConverter.hasNext()) { Pair<MatrixIndexes, MatrixBlock> pair = inputConverter.next(); MatrixIndexes indexes = pair.getKey(); MatrixBlock value = pair.getValue(); outIndexes.setIndexes(indexes.getRowIndex(), indexes.getColumnIndex()); ArrayList<Byte> outputs = inputOutputMap.get(thisMatrix); for (byte output : outputs) { outIndexes.setTag(output); out.collect(outIndexes, value); //LOG.info("Mapper output: "+outIndexes+", "+value+", tag: "+output); }/*w w w.j a va 2 s .c o m*/ } } reporter.incrCounter(Counters.MAP_TIME, System.currentTimeMillis() - start); }
From source file:com.ibm.bi.dml.runtime.matrix.mapred.CSVWriteReducer.java
License:Open Source License
@Override
public void reduce(TaggedFirstSecondIndexes inkey, Iterator<MatrixBlock> inValue,
        OutputCollector<NullWritable, RowBlockForTextOutput> out, Reporter reporter) throws IOException {
    long begin = System.currentTimeMillis();

    cachedReporter = reporter;

    byte tag = inkey.getTag();
    zeroBlock.setFormatParameters(delims[tag], sparses[tag]);
    outValue.setFormatParameters(delims[tag], sparses[tag]);

    Situation sit = Situation.MIDDLE;
    if (rowIndexes[tag] == minRowIndexes[tag])
        sit = Situation.START;
    else if (rowIndexes[tag] != inkey.getFirstIndex())
        sit = Situation.NEWLINE;

    //check whether need to fill in missing values in previous rows
    if (sit == Situation.NEWLINE) {
        //if the previous row has not finished
        addEndingMissingValues(tag, reporter);
    }

    if (sit == Situation.NEWLINE || sit == Situation.START) {
        //if a row is completely missing
        sit = addMissingRows(tag, inkey.getFirstIndex(), sit, reporter);
    }

    //add missing value at the beginning of this row
    for (long col = colIndexes[tag] + 1; col < inkey.getSecondIndex(); col++) {
        zeroBlock.setNumColumns(colsPerBlock[tag]);
        zeroBlock.setSituation(sit);
        collectFinalMultipleOutputs.directOutput(nullKey, zeroBlock, tagToResultIndex[tag], reporter);
        sit = Situation.MIDDLE;
    }

    colIndexes[tag] = inkey.getSecondIndex();

    while (inValue.hasNext()) {
        MatrixBlock block = inValue.next();
        outValue.setData(block);
        outValue.setNumColumns(block.getNumColumns());
        outValue.setSituation(sit);

        collectFinalMultipleOutputs.directOutput(nullKey, outValue, tagToResultIndex[tag], reporter);
        resultsNonZeros[tagToResultIndex[tag]] += block.getNonZeros();
        sit = Situation.MIDDLE;
    }
    rowIndexes[tag] = inkey.getFirstIndex();

    reporter.incrCounter(Counters.COMBINE_OR_REDUCE_TIME, (System.currentTimeMillis() - begin));
}
From source file:com.ibm.bi.dml.runtime.matrix.mapred.DataGenMapper.java
License:Open Source License
@Override
//valueString has to be Text type
public void map(Writable key, Writable valueString, OutputCollector<Writable, Writable> out, Reporter reporter)
        throws IOException {
    cachedReporter = reporter;

    long start = System.currentTimeMillis();

    //for each representative matrix, read the record and apply instructions
    for (int i = 0; i < representativeMatrixes.size(); i++) {
        DataGenMRInstruction genInst = dataGen_instructions.get(i);

        if (genInst.getDataGenMethod() == DataGenMethod.RAND) {
            RandInstruction randInst = (RandInstruction) genInst;
            String[] params = valueString.toString().split(",");
            long blockRowNumber = Long.parseLong(params[0]);
            long blockColNumber = Long.parseLong(params[1]);
            int blockRowSize = Integer.parseInt(params[2]);
            int blockColSize = Integer.parseInt(params[3]);
            long blockNNZ = Integer.parseInt(params[4]);
            long seed = Long.parseLong(params[5]);
            double minValue = randInst.getMinValue();
            double maxValue = randInst.getMaxValue();
            double sparsity = randInst.getSparsity();
            String pdf = randInst.getProbabilityDensityFunction().toLowerCase();

            //rand data generation
            try {
                indexes[i].setIndexes(blockRowNumber, blockColNumber);

                RandomMatrixGenerator rgen = LibMatrixDatagen.createRandomMatrixGenerator(pdf, blockRowSize,
                        blockColSize, blockRowSize, blockColSize, sparsity, minValue, maxValue,
                        randInst.getPdfParams());

                block[i].randOperationsInPlace(rgen, new long[] { blockNNZ }, null, seed);
            } catch (DMLRuntimeException e) {
                throw new IOException(e);
            }
        } else if (genInst.getDataGenMethod() == DataGenMethod.SEQ) {
            String[] params = valueString.toString().split(",");
            long blockRowNumber = Long.parseLong(params[0]);
            long blockColNumber = Long.parseLong(params[1]);
            double from = Double.parseDouble(params[2]);
            double to = Double.parseDouble(params[3]);
            double incr = Double.parseDouble(params[4]);

            //sequence data generation
            try {
                indexes[i].setIndexes(blockRowNumber, blockColNumber);
                block[i].seqOperationsInPlace(from, to, incr);
            } catch (DMLRuntimeException e) {
                throw new IOException(e);
            }
        } else {
            throw new IOException("Unknown data generation instruction: " + genInst.toString());
        }

        //put the input in the cache
        cachedValues.reset();
        cachedValues.set(genInst.output, indexes[i], block[i]);

        //special operations for individual map type
        specialOperationsForActualMap(i, out, reporter);
    }

    reporter.incrCounter(Counters.MAP_TIME, System.currentTimeMillis() - start);
}
From source file:com.ibm.bi.dml.runtime.matrix.mapred.GMRCombiner.java
License:Open Source License
public void reduce(MatrixIndexes indexes, Iterator<TaggedMatrixValue> values,
        OutputCollector<MatrixIndexes, TaggedMatrixValue> out, Reporter report) throws IOException {
    long start = System.currentTimeMillis();

    cachedValues.reset();

    processAggregateInstructions(indexes, values, true);

    //output the matrices needed by the reducer
    outputInCombinerFromCachedValues(indexes, taggedbuffer, out);

    report.incrCounter(Counters.COMBINE_OR_REDUCE_TIME, System.currentTimeMillis() - start);
}