Example usage for org.apache.hadoop.mapred Reporter incrCounter

Introduction

On this page you can find example usages of org.apache.hadoop.mapred Reporter incrCounter.

Prototype

public abstract void incrCounter(Enum<?> key, long amount);

Document

Increments the counter identified by the key, which can be of any Enum type, by the specified amount.
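
Below is a minimal, self-contained sketch of declaring a counter enum and incrementing it from a mapper using the old org.apache.hadoop.mapred API. The MyCounters enum and LineCountMapper class are hypothetical names used for illustration:

import java.io.IOException;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;

public class LineCountMapper extends MapReduceBase
        implements Mapper<LongWritable, Text, Text, IntWritable> {

    // Any Enum type can serve as the counter key; the framework aggregates
    // per-task increments into job-level totals.
    public enum MyCounters { EMPTY_LINES, NONEMPTY_LINES }

    private static final IntWritable one = new IntWritable(1);

    public void map(LongWritable key, Text value, OutputCollector<Text, IntWritable> output,
            Reporter reporter) throws IOException {
        if (value.toString().trim().isEmpty()) {
            // Increment the EMPTY_LINES counter by 1 and skip the record.
            reporter.incrCounter(MyCounters.EMPTY_LINES, 1);
            return;
        }
        reporter.incrCounter(MyCounters.NONEMPTY_LINES, 1);
        output.collect(value, one);
    }
}

After the job completes, both counters appear in the job's counter report alongside the built-in framework counters.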

Usage

From source file:StreamWikiDumpInputFormat.java

License:Apache License

private static List<Long> getPageBytes(FileSplit split, FileSystem fs,
        CompressionCodecFactory compressionCodecs, Reporter reporter) throws IOException {
    SeekableInputStream in = null;
    try {
        in = SeekableInputStream.getInstance(split, fs, compressionCodecs);
        long start = split.getStart();
        long end = start + split.getLength();
        ByteMatcher matcher = new ByteMatcher(in, in);
        List<Long> ret = new ArrayList<Long>();
        while (true) {
            if (matcher.getPos() >= end || !matcher.readUntilMatch(pageBeginPattern, null, end)) {
                break;
            }
            ret.add(matcher.getReadBytes() - pageBeginPattern.getBytes("UTF-8").length);
            if (matcher.getPos() >= end || !matcher.readUntilMatch(pageEndPattern, null, end)) {
                System.err.println("could not find " + pageEndPattern + ", page over a split?  pos="
                        + matcher.getPos() + " bytes=" + matcher.getReadBytes());
                // ret.add(end);
                break;
            }
            ret.add(matcher.getReadBytes() - pageEndPattern.getBytes("UTF-8").length);
            String report = String.format(
                    "StreamWikiDumpInputFormat: find page %6d start=%d pos=%d end=%d bytes=%d", ret.size(),
                    start, matcher.getPos(), end, matcher.getReadBytes());
            reporter.setStatus(report);
            reporter.incrCounter(WikiDumpCounters.FOUND_PAGES, 1);
            LOG.info(report);
        }
        if (ret.size() % 2 == 0) {
            ret.add(matcher.getReadBytes());
        }
        return ret;
    } finally {
        if (in != null) {
            in.close();
        }
    }
}

From source file:BU.MET.CS755.SpeciesIterReducer2.java

public void reduce(WritableComparable key, Iterator values, OutputCollector output, Reporter reporter)
        throws IOException {
    double score = 0;
    String outLinks = "";
    double oldScore = 0;

    // Counting links
    reporter.incrCounter(BU.MET.CS755.SpeciesIterDriver2.ITERATION_COUNTER.TOTAL_LINKS, 1L);

    if (iterationNumber == 1) {
        return;
    }

    while (values.hasNext()) {
        String curr = ((Text) values.next()).toString();

        int colon = curr.indexOf(":");
        int space = curr.indexOf(" ");
        int oldrank = curr.indexOf("oldrank");

        if (colon > -1) {
            String presScore = curr.substring(0, colon);
            try {
                score += Double.parseDouble(presScore);
                oldScore = score;
                outLinks = curr.substring(colon + 1);
                continue;
            } catch (Exception e) {
                // Not a numeric score; fall through and treat curr as links or an oldrank entry.
            }
        }

        if (space > -1) {
            outLinks = curr;
        } else if (oldrank > -1) {
            oldScore = Double.parseDouble(curr.substring(oldrank + 8));
        } else {
            score += Double.parseDouble(curr);
        }
    }

    String toEmit;

    if (outLinks.length() > 0) {
        toEmit = Double.toString(score) + ":" + outLinks;
    } else {
        toEmit = Double.toString(score);
    }

    // Output the new page rank
    output.collect(key, new Text(toEmit));

    double delta = oldScore - score;

    // Check how much the new page rank has changed. If the absolute change
    // is below 0.01 (two decimal places), treat the value as converged.
    // Otherwise the rank must be re-calculated in one more iteration; inform
    // the driver by incrementing the ITERATIONS_NEEDED counter.
    if (Math.abs(delta) > 0.009) {
        Counter myCounter2 = reporter
                .getCounter(BU.MET.CS755.SpeciesIterDriver2.ITERATION_COUNTER.ITERATIONS_NEEDED);

        if (myCounter2 != null) {
            reporter.incrCounter(BU.MET.CS755.SpeciesIterDriver2.ITERATION_COUNTER.ITERATIONS_NEEDED, 1L);
        }
    }
}
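
The convergence check above only increments ITERATIONS_NEEDED; the actual decision is made by the driver after the job completes. A minimal driver-side sketch under that assumption (conf is a hypothetical, fully configured JobConf):

RunningJob runningJob = JobClient.runJob(conf);
Counters counters = runningJob.getCounters();
long needed = counters.getCounter(BU.MET.CS755.SpeciesIterDriver2.ITERATION_COUNTER.ITERATIONS_NEEDED);
if (needed > 0) {
    // At least one page rank moved more than the threshold: run another iteration.
}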

From source file:ca.etsmtl.logti.log792.mti830.RowCounter.java

License:Apache License

public void map(ImmutableBytesWritable row, RowResult value,
        OutputCollector<ImmutableBytesWritable, RowResult> output,
        @SuppressWarnings("unused") Reporter reporter) throws IOException {
    boolean content = false;
    for (Map.Entry<byte[], Cell> e : value.entrySet()) {
            Cell cell = e.getValue();
        if (cell != null && cell.getValue().length > 0) {
            content = true;
            break;
        }
    }
    if (!content) {
        return;
    }
    // Emit the same value every time; we're only interested in the row key.
    reporter.incrCounter(Counters.ROWS, 1);
    output.collect(row, EMPTY_RESULT_VALUE);
}

From source file:com.chriscx.mapred.Map.java

public void map(LongWritable key, Text value, OutputCollector<Text, IntWritable> output, Reporter reporter)
        throws IOException {
    String line = (caseSensitive) ? value.toString() : value.toString().toLowerCase();

    for (String pattern : patternsToSkip) {
        line = line.replaceAll(pattern, "");
    }

    StringTokenizer tokenizer = new StringTokenizer(line);
    while (tokenizer.hasMoreTokens()) {
        word.set(tokenizer.nextToken());
        output.collect(word, one);
        reporter.incrCounter(Counters.INPUT_WORDS, 1);
    }

    if ((++numRecords % 100) == 0) {
        reporter.setStatus(
                "Finished processing " + numRecords + " records " + "from the input file: " + inputFile);
    }
}

From source file:com.hdfs.concat.crush.CrushReducer.java

License:Apache License

@Override
public void reduce(Text bucketId, Iterator<Text> values, OutputCollector<Text, Text> collector,
        Reporter reporter) throws IOException {
    String bucket = bucketId.toString();

    String dirName = bucket.substring(0, bucket.lastIndexOf('-'));

    int idx = findMatcher(dirName);

    String outputFileName = calculateOutputFile(idx, dirName);

    /*
     * Don't need to separate the paths because the output file name is already absolute.
     */
    valueOut.set(outDirPath + outputFileName);

    LOG.info(format("Crushing bucket '%s' to file '%s'", bucket, outputFileName));

    /*
     * Strip the leading slash to make the path relative; the output format will relativize it to the task attempt work dir.
     */
    RecordWriter<Object, Object> sink = null;
    Exception rootCause = null;

    Object key = null;
    Object value = null;

    try {
        while (null == rootCause && values.hasNext()) {
            Text srcFile = values.next();
            Path inputPath = new Path(srcFile.toString());

            RecordReader<Object, Object> reader = createRecordReader(idx, inputPath, reporter);

            try {
                if (null == key) {
                    key = reader.createKey();
                    value = reader.createValue();

                    /*
                     * Set the key and value class in the conf, which the output format uses to get type information.
                     */
                    job.setOutputKeyClass(key.getClass());
                    job.setOutputValueClass(value.getClass());

                    /*
                     * Output file name is absolute so we can just add it to the crush prefix.
                     */
                    sink = createRecordWriter(idx, "crush" + outputFileName);
                } else {

                    Class<?> other = reader.createKey().getClass();

                    if (!(key.getClass().equals(other))) {
                        throw new IllegalArgumentException(format("Heterogeneous keys detected in %s: %s != %s",
                                inputPath, key.getClass(), other));
                    }

                    other = reader.createValue().getClass();

                    if (!value.getClass().equals(other)) {
                        throw new IllegalArgumentException(
                                format("Heterogeneous values detected in %s: %s != %s", inputPath,
                                        value.getClass(), other));
                    }
                }

                while (reader.next(key, value)) {
                    sink.write(key, value);
                    reporter.incrCounter(ReducerCounter.RECORDS_CRUSHED, 1);
                }
            } catch (Exception e) {
                rootCause = e;
            } finally {
                try {
                    reader.close();
                } catch (Exception e) {
                    if (null == rootCause) {
                        rootCause = e;
                    } else {
                        LOG.debug("Swallowing exception on close of " + inputPath, e);
                    }
                }
            }

            /*
             * Output of the reducer is the source file => crushed file (in the final output dir, not the task attempt work dir).
             */
            collector.collect(srcFile, valueOut);
            reporter.incrCounter(ReducerCounter.FILES_CRUSHED, 1);

            recordNumber++;

            if (reportRecordNumber == recordNumber) {
                reportRecordNumber += reportRecordNumber;

                reporter.setStatus(format("Processed %,d files %s : %s", recordNumber, bucket, inputPath));
            }
        }
    } catch (Exception e) {
        rootCause = e;
    } finally {
        if (null != sink) {
            try {
                sink.close(reporter);
            } catch (Exception e) {
                if (null == rootCause) {
                    rootCause = e;
                } else {
                    LOG.error("Swallowing exception on close of " + outputFileName, e);
                }
            }
        }

        /*
         * Let the exception bubble up with a minimum of wrapping.
         */
        if (null != rootCause) {
            if (rootCause instanceof RuntimeException) {
                throw (RuntimeException) rootCause;
            }

            if (rootCause instanceof IOException) {
                throw (IOException) rootCause;
            }

            throw new RuntimeException(rootCause);
        }
    }
}

From source file:com.ibm.bi.dml.runtime.matrix.mapred.CSVReblockReducer.java

License:Open Source License

@Override
public void reduce(TaggedFirstSecondIndexes key, Iterator<BlockRow> values,
        OutputCollector<MatrixIndexes, MatrixBlock> out, Reporter reporter) throws IOException {
    long start = System.currentTimeMillis();

    commonSetup(reporter);

    cachedValues.reset();

    //process the reducer part of the reblock operation
    processCSVReblock(key, values, dimensions);

    //perform mixed operations
    processReducerInstructions();

    //output results
    outputResultsFromCachedValues(reporter);

    reporter.incrCounter(Counters.COMBINE_OR_REDUCE_TIME, System.currentTimeMillis() - start);
}

From source file:com.ibm.bi.dml.runtime.matrix.mapred.CSVWriteMapper.java

License:Open Source License

@Override
@SuppressWarnings("unchecked")
public void map(Writable rawKey, Writable rawValue, OutputCollector<TaggedFirstSecondIndexes, MatrixBlock> out,
        Reporter reporter) throws IOException {
    long start = System.currentTimeMillis();

    //for each representative matrix, read the record and apply instructions
    for (int i = 0; i < representativeMatrixes.size(); i++) {
        //convert the record into the right format for the representative matrix
        inputConverter.setBlockSize(brlens[i], bclens[i]);
        inputConverter.convert(rawKey, rawValue);

        byte thisMatrix = representativeMatrixes.get(i);

        //apply unary instructions on the converted indexes and values
        while (inputConverter.hasNext()) {
            Pair<MatrixIndexes, MatrixBlock> pair = inputConverter.next();
            MatrixIndexes indexes = pair.getKey();

            MatrixBlock value = pair.getValue();

            outIndexes.setIndexes(indexes.getRowIndex(), indexes.getColumnIndex());
            ArrayList<Byte> outputs = inputOutputMap.get(thisMatrix);
            for (byte output : outputs) {
                outIndexes.setTag(output);
                out.collect(outIndexes, value);
                //LOG.info("Mapper output: "+outIndexes+", "+value+", tag: "+output);
            }
        }
    }
    reporter.incrCounter(Counters.MAP_TIME, System.currentTimeMillis() - start);
}

From source file:com.ibm.bi.dml.runtime.matrix.mapred.CSVWriteReducer.java

License:Open Source License

@Override
public void reduce(TaggedFirstSecondIndexes inkey, Iterator<MatrixBlock> inValue,
        OutputCollector<NullWritable, RowBlockForTextOutput> out, Reporter reporter) throws IOException {
    long begin = System.currentTimeMillis();

    cachedReporter = reporter;

    byte tag = inkey.getTag();
    zeroBlock.setFormatParameters(delims[tag], sparses[tag]);
    outValue.setFormatParameters(delims[tag], sparses[tag]);

    Situation sit = Situation.MIDDLE;
    if (rowIndexes[tag] == minRowIndexes[tag])
        sit = Situation.START;
    else if (rowIndexes[tag] != inkey.getFirstIndex())
        sit = Situation.NEWLINE;

    //check whether we need to fill in missing values in previous rows
    if (sit == Situation.NEWLINE) {
        //if the previous row has not finished
        addEndingMissingValues(tag, reporter);
    }

    if (sit == Situation.NEWLINE || sit == Situation.START) {
        //if a row is completely missing
        sit = addMissingRows(tag, inkey.getFirstIndex(), sit, reporter);
    }

    //add missing value at the beginning of this row
    for (long col = colIndexes[tag] + 1; col < inkey.getSecondIndex(); col++) {
        zeroBlock.setNumColumns(colsPerBlock[tag]);
        zeroBlock.setSituation(sit);
        collectFinalMultipleOutputs.directOutput(nullKey, zeroBlock, tagToResultIndex[tag], reporter);
        sit = Situation.MIDDLE;
    }

    colIndexes[tag] = inkey.getSecondIndex();

    while (inValue.hasNext()) {
        MatrixBlock block = inValue.next();
        outValue.setData(block);
        outValue.setNumColumns(block.getNumColumns());
        outValue.setSituation(sit);

        collectFinalMultipleOutputs.directOutput(nullKey, outValue, tagToResultIndex[tag], reporter);
        resultsNonZeros[tagToResultIndex[tag]] += block.getNonZeros();
        sit = Situation.MIDDLE;
    }
    rowIndexes[tag] = inkey.getFirstIndex();

    reporter.incrCounter(Counters.COMBINE_OR_REDUCE_TIME, (System.currentTimeMillis() - begin));
}

From source file:com.ibm.bi.dml.runtime.matrix.mapred.DataGenMapper.java

License:Open Source License

@Override
//valueString has to be of type Text
public void map(Writable key, Writable valueString, OutputCollector<Writable, Writable> out, Reporter reporter)
        throws IOException {
    cachedReporter = reporter;

    long start = System.currentTimeMillis();

    //for each representative matrix, read the record and apply instructions
    for (int i = 0; i < representativeMatrixes.size(); i++) {
        DataGenMRInstruction genInst = dataGen_instructions.get(i);

        if (genInst.getDataGenMethod() == DataGenMethod.RAND) {
            RandInstruction randInst = (RandInstruction) genInst;
            String[] params = valueString.toString().split(",");
            long blockRowNumber = Long.parseLong(params[0]);
            long blockColNumber = Long.parseLong(params[1]);
            int blockRowSize = Integer.parseInt(params[2]);
            int blockColSize = Integer.parseInt(params[3]);
            long blockNNZ = Long.parseLong(params[4]);
            long seed = Long.parseLong(params[5]);
            double minValue = randInst.getMinValue();
            double maxValue = randInst.getMaxValue();
            double sparsity = randInst.getSparsity();
            String pdf = randInst.getProbabilityDensityFunction().toLowerCase();

            //rand data generation
            try {
                indexes[i].setIndexes(blockRowNumber, blockColNumber);

                RandomMatrixGenerator rgen = LibMatrixDatagen.createRandomMatrixGenerator(pdf, blockRowSize,
                        blockColSize, blockRowSize, blockColSize, sparsity, minValue, maxValue,
                        randInst.getPdfParams());

                block[i].randOperationsInPlace(rgen, new long[] { blockNNZ }, null, seed);
            } catch (DMLRuntimeException e) {
                throw new IOException(e);
            }
        } else if (genInst.getDataGenMethod() == DataGenMethod.SEQ) {
            String[] params = valueString.toString().split(",");
            long blockRowNumber = Long.parseLong(params[0]);
            long blockColNumber = Long.parseLong(params[1]);
            double from = Double.parseDouble(params[2]);
            double to = Double.parseDouble(params[3]);
            double incr = Double.parseDouble(params[4]);

            //sequence data generation
            try {
                indexes[i].setIndexes(blockRowNumber, blockColNumber);
                block[i].seqOperationsInPlace(from, to, incr);
            } catch (DMLRuntimeException e) {
                throw new IOException(e);
            }
        } else {
            throw new IOException("Unknown data generation instruction: " + genInst.toString());
        }

        //put the input in the cache
        cachedValues.reset();
        cachedValues.set(genInst.output, indexes[i], block[i]);

        //special operations for the individual map type
        specialOperationsForActualMap(i, out, reporter);
    }

    reporter.incrCounter(Counters.MAP_TIME, System.currentTimeMillis() - start);
}

From source file:com.ibm.bi.dml.runtime.matrix.mapred.GMRCombiner.java

License:Open Source License

public void reduce(MatrixIndexes indexes, Iterator<TaggedMatrixValue> values,
        OutputCollector<MatrixIndexes, TaggedMatrixValue> out, Reporter report) throws IOException {
    long start = System.currentTimeMillis();

    cachedValues.reset();

    processAggregateInstructions(indexes, values, true);

    //output the matrices needed by the reducer
    outputInCombinerFromCachedValues(indexes, taggedbuffer, out);

    report.incrCounter(Counters.COMBINE_OR_REDUCE_TIME, System.currentTimeMillis() - start);
}