List of usage examples for org.apache.hadoop.mapred.Reporter.incrCounter
public abstract void incrCounter(String group, String counter, long amount);
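incrCounter adds amount to the counter identified by the (group, counter) pair, creating the counter on first use; Hadoop aggregates the per-task values into job-level totals. Before the real-world examples below, here is a minimal sketch of the common pattern in an old-API mapper; the class name CountingMapper and the counter names "MyApp" and "records-processed" are hypothetical, chosen only for illustration:

import java.io.IOException;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;

public class CountingMapper extends MapReduceBase
        implements Mapper<LongWritable, Text, Text, LongWritable> {
    @Override
    public void map(LongWritable key, Text value, OutputCollector<Text, LongWritable> output,
            Reporter reporter) throws IOException {
        // Increment a custom counter; Hadoop creates it on first use and
        // sums the per-task contributions into the job-level total.
        reporter.incrCounter("MyApp", "records-processed", 1L);
        output.collect(value, new LongWritable(1L));
    }
}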
From source file:RandIntPartSamplerMapper.java
License:Apache License
@Override
public void map(NullWritable lineNum, TextArrayWritable transactionsArrWr,
        OutputCollector<IntWritable, Text> output, Reporter reporter) throws IOException {
    // Record this mapper's start time as a counter value, keyed by mapper id.
    reporter.incrCounter("FIMMapperStart", String.valueOf(id), System.currentTimeMillis());
    Random rand = new Random();
    Writable[] transactions = transactionsArrWr.get();
    int transactionsNum = transactions.length;
    System.out.println("transactionsNum: " + transactionsNum);
    for (int i = 0; i < toSample; i++) {
        int sampledIndex = rand.nextInt(transactionsNum);
        output.collect(sampleDestinations[i], (Text) transactions[sampledIndex]);
    }
    reporter.incrCounter("FIMMapperEnd", String.valueOf(id), System.currentTimeMillis());
}
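This example uses counters as a side channel for per-task wall-clock timestamps: each (group, id) pair is written by exactly one mapper, so the aggregated "sum" is simply the recorded time. A hypothetical driver-side sketch of reading those values back after the job finishes (old mapred API; the FIMDriver class name and the elided job setup are assumptions):

import org.apache.hadoop.mapred.Counters;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.RunningJob;

public class FIMDriver {
    public static void main(String[] args) throws Exception {
        JobConf conf = new JobConf(FIMDriver.class);
        // ... input/output paths, mapper/reducer classes elided ...
        RunningJob job = JobClient.runJob(conf); // blocks until completion
        Counters counters = job.getCounters();
        // Each counter in the group is named after a mapper id; its value is
        // the start timestamp that mapper recorded via incrCounter().
        for (Counters.Counter c : counters.getGroup("FIMMapperStart")) {
            System.out.println("mapper " + c.getName() + " started at " + c.getCounter());
        }
    }
}

Storing timestamps in counters avoids writing a side file to HDFS, at the cost of bending counter semantics (the values are timestamps, not counts).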
From source file:FIMReducer.java
License:Apache License
@Override
public void reduce(IntWritable key, Iterator<Text> values,
        OutputCollector<Text, DoubleWritable> output, Reporter reporter) throws IOException {
    long startTime = System.currentTimeMillis();
    if (!set) {
        reporter.incrCounter("FIMReducerStart", String.valueOf(id), startTime);
        reporter.incrCounter("FIMReducerEnd", String.valueOf(id), startTime);
        set = true;
    }
    // This is a very crappy way of checking whether we got the
    // right number of transactions. It may not be too inefficient though.
    ArrayList<Text> transactions = new ArrayList<Text>(sampleSize);
    while (values.hasNext()) {
        Text trans = new Text(values.next().toString());
        transactions.add(trans);
    }
    if (sampleSize != transactions.size()) {
        System.out.println("WRONG NUMBER OF TRANSACTIONS!");
    }
    System.out.println("samplesize: " + sampleSize + " received: " + transactions.size());
    FPgrowth.mineFrequentItemsets(transactions.iterator(), transactions.size(),
            minFreqPercent - (epsilon * 50), output);
    long endTime = System.currentTimeMillis();
    // Add the elapsed time to the end-time counter recorded above.
    reporter.incrCounter("FIMReducerEnd", String.valueOf(id), endTime - startTime);
}
From source file:InputSamplerMapper.java
License:Apache License
@Override
public void map(LongWritable key, Text value, OutputCollector<IntWritable, Text> output,
        Reporter reporter) throws IOException {
    reporter.incrCounter("FIMMapperStart", String.valueOf(id), System.currentTimeMillis());
    IntArrayWritable arr = (IntArrayWritable) map.get(key);
    if (arr != null) {
        for (Writable element : arr.get()) {
            output.collect((IntWritable) element, value);
        }
    }
    reporter.incrCounter("FIMMapperEnd", String.valueOf(id), System.currentTimeMillis());
}
From source file:colossal.pipe.ColHadoopMapper.java
License:Apache License
@SuppressWarnings("unchecked")
@Override
public void map(KEY wrapper, VALUE value, OutputCollector<KO, VO> collector, Reporter reporter)
        throws IOException {
    if (this.context == null) {
        KeyExtractor<GenericData.Record, OUT> extractor = new ReflectionKeyExtractor<OUT>(schema,
                groupBy, sortBy);
        this.context = new ColContext<OUT>(new Collector(collector, extractor), reporter);
    }
    if (isTextInput) {
        mapper.map((IN) value, out, context);
    } else if (isStringInput) {
        mapper.map((IN) ((Text) value).toString(), out, context);
    } else if (isJsonInput) {
        String json = ((Text) value).toString();
        if (shouldSkip(json))
            return;
        // Inefficient implementation of JSON to Avro; more efficient would be
        // JsonToClass.jsonToRecord:
        // mapper.map((IN) JsonToClass.jsonToRecord(json, inSchema), out, context);
        // Silly conversion approach: serialize, then deserialize.
        try {
            GenericContainer c = JsonToGenericRecord.jsonToRecord(json, inSchema);
            GenericDatumWriter<GenericContainer> writer =
                    new GenericDatumWriter<GenericContainer>(inSchema);
            ByteArrayOutputStream bos = new ByteArrayOutputStream();
            writer.setSchema(inSchema);
            writer.write(c, new BinaryEncoder(bos));
            byte[] data = bos.toByteArray();
            GenericDatumReader<IN> reader = new SpecificDatumReader<IN>(inSchema);
            reader.setSchema(inSchema);
            IN converted = reader.read(null,
                    DecoderFactory.defaultFactory().createBinaryDecoder(data, null));
            mapper.map(converted, out, context);
        } catch (JsonParseException jpe) {
            System.err.println("Failed to parse " + json + ": " + jpe.getMessage());
            reporter.incrCounter("ColHadoopMapper", "json-parse-error", 1L);
            if (++parseErrors > maxAllowedErrors) {
                throw new RuntimeException(jpe);
            }
        }
    } else {
        mapper.map(((AvroWrapper<IN>) wrapper).datum(), out, context);
    }
}
From source file:com.blackberry.logdriver.mapred.avro.AvroBlockWriterMapper.java
License:Apache License
@Override
public void map(AvroFileHeader key, BytesWritable value,
        OutputCollector<BytesWritable, NullWritable> output, Reporter reporter) throws IOException {
    byte[] valueBytes = null;
    if (header.getSyncMarker() == null) {
        LOG.info("Writing new header for new file: {}", key.toString());
        header.set(key);
        output.collect(new BytesWritable(header.toBytes()), null);
    } else {
        AvroFileHeader newHeader = key;
        if (!header.getSchema().equals(newHeader.getSchema())) {
            throw new IOException("Schemas in files do not match.");
        }
        if (!header.getCodec().equals(newHeader.getCodec())) {
            throw new IOException("Codecs in files do not match.");
        }
    }
    if (value.getLength() == 0) {
        return;
    }
    valueBytes = Arrays.copyOfRange(value.getBytes(), 0, value.getLength());
    output.collect(new BytesWritable(valueBytes), null);
    output.collect(new BytesWritable(header.getSyncMarker()), null);
    reporter.incrCounter("Avro Block", "Blocks processed", 1);
    // The extra 16 bytes account for the sync marker emitted after the block.
    reporter.incrCounter("Avro Block", "Bytes processed", value.getLength() + 16);
}
From source file:com.digitalpebble.behemoth.BehemothMapper.java
License:Apache License
public void map(Text key, BehemothDocument inputDoc, OutputCollector<Text, BehemothDocument> output,
        Reporter reporter) throws IOException {
    boolean keep = docFilter.keep(inputDoc);
    if (!keep) {
        reporter.incrCounter("BehemothMapper", "DOC SKIPPED BY FILTERS", 1);
        return;
    }
    output.collect(key, inputDoc);
}
From source file:com.digitalpebble.behemoth.BehemothReducer.java
License:Apache License
public void reduce(Text key, Iterator<BehemothDocument> doc,
        OutputCollector<Text, BehemothDocument> output, Reporter reporter) throws IOException {
    while (doc.hasNext()) {
        BehemothDocument inputDoc = doc.next();
        boolean keep = docFilter.keep(inputDoc);
        if (!keep) {
            reporter.incrCounter("BehemothReducer", "DOC SKIPPED BY FILTERS", 1);
            continue;
        }
        output.collect(key, inputDoc);
    }
}
From source file:com.digitalpebble.behemoth.ClassifierJob.java
License:Apache License
public void map(Text key, BehemothDocument doc, OutputCollector<Text, BehemothDocument> collector,
        Reporter reported) throws IOException {
    // Get the text.
    if (doc.getText() == null || doc.getText().length() < 2) {
        reported.incrCounter("text classification", "MISSING TEXT", 1);
        filterOrCollect(key, doc, collector, reported);
        return;
    }
    // Use the quick and dirty tokenization.
    String[] tokens = Tokenizer.tokenize(doc.getText(), lowerCase);
    // TODO use annotations instead?
    Document tcdoc = classifier.createDocument(tokens);
    double[] scores;
    try {
        scores = classifier.classify(tcdoc);
    } catch (Exception e) {
        LOG.error("Exception while classifying", e);
        filterOrCollect(key, doc, collector, reported);
        reported.incrCounter("text classification", "EXCEPTION", 1);
        return;
    }
    String label = classifier.getBestLabel(scores);
    doc.getMetadata(true).put(new Text(docFeaturename), new Text(label));
    filterOrCollect(key, doc, collector, reported);
    reported.incrCounter("text classification", label, 1);
}
From source file:com.digitalpebble.behemoth.ClassifierJob.java
License:Apache License
private void filterOrCollect(Text key, BehemothDocument doc,
        OutputCollector<Text, BehemothDocument> collector, Reporter reported) throws IOException {
    if (filter.keep(doc)) {
        collector.collect(key, doc);
    } else {
        reported.incrCounter("text classification", "FILTERED", 1L);
    }
}
From source file:com.digitalpebble.behemoth.gate.GATEProcessor.java
License:Apache License
public synchronized String processNative(BehemothDocument inputDoc, Reporter reporter) {
    if (reporter != null)
        reporter.setStatus("GATE : " + inputDoc.getUrl().toString());
    // Process the text passed as value with the application:
    // a) create a GATE document based on the text value
    gate.Document gatedocument = null;
    try {
        gatedocument = generateGATEDoc(inputDoc);
        // Add it to the current corpus.
        corpus.add(gatedocument);
        // Get the application and assign the corpus to it.
        this.GATEapplication.setCorpus(corpus);
        // Process it with GATE.
        this.GATEapplication.execute();
        // Transfer the annotations from the GATE document
        // to the Behemoth one using the filters.
        if (reporter != null)
            reporter.incrCounter("GATE", "Document", 1);
        return gatedocument.toXml();
    } catch (Exception e) {
        LOG.error(inputDoc.getUrl().toString(), e);
        if (reporter != null)
            reporter.incrCounter("GATE", "Exceptions", 1);
    } finally {
        // Remove the document from the corpus again...
        corpus.clear();
        // ...and from memory.
        if (gatedocument != null)
            Factory.deleteResource(gatedocument);
    }
    return null;
}