Example usage for org.apache.hadoop.mapred Reporter incrCounter

Introduction

This page collects example usages of the incrCounter method of org.apache.hadoop.mapred.Reporter.

Prototype

public abstract void incrCounter(String group, String counter, long amount);

Document

Increments the counter identified by the group and counter name by the specified amount.
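
Counter values passed to incrCounter are aggregated across all map and reduce tasks and can be read back by the driver once the job completes. Below is a minimal sketch (the CounterExampleMapper class and the "CounterExample" group and counter names are hypothetical, not taken from the examples on this page) of a mapper that increments two custom counters.

import java.io.IOException;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;

// Hypothetical mapper illustrating incrCounter(group, counter, amount).
public class CounterExampleMapper extends MapReduceBase
        implements Mapper<LongWritable, Text, Text, LongWritable> {

    @Override
    public void map(LongWritable key, Text value, OutputCollector<Text, LongWritable> output,
            Reporter reporter) throws IOException {
        if (value.getLength() == 0) {
            // Count empty input lines under a custom group/counter pair.
            reporter.incrCounter("CounterExample", "EMPTY_LINES", 1L);
            return;
        }
        reporter.incrCounter("CounterExample", "NON_EMPTY_LINES", 1L);
        output.collect(value, new LongWritable(1L));
    }
}

After JobClient.runJob(conf) returns a RunningJob, the aggregated totals can be read with, for example, job.getCounters().getGroup("CounterExample").getCounter("EMPTY_LINES").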

Usage

From source file:RandIntPartSamplerMapper.java

License:Apache License

@Override
public void map(NullWritable lineNum, TextArrayWritable transactionsArrWr,
        OutputCollector<IntWritable, Text> output, Reporter reporter) throws IOException {
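    // Note: this example uses counters to record wall-clock timestamps; the
    // "amount" passed to incrCounter is System.currentTimeMillis(), not a count.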
    reporter.incrCounter("FIMMapperStart", String.valueOf(id), System.currentTimeMillis());
    Random rand = new Random();
    Writable[] transactions = transactionsArrWr.get();
    int transactionsNum = transactions.length;
    System.out.println("transactionsNum: " + transactionsNum);
    for (int i = 0; i < toSample; i++) {
        int sampledIndex = rand.nextInt(transactionsNum);
        output.collect(sampleDestinations[i], (Text) transactions[sampledIndex]);
    }
    reporter.incrCounter("FIMMapperEnd", String.valueOf(id), System.currentTimeMillis());
}

From source file:FIMReducer.java

License:Apache License

@Override
public void reduce(IntWritable key, Iterator<Text> values, OutputCollector<Text, DoubleWritable> output,
        Reporter reporter) throws IOException {
    long startTime = System.currentTimeMillis();
    if (!set) {
        reporter.incrCounter("FIMReducerStart", String.valueOf(id), startTime);
        reporter.incrCounter("FIMReducerEnd", String.valueOf(id), startTime);
        set = true;
    }

    // This is a very crappy way of checking whether we got the
    // right number of transactions. It may not be too inefficient
    // though.
    ArrayList<Text> transactions = new ArrayList<Text>(sampleSize);
    while (values.hasNext()) {
        Text trans = new Text(values.next().toString());
        transactions.add(trans);
    }
    if (sampleSize != transactions.size()) {
        System.out.println("WRONG NUMBER OF TRANSACTIONS!");
    }
    System.out.println("samplesize: " + sampleSize + " received: " + transactions.size());
    FPgrowth.mineFrequentItemsets(transactions.iterator(), transactions.size(), minFreqPercent - (epsilon * 50),
            output);

    long endTime = System.currentTimeMillis();
    reporter.incrCounter("FIMReducerEnd", String.valueOf(id), endTime - startTime);
}

From source file:InputSamplerMapper.java

License:Apache License

@Override
public void map(LongWritable key, Text value, OutputCollector<IntWritable, Text> output, Reporter reporter)
        throws IOException {
    reporter.incrCounter("FIMMapperStart", String.valueOf(id), System.currentTimeMillis());
    IntArrayWritable arr = (IntArrayWritable) map.get(key);
    if (arr != null) {
        for (Writable element : arr.get()) {
            output.collect((IntWritable) element, value);
        }
    }
    reporter.incrCounter("FIMMapperEnd", String.valueOf(id), System.currentTimeMillis());
}

From source file:colossal.pipe.ColHadoopMapper.java

License:Apache License

@SuppressWarnings("unchecked")
@Override
public void map(KEY wrapper, VALUE value, OutputCollector<KO, VO> collector, Reporter reporter)
        throws IOException {
    if (this.context == null) {
        KeyExtractor<GenericData.Record, OUT> extractor = new ReflectionKeyExtractor<OUT>(schema, groupBy,
                sortBy);
        this.context = new ColContext<OUT>(new Collector(collector, extractor), reporter);
    }
    if (isTextInput) {
        mapper.map((IN) value, out, context);
    } else if (isStringInput) {
        mapper.map((IN) ((Text) value).toString(), out, context);
    } else if (isJsonInput) {
        String json = ((Text) value).toString();
        if (shouldSkip(json))
            return;
        // inefficient implementation of json to avro...
        // more efficient would be JsonToClass.jsonToRecord:
        //            mapper.map((IN) JsonToClass.jsonToRecord(json, inSchema), out, context);

        // silly conversion approach - serialize then deserialize
        try {
            GenericContainer c = JsonToGenericRecord.jsonToRecord(json, inSchema);
            GenericDatumWriter<GenericContainer> writer = new GenericDatumWriter<GenericContainer>(inSchema);
            ByteArrayOutputStream bos = new ByteArrayOutputStream();
            writer.setSchema(inSchema);
            writer.write(c, new BinaryEncoder(bos));
            byte[] data = bos.toByteArray();

            GenericDatumReader<IN> reader = new SpecificDatumReader<IN>(inSchema);
            reader.setSchema(inSchema);

            IN converted = reader.read(null, DecoderFactory.defaultFactory().createBinaryDecoder(data, null));

            mapper.map(converted, out, context);
        } catch (JsonParseException jpe) {
            System.err.println("Failed to parse " + json + ": " + jpe.getMessage());
            reporter.incrCounter("ColHadoopMapper", "json-parse-error", 1L);
            if (++parseErrors > maxAllowedErrors) {
                throw new RuntimeException(jpe);
            }
        }
    } else {
        mapper.map(((AvroWrapper<IN>) wrapper).datum(), out, context);
    }
}

From source file:com.blackberry.logdriver.mapred.avro.AvroBlockWriterMapper.java

License:Apache License

@Override
public void map(AvroFileHeader key, BytesWritable value, OutputCollector<BytesWritable, NullWritable> output,
        Reporter reporter) throws IOException {
    byte[] valueBytes = null;

    if (header.getSyncMarker() == null) {
        LOG.info("Writing new header for new file: {}", key.toString());
        header.set(key);
        output.collect(new BytesWritable(header.toBytes()), null);
    } else {
        AvroFileHeader newHeader = key;
        if (!header.getSchema().equals(newHeader.getSchema())) {
            throw new IOException("Schemas in files do not match.");
        }
        if (!header.getCodec().equals(newHeader.getCodec())) {
            throw new IOException("Codecs in files do not match.");
        }
    }

    if (value.getLength() == 0) {
        return;
    }

    valueBytes = Arrays.copyOfRange(value.getBytes(), 0, value.getLength());
    output.collect(new BytesWritable(valueBytes), null);
    output.collect(new BytesWritable(header.getSyncMarker()), null);

    reporter.incrCounter("Avro Block", "Blocks processed", 1);
    reporter.incrCounter("Avro Block", "Bytes processed", value.getLength() + 16);
}

From source file:com.digitalpebble.behemoth.BehemothMapper.java

License:Apache License

public void map(Text key, BehemothDocument inputDoc, OutputCollector<Text, BehemothDocument> output,
        Reporter reporter) throws IOException {
    boolean keep = docFilter.keep(inputDoc);
    if (!keep) {
        reporter.incrCounter("BehemothMapper", "DOC SKIPPED BY FILTERS", 1);
        return;
    }
    output.collect(key, inputDoc);
}

From source file:com.digitalpebble.behemoth.BehemothReducer.java

License:Apache License

public void reduce(Text key, Iterator<BehemothDocument> doc, OutputCollector<Text, BehemothDocument> output,
        Reporter reporter) throws IOException {

    while (doc.hasNext()) {
        BehemothDocument inputDoc = doc.next();
        boolean keep = docFilter.keep(inputDoc);
        if (!keep) {
            reporter.incrCounter("BehemothReducer", "DOC SKIPPED BY FILTERS", 1);
            continue;
        }
        output.collect(key, inputDoc);

    }

}

From source file:com.digitalpebble.behemoth.ClassifierJob.java

License:Apache License

public void map(Text key, BehemothDocument doc, OutputCollector<Text, BehemothDocument> collector,
        Reporter reported) throws IOException {

    // get the text
    if (doc.getText() == null || doc.getText().length() < 2) {
        reported.incrCounter("text classification", "MISSING TEXT", 1);
        filterOrCollect(key, doc, collector, reported);
        return;
    }
    // use the quick and dirty tokenization
    String[] tokens = Tokenizer.tokenize(doc.getText(), lowerCase);
    // TODO use annotations instead?
    Document tcdoc = classifier.createDocument(tokens);
    double[] scores;
    try {
        scores = classifier.classify(tcdoc);
    } catch (Exception e) {
        LOG.error("Exception while classifying", e);
        filterOrCollect(key, doc, collector, reported);
        reported.incrCounter("text classification", "EXCEPTION", 1);
        return;
    }
    String label = classifier.getBestLabel(scores);
    doc.getMetadata(true).put(new Text(docFeaturename), new Text(label));
    filterOrCollect(key, doc, collector, reported);
    reported.incrCounter("text classification", label, 1);
}

From source file:com.digitalpebble.behemoth.ClassifierJob.java

License:Apache License

private void filterOrCollect(Text key, BehemothDocument doc, OutputCollector<Text, BehemothDocument> collector,
        Reporter reported) throws IOException {
    if (filter.keep(doc)) {
        collector.collect(key, doc);
    } else
        reported.incrCounter("text classification", "FILTERED", 1l);
}

From source file:com.digitalpebble.behemoth.gate.GATEProcessor.java

License:Apache License

public synchronized String processNative(BehemothDocument inputDoc, Reporter reporter) {
    if (reporter != null)
        reporter.setStatus("GATE : " + inputDoc.getUrl().toString());
    // process the text passed as value with the application
    // a) create a GATE document based on the text value
    gate.Document gatedocument = null;
    try {

        gatedocument = generateGATEDoc(inputDoc);
        // add it to the current corpus
        corpus.add(gatedocument);
        // get the application and assign the corpus to it
        this.GATEapplication.setCorpus(corpus);
        // process it with GATE
        this.GATEapplication.execute();

        // transfer the annotations from the GATE document
        // to the Behemoth one using the filters
        if (reporter != null)
            reporter.incrCounter("GATE", "Document", 1);

        return gatedocument.toXml();

    } catch (Exception e) {
        LOG.error(inputDoc.getUrl().toString(), e);
        if (reporter != null)
            reporter.incrCounter("GATE", "Exceptions", 1);
    } finally {
        // remove the document from the corpus again
        corpus.clear();
        // and from memory
        if (gatedocument != null)
            Factory.deleteResource(gatedocument);
    }
    return null;
}