Example usage for org.apache.hadoop.mapred Reporter getCounter

Introduction

On this page you can find example usage of org.apache.hadoop.mapred.Reporter.getCounter.

Prototype

public abstract Counter getCounter(Enum<?> name);

Document

Get the Counter of the given group with the given name.
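
As a quick orientation before the project examples in the Usage section, here is a minimal sketch of the typical pattern with the old org.apache.hadoop.mapred API: look up the Counter for an enum constant via Reporter.getCounter, then increment it. The LineCountingMapper class and the LineCounters enum are illustrative assumptions, not taken from any of the projects below.

import java.io.IOException;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.Counters;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;

public class LineCountingMapper extends MapReduceBase
        implements Mapper<LongWritable, Text, Text, LongWritable> {

    // Hypothetical counter enum; any Enum<?> constant can name a counter.
    public enum LineCounters { EMPTY, NON_EMPTY }

    @Override
    public void map(LongWritable key, Text value, OutputCollector<Text, LongWritable> output,
            Reporter reporter) throws IOException {
        String line = value.toString().trim();

        // Look up the Counter for the chosen enum constant and increment it.
        Counters.Counter counter = line.isEmpty()
                ? reporter.getCounter(LineCounters.EMPTY)
                : reporter.getCounter(LineCounters.NON_EMPTY);
        if (counter != null) {
            counter.increment(1L);
        }

        if (!line.isEmpty()) {
            output.collect(new Text(line), new LongWritable(1L));
        }
    }
}

Note that reporter.incrCounter(name, amount) is an equivalent shortcut that skips the explicit lookup; several of the examples below mix both styles.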

Usage

From source file: BU.MET.CS755.SpeciesIterReducer2.java

public void reduce(WritableComparable key, Iterator values, OutputCollector output, Reporter reporter)
        throws IOException {
    double score = 0;
    String outLinks = "";
    double oldScore = 0;

    // Counting links
    reporter.incrCounter(BU.MET.CS755.SpeciesIterDriver2.ITERATION_COUNTER.TOTAL_LINKS, 1L);

    if (iterationNumber == 1) {
        return;
    }

    while (values.hasNext()) {
        String curr = ((Text) values.next()).toString();

        int colon = curr.indexOf(":");
        int space = curr.indexOf(" ");
        int oldrank = curr.indexOf("oldrank");

        if (colon > -1) {
            String presScore = curr.substring(0, colon);
            try {
                score += Double.parseDouble(presScore);
                oldScore = score;
                outLinks = curr.substring(colon + 1);
                continue;
            } catch (Exception e) {
                // Not a numeric score prefix; fall through to the checks below.
            }
        }

        if (space > -1) {
            outLinks = curr;
        } else if (oldrank > -1) {
            oldScore = Double.parseDouble(curr.substring(oldrank + 8));
        } else {
            score += Double.parseDouble(curr);
        }
    }

    String toEmit;

    if (outLinks.length() > 0) {
        toEmit = Double.toString(score) + ":" + outLinks;
    } else {
        toEmit = Double.toString(score);
    }

    // Output the new page rank
    output.collect(key, new Text(toEmit));

    double delta = oldScore - score;

    // Check how much the new page rank has changed. If the change is less
    // than two decimal places, treat it as a converged value. If not,
    // we need to re-calculate the rank with one more iteration; inform the
    // driver about that by incrementing the iterations needed counter.
    if ((delta > 0.009) || (delta < -0.009)) {
        Counter myCounter2 = reporter
                .getCounter(BU.MET.CS755.SpeciesIterDriver2.ITERATION_COUNTER.ITERATIONS_NEEDED);

        if (myCounter2 != null) {
            reporter.incrCounter(BU.MET.CS755.SpeciesIterDriver2.ITERATION_COUNTER.ITERATIONS_NEEDED, 1L);
        }
    }
}

From source file: cn.scala.es.ReportingUtils.java

License: Apache License

private static void oldApiCounter(Reporter reporter, Enum<?> counter, long value) {
    try {
        org.apache.hadoop.mapred.Counters.Counter c = reporter.getCounter(counter);
        if (c != null) {
            c.increment(value);
        }
    } catch (Exception ex) {
        // counter unavailable
    }
}

From source file: com.mh2c.WikipediaDumpLoaderMapper.java

License: Apache License

/**
 * key = article content
 * value = empty string
 */
@Override
public void map(Text key, Text value, OutputCollector<Text, Text> output, Reporter reporter)
        throws IOException {
    try {
        // Parse the page of XML into a document
        Document doc = db.parse(new InputSource(new StringReader(key.toString())));

        // Extract the title and text (article content) from the page content
        String title = doc.getElementsByTagName("title").item(0).getTextContent();
        String text = doc.getElementsByTagName("text").item(0).getTextContent();

        // Emit the title and text pair
        output.collect(new Text(title), new Text(text));
        reporter.getCounter(Counter.ARTICLES).increment(1L);
    } catch (SAXException e) {
        throw new IOException(e);
    }
}

From source file: mapreducecountersoldapi.MaxCountMapperOldApi.java

@Override
public void map(LongWritable key, Text value, OutputCollector<Text, LongWritable> output, Reporter reporter)
        throws IOException {

    String line[] = value.toString().split(",");
    String identificador = line[0];
    String tiempo = line[1];
    String producto = line[2];
    String categoria = line[3];

    switch (categoria) {
    case "0":
        reporter.getCounter(Categoria.Cat0).increment(1);
        break;
    case "1":
        reporter.getCounter(Categoria.Cat1).increment(1);
        break;
    case "2":
        reporter.getCounter(Categoria.Cat2).increment(1);
        break;
    case "3":
        reporter.getCounter(Categoria.Cat3).increment(1);
        break;
    case "5":
        reporter.getCounter(Categoria.Cat5).increment(1);
        break;
    case "S":
        reporter.getCounter(Categoria.CatS).increment(1);
        break;
    }

    DateFormat df = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSS");
    try {
        Date tiempos = df.parse(tiempo);
        output.collect(new Text(identificador), new LongWritable(tiempos.getTime()));
    } catch (Exception e) {
        //context.write(new Text(identificador), new LongWritable(new Date().getTime()));
        System.out.println("Error Covnersion __________________");
    }

}

From source file: org.apache.mahout.clustering.lda.cvb.PriorTrainingReducer.java

License: Apache License

@Override
public void reduce(IntWritable docId, Iterator<VectorWritable> vectors,
        OutputCollector<IntWritable, VectorWritable> out, Reporter reporter) throws IOException {
    if (this.reporter == null) {
        this.reporter = reporter;
    }
    Counter docCounter = reporter.getCounter(Counters.DOCS);
    docCounter.increment(1);
    Vector topicVector = null;
    Vector document = null;
    while (vectors.hasNext()) {
        VectorWritable v = vectors.next();
        /*
         *  NOTE: we are susceptible to the pathological case of numTerms == numTopics (which should
         *  never happen, as that would generate a horrible topic model), because we identify which
         *  vector is the "prior" and which is the document by document.size() == numTerms
         */
        if (v.get().size() == numTerms) {
            document = v.get();
        } else {
            topicVector = v.get();
        }
    }
    if (document == null) {
        if (topicVector != null) {
            reporter.getCounter(Counters.UNUSED_PRIORS).increment(1);
        }
        reporter.getCounter(Counters.SKIPPED_DOC_IDS).increment(1);
        return;
    } else if (topicVector == null && onlyLabeledDocs) {
        reporter.getCounter(Counters.SKIPPED_DOC_IDS).increment(1);
        return;
    } else {
        if (topicVector == null) {
            topicVector = new DenseVector(numTopics).assign(1.0 / numTopics);
        } else {
            if (reporter.getCounter(Counters.DOCS_WITH_PRIORS).getCounter() % 100 == 0) {
                long docsWithPriors = reporter.getCounter(Counters.DOCS_WITH_PRIORS).getCounter();
                long skippedDocs = reporter.getCounter(Counters.SKIPPED_DOC_IDS).getCounter();
                long total = reporter.getCounter(Counters.DOCS).getCounter();
                log.info("Processed {} docs total, {} with priors, skipped {} docs",
                        new Object[] { total, docsWithPriors, skippedDocs });
            }
            reporter.getCounter(Counters.DOCS_WITH_PRIORS).increment(1);
        }
        modelTrainer.trainSync(document, topicVector, true, 1);
        multipleOutputs.getCollector(DOC_TOPICS, reporter).collect(docId, new VectorWritable(topicVector));
        reporter.getCounter(Counters.USED_DOCS).increment(1);
    }
}

From source file: org.elasticsearch.hadoop.mr.ReportingUtils.java

License: Apache License

private static void oldApiCounter(Reporter reporter, Enum<?> counter, long value) {
    org.apache.hadoop.mapred.Counters.Counter c = reporter.getCounter(counter);
    if (c != null) {
        c.increment(value);
    }
}

From source file: org.warcbase.index.IndexerReducer.java

License: Apache License

@Override
public void reduce(IntWritable key, Iterator<WritableSolrRecord> values, OutputCollector<Text, Text> output,
        Reporter reporter) throws IOException {
    SolrRecord solr;

    // Get the shard number, but counting from 1 instead of 0:
    int shard = key.get() + 1;

    // For indexing into HDFS, set up a new server per key:
    initEmbeddedServer(shard);

    // Go through the documents for this shard:
    long cnt = 0;
    while (values.hasNext()) {
        solr = values.next().getSolrRecord();
        cnt++;

        docs.add(solr.getSolrDocument());
        // Have we exceeded the batchSize?
        checkSubmission(docs, batchSize, reporter);

        // Occasionally update application-level status:
        if ((cnt % 1000) == 0) {
            reporter.setStatus(SHARD_PREFIX + shard + ": processed " + cnt + ", dropped "
                    + reporter.getCounter(MyCounters.NUM_DROPPED_RECORDS).getValue());
        }
    }

    try {
        // If we have at least one document unsubmitted, make sure we submit it.
        checkSubmission(docs, 1, reporter);

        // If we are indexing to HDFS, shut the shard down:
        // Commit, and block until the changes have been flushed.
        solrServer.commit(true, false);
        solrServer.shutdown();
    } catch (Exception e) {
        LOG.error("ERROR on commit: " + e);
        e.printStackTrace();
    }
}

From source file: uk.bl.wa.hadoop.datasets.WARCDatasetReducer.java

License: Open Source License

@Override
public void reduce(Text key, Iterator<Text> values, OutputCollector<Text, Text> output, Reporter reporter)
        throws IOException {
    Text wsr;

    // Go through the documents for this shard:
    long noValues = 0;
    while (values.hasNext()) {
        wsr = values.next();
        output.collect(key, wsr);
        noValues++;

        // Occasionally update application-level status:
        if ((noValues % 1000) == 0) {
            reporter.setStatus("Processed " + noValues + ", dropped "
                    + reporter.getCounter(MyCounters.NUM_DROPPED_RECORDS).getValue());
        }
    }

}

From source file: uk.bl.wa.hadoop.indexer.WARCIndexerReducer.java

License: Open Source License

@Override
public void reduce(IntWritable key, Iterator<WritableSolrRecord> values, OutputCollector<Text, Text> output,
        Reporter reporter) throws IOException {
    WctEnricher wct;
    WritableSolrRecord wsr;
    SolrRecord solr;

    // Get the slice number, but counting from 1 instead of 0:
    int slice = key.get() + 1;

    // Go through the documents for this shard:
    long noValues = 0;
    while (values.hasNext()) {
        wsr = values.next();
        solr = wsr.getSolrRecord();
        noValues++;

        // Add additional metadata for WCT Instances.
        if (solr.containsKey(WctFields.WCT_INSTANCE_ID)) {
            wct = new WctEnricher(key.toString());
            wct.addWctMetadata(solr);
        }
        if (!dummyRun) {
            docs.add(solr.getSolrDocument());
            // Have we exceeded the batchSize?
            checkSubmission(docs, batchSize, reporter);
        } else {
            log.info("DUMMY_RUN: Skipping addition of doc: " + solr.getField("id").getFirstValue());
        }

        // Occasionally update application-level status:
        if ((noValues % 1000) == 0) {
            reporter.setStatus(this.shardPrefix + slice + ": processed " + noValues + ", dropped "
                    + reporter.getCounter(MyCounters.NUM_DROPPED_RECORDS).getValue());
        }
        if (this.exportXml && solr.getSolrDocument().getFieldValue(SolrFields.SOLR_URL_TYPE) != null
                && solr.getSolrDocument().getFieldValue(SolrFields.SOLR_URL_TYPE)
                        .equals(SolrFields.SOLR_URL_TYPE_SLASHPAGE)) {
            output.collect(new Text(""),
                    new Text(MetadataBuilder.SolrDocumentToElement(solr.getSolrDocument())));
        }
    }

    try {
        /**
         * If we have at least one document unsubmitted, make sure we submit
         * it.
         */
        checkSubmission(docs, 1, reporter);

        // If we are indexing to HDFS, shut the shard down:
        if (useEmbeddedServer) {
            // Commit, and block until the changes have been flushed.
            solrServer.commit(true, false);
            // And shut it down.
            solrServer.shutdown();
        }

    } catch (Exception e) {
        log.error("ERROR on commit: " + e);
        e.printStackTrace();
    }

}

From source file: uk.bl.wa.hadoop.mapreduce.mdx.MDXReduplicatingReducer.java

License: Open Source License

@Override
public void reduce(Text key, Iterator<Text> values, OutputCollector<Text, Text> output, Reporter reporter)
        throws IOException {

    try {
        long noValues = 0;
        String json;
        MDX mdx;
        String exemplar = null;
        List<MDX> toReduplicate = new ArrayList<MDX>();
        while (values.hasNext()) {
            json = values.next().toString();
            mdx = new MDX(json);
            noValues++;

            // Collect the revisit records:
            if (revisit.equals(mdx.getRecordType())) {
                // Add this revisit record to the stack:
                reporter.incrCounter(MyCounters.NUM_REVISITS, 1);
                toReduplicate.add(mdx);
            } else {
                // Record a response record:
                if (exemplar == null && response.equals(mdx.getRecordType())) {
                    exemplar = json;
                }
                // Collect complete records:
                output.collect(key, new Text(mdx.toString()));
            }

            // Report:
            reporter.incrCounter(MyCounters.NUM_RECORDS, 1);

            // Occasionally update status report:
            if ((noValues % 1000) == 0) {
                reporter.setStatus("Processed " + noValues + ", of which "
                        + reporter.getCounter(MyCounters.NUM_REVISITS).getValue()
                        + " records need reduplication.");
            }

        }

        // Mid-reduce status:
        log.info("Mid-reduce: Processed " + noValues + ", of which "
                + reporter.getCounter(MyCounters.NUM_REVISITS).getValue() + " records need reduplication.");

        // Now fix up revisits:
        for (MDX rmdxw : toReduplicate) {
            // Set outKey based on hash:
            // Text outKey = new rmdxw.getHash();
            // Handle merge:
            if (exemplar != null) {
                // Modify the record type and merge the properties:
                MDX rmdx = new MDX(exemplar);
                @SuppressWarnings("unchecked")
                Iterator<String> keys = rmdxw.keys();
                while (keys.hasNext()) {
                    String k = keys.next();
                    rmdx.put(k, rmdxw.get(k));
                }
                rmdx.setRecordType("reduplicated");
                reporter.incrCounter(MyCounters.NUM_RESOLVED_REVISITS, 1);
                // Collect resolved records:
                output.collect(key, new Text(rmdx.toString()));
            } else {
                reporter.incrCounter(MyCounters.NUM_UNRESOLVED_REVISITS, 1);
                // Collect unresolved records:
                output.collect(key, new Text(rmdxw.toString()));
            }
        }
    } catch (JSONException e) {
        log.error("Exception in MDX reducer.", e);
        e.printStackTrace();
        reporter.incrCounter(MyCounters.NUM_ERRORS, 1);
    }

}