List of usage examples for org.apache.hadoop.mapred.Reporter.getCounter
public abstract Counter getCounter(Enum<?> name);
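Before the full examples below, here is a minimal sketch of the typical pattern: look up a Counters.Counter by enum constant via Reporter.getCounter and increment it (or use the incrCounter shorthand). The WordCounter enum and the map body are hypothetical illustrations; only the Reporter, Counters.Counter, and OutputCollector calls come from the old org.apache.hadoop.mapred API.

    import java.io.IOException;
    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapred.Counters.Counter;
    import org.apache.hadoop.mapred.MapReduceBase;
    import org.apache.hadoop.mapred.Mapper;
    import org.apache.hadoop.mapred.OutputCollector;
    import org.apache.hadoop.mapred.Reporter;

    public class CountingMapper extends MapReduceBase
            implements Mapper<LongWritable, Text, Text, LongWritable> {

        // Hypothetical counter group, for illustration only.
        public enum WordCounter { LINES, EMPTY_LINES }

        @Override
        public void map(LongWritable key, Text value, OutputCollector<Text, LongWritable> output,
                Reporter reporter) throws IOException {
            // Look up the counter by enum constant and increment it if available.
            Counter lines = reporter.getCounter(WordCounter.LINES);
            if (lines != null) {
                lines.increment(1L);
            }
            if (value.toString().trim().isEmpty()) {
                // incrCounter is the shorthand that skips the explicit lookup.
                reporter.incrCounter(WordCounter.EMPTY_LINES, 1L);
            }
            output.collect(value, new LongWritable(1L));
        }
    }

The examples that follow show the same lookup-then-increment pattern in real mappers and reducers, sometimes guarded with a null check or a try/catch because the counter may be unavailable.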
From source file:BU.MET.CS755.SpeciesIterReducer2.java
    public void reduce(WritableComparable key, Iterator values, OutputCollector output, Reporter reporter)
            throws IOException {
        double score = 0;
        String outLinks = "";
        double oldScore = 0;

        // Counting links
        reporter.incrCounter(BU.MET.CS755.SpeciesIterDriver2.ITERATION_COUNTER.TOTAL_LINKS, 1L);

        if (iterationNumber == 1) {
            return;
        }

        while (values.hasNext()) {
            String curr = ((Text) values.next()).toString();
            int colon = curr.indexOf(":");
            int space = curr.indexOf(" ");
            int oldrank = curr.indexOf("oldrank");

            if (colon > -1) {
                String presScore = curr.substring(0, colon);
                try {
                    score += Double.parseDouble(presScore);
                    oldScore = score;
                    outLinks = curr.substring(colon + 1);
                    continue;
                } catch (Exception e) {
                    // Not a "score:links" value; fall through to the other cases.
                }
            }

            if (space > -1) {
                outLinks = curr;
            } else if (oldrank > -1) {
                oldScore = Double.parseDouble(curr.substring(oldrank + 8));
            } else {
                score += Double.parseDouble(curr);
            }
        }

        String toEmit;
        if (outLinks.length() > 0) {
            toEmit = Double.toString(score) + ":" + outLinks;
        } else {
            toEmit = Double.toString(score);
        }

        // Output the new page rank
        output.collect(key, new Text(toEmit));

        double delta = oldScore - score;

        // Check how much the new page rank has changed. If the change is less
        // than two decimal places, treat it as a converged value. If not,
        // we need to re-calculate the rank with one more iteration; inform the
        // driver about that by incrementing the iterations-needed counter.
        if ((delta > 0.009) || (delta < -0.009)) {
            Counter myCounter2 = reporter
                    .getCounter(BU.MET.CS755.SpeciesIterDriver2.ITERATION_COUNTER.ITERATIONS_NEEDED);
            if (myCounter2 != null) {
                reporter.incrCounter(BU.MET.CS755.SpeciesIterDriver2.ITERATION_COUNTER.ITERATIONS_NEEDED, 1L);
            }
        }
    }
From source file:cn.scala.es.ReportingUtils.java
License:Apache License
    private static void oldApiCounter(Reporter reporter, Enum<?> counter, long value) {
        try {
            org.apache.hadoop.mapred.Counters.Counter c = reporter.getCounter(counter);
            if (c != null) {
                c.increment(value);
            }
        } catch (Exception ex) {
            // counter unavailable
        }
    }
From source file:com.mh2c.WikipediaDumpLoaderMapper.java
License:Apache License
    /**
     * key = article content
     * value = empty string
     */
    @Override
    public void map(Text key, Text value, OutputCollector<Text, Text> output, Reporter reporter)
            throws IOException {
        try {
            // Parse the page of XML into a document
            Document doc = db.parse(new InputSource(new StringReader(key.toString())));
            // Extract the title and text (article content) from the page content
            String title = doc.getElementsByTagName("title").item(0).getTextContent();
            String text = doc.getElementsByTagName("text").item(0).getTextContent();
            // Emit the title and text pair
            output.collect(new Text(title), new Text(text));
            reporter.getCounter(Counter.ARTICLES).increment(1L);
        } catch (SAXException e) {
            throw new IOException(e);
        }
    }
From source file:mapreducecountersoldapi.MaxCountMapperOldApi.java
    @Override
    public void map(LongWritable key, Text value, OutputCollector<Text, LongWritable> output, Reporter reporter)
            throws IOException {
        String line[] = value.toString().split(",");
        String identificador = line[0];
        String tiempo = line[1];
        String producto = line[2];
        String categoria = line[3];

        switch (categoria) {
        case "0":
            reporter.getCounter(Categoria.Cat0).increment(1);
            break;
        case "1":
            reporter.getCounter(Categoria.Cat1).increment(1);
            break;
        case "2":
            reporter.getCounter(Categoria.Cat2).increment(1);
            break;
        case "3":
            reporter.getCounter(Categoria.Cat3).increment(1);
            break;
        case "5":
            reporter.getCounter(Categoria.Cat5).increment(1);
            break;
        case "S":
            reporter.getCounter(Categoria.CatS).increment(1);
            break;
        }

        DateFormat df = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSS");
        try {
            Date tiempos = df.parse(tiempo);
            output.collect(new Text(identificador), new LongWritable(tiempos.getTime()));
        } catch (Exception e) {
            // context.write(new Text(identificador), new LongWritable(new Date().getTime()));
            System.out.println("Error converting timestamp");
        }
    }
From source file:org.apache.mahout.clustering.lda.cvb.PriorTrainingReducer.java
License:Apache License
    @Override
    public void reduce(IntWritable docId, Iterator<VectorWritable> vectors,
            OutputCollector<IntWritable, VectorWritable> out, Reporter reporter) throws IOException {
        if (this.reporter == null) {
            this.reporter = reporter;
        }
        Counter docCounter = reporter.getCounter(Counters.DOCS);
        docCounter.increment(1);
        Vector topicVector = null;
        Vector document = null;
        while (vectors.hasNext()) {
            VectorWritable v = vectors.next();
            /*
             * NOTE: we are susceptible to the pathological case of numTerms == numTopics (which should
             * never happen, as that would generate a horrible topic model), because we identify which
             * vector is the "prior" and which is the document by document.size() == numTerms
             */
            if (v.get().size() == numTerms) {
                document = v.get();
            } else {
                topicVector = v.get();
            }
        }
        if (document == null) {
            if (topicVector != null) {
                reporter.getCounter(Counters.UNUSED_PRIORS).increment(1);
            }
            reporter.getCounter(Counters.SKIPPED_DOC_IDS).increment(1);
            return;
        } else if (topicVector == null && onlyLabeledDocs) {
            reporter.getCounter(Counters.SKIPPED_DOC_IDS).increment(1);
            return;
        } else {
            if (topicVector == null) {
                topicVector = new DenseVector(numTopics).assign(1.0 / numTopics);
            } else {
                if (reporter.getCounter(Counters.DOCS_WITH_PRIORS).getCounter() % 100 == 0) {
                    long docsWithPriors = reporter.getCounter(Counters.DOCS_WITH_PRIORS).getCounter();
                    long skippedDocs = reporter.getCounter(Counters.SKIPPED_DOC_IDS).getCounter();
                    long total = reporter.getCounter(Counters.DOCS).getCounter();
                    log.info("Processed {} docs total, {} with priors, skipped {} docs",
                            new Object[] { total, docsWithPriors, skippedDocs });
                }
                reporter.getCounter(Counters.DOCS_WITH_PRIORS).increment(1);
            }
            modelTrainer.trainSync(document, topicVector, true, 1);
            multipleOutputs.getCollector(DOC_TOPICS, reporter).collect(docId, new VectorWritable(topicVector));
            reporter.getCounter(Counters.USED_DOCS).increment(1);
        }
    }
From source file:org.elasticsearch.hadoop.mr.ReportingUtils.java
License:Apache License
    private static void oldApiCounter(Reporter reporter, Enum<?> counter, long value) {
        org.apache.hadoop.mapred.Counters.Counter c = reporter.getCounter(counter);
        if (c != null) {
            c.increment(value);
        }
    }
From source file:org.warcbase.index.IndexerReducer.java
License:Apache License
    @Override
    public void reduce(IntWritable key, Iterator<WritableSolrRecord> values, OutputCollector<Text, Text> output,
            Reporter reporter) throws IOException {
        SolrRecord solr;
        // Get the shard number, but counting from 1 instead of 0:
        int shard = key.get() + 1;

        // For indexing into HDFS, set up a new server per key:
        initEmbeddedServer(shard);

        // Go through the documents for this shard:
        long cnt = 0;
        while (values.hasNext()) {
            solr = values.next().getSolrRecord();
            cnt++;

            docs.add(solr.getSolrDocument());
            // Have we exceeded the batchSize?
            checkSubmission(docs, batchSize, reporter);

            // Occasionally update application-level status:
            if ((cnt % 1000) == 0) {
                reporter.setStatus(SHARD_PREFIX + shard + ": processed " + cnt + ", dropped "
                        + reporter.getCounter(MyCounters.NUM_DROPPED_RECORDS).getValue());
            }
        }
        try {
            // If we have at least one document unsubmitted, make sure we submit it.
            checkSubmission(docs, 1, reporter);

            // If we are indexing to HDFS, shut the shard down:
            // Commit, and block until the changes have been flushed.
            solrServer.commit(true, false);
            solrServer.shutdown();
        } catch (Exception e) {
            LOG.error("ERROR on commit: " + e);
            e.printStackTrace();
        }
    }
From source file:uk.bl.wa.hadoop.datasets.WARCDatasetReducer.java
License:Open Source License
    @Override
    public void reduce(Text key, Iterator<Text> values, OutputCollector<Text, Text> output, Reporter reporter)
            throws IOException {
        Text wsr;
        // Go through the documents for this shard:
        long noValues = 0;
        while (values.hasNext()) {
            wsr = values.next();
            output.collect(key, wsr);
            noValues++;

            // Occasionally update application-level status:
            if ((noValues % 1000) == 0) {
                reporter.setStatus("Processed " + noValues + ", dropped "
                        + reporter.getCounter(MyCounters.NUM_DROPPED_RECORDS).getValue());
            }
        }
    }
From source file:uk.bl.wa.hadoop.indexer.WARCIndexerReducer.java
License:Open Source License
    @Override
    public void reduce(IntWritable key, Iterator<WritableSolrRecord> values, OutputCollector<Text, Text> output,
            Reporter reporter) throws IOException {
        WctEnricher wct;
        WritableSolrRecord wsr;
        SolrRecord solr;
        // Get the slice number, but counting from 1 instead of 0:
        int slice = key.get() + 1;

        // Go through the documents for this shard:
        long noValues = 0;
        while (values.hasNext()) {
            wsr = values.next();
            solr = wsr.getSolrRecord();
            noValues++;

            // Add additional metadata for WCT Instances.
            if (solr.containsKey(WctFields.WCT_INSTANCE_ID)) {
                wct = new WctEnricher(key.toString());
                wct.addWctMetadata(solr);
            }

            if (!dummyRun) {
                docs.add(solr.getSolrDocument());
                // Have we exceeded the batchSize?
                checkSubmission(docs, batchSize, reporter);
            } else {
                log.info("DUMMY_RUN: Skipping addition of doc: " + solr.getField("id").getFirstValue());
            }

            // Occasionally update application-level status:
            if ((noValues % 1000) == 0) {
                reporter.setStatus(this.shardPrefix + slice + ": processed " + noValues + ", dropped "
                        + reporter.getCounter(MyCounters.NUM_DROPPED_RECORDS).getValue());
            }

            if (this.exportXml && solr.getSolrDocument().getFieldValue(SolrFields.SOLR_URL_TYPE) != null
                    && solr.getSolrDocument().getFieldValue(SolrFields.SOLR_URL_TYPE)
                            .equals(SolrFields.SOLR_URL_TYPE_SLASHPAGE)) {
                output.collect(new Text(""),
                        new Text(MetadataBuilder.SolrDocumentToElement(solr.getSolrDocument())));
            }
        }
        try {
            /*
             * If we have at least one document unsubmitted, make sure we submit it.
             */
            checkSubmission(docs, 1, reporter);

            // If we are indexing to HDFS, shut the shard down:
            if (useEmbeddedServer) {
                // Commit, and block until the changes have been flushed.
                solrServer.commit(true, false);
                // And shut it down.
                solrServer.shutdown();
            }
        } catch (Exception e) {
            log.error("ERROR on commit: " + e);
            e.printStackTrace();
        }
    }
From source file:uk.bl.wa.hadoop.mapreduce.mdx.MDXReduplicatingReducer.java
License:Open Source License
    @Override
    public void reduce(Text key, Iterator<Text> values, OutputCollector<Text, Text> output, Reporter reporter)
            throws IOException {
        try {
            long noValues = 0;
            String json;
            MDX mdx;
            String exemplar = null;
            List<MDX> toReduplicate = new ArrayList<MDX>();
            while (values.hasNext()) {
                json = values.next().toString();
                mdx = new MDX(json);
                noValues++;

                // Collect the revisit records:
                if (revisit.equals(mdx.getRecordType())) {
                    // Add this revisit record to the stack:
                    reporter.incrCounter(MyCounters.NUM_REVISITS, 1);
                    toReduplicate.add(mdx);
                } else {
                    // Record a response record:
                    if (exemplar == null && response.equals(mdx.getRecordType())) {
                        exemplar = json;
                    }
                    // Collect complete records:
                    output.collect(key, new Text(mdx.toString()));
                }
                // Report:
                reporter.incrCounter(MyCounters.NUM_RECORDS, 1);

                // Occasionally update status report:
                if ((noValues % 1000) == 0) {
                    reporter.setStatus("Processed " + noValues + ", of which "
                            + reporter.getCounter(MyCounters.NUM_REVISITS).getValue()
                            + " records need reduplication.");
                }
            }
            // Mid-reduce status:
            log.info("Mid-reduce: Processed " + noValues + ", of which "
                    + reporter.getCounter(MyCounters.NUM_REVISITS).getValue() + " records need reduplication.");

            // Now fix up revisits:
            for (MDX rmdxw : toReduplicate) {
                // Set outKey based on hash:
                // Text outKey = new rmdxw.getHash();

                // Handle merge:
                if (exemplar != null) {
                    // Modify the record type and merge the properties:
                    MDX rmdx = new MDX(exemplar);
                    @SuppressWarnings("unchecked")
                    Iterator<String> keys = rmdxw.keys();
                    while (keys.hasNext()) {
                        String k = keys.next();
                        rmdx.put(k, rmdxw.get(k));
                    }
                    rmdx.setRecordType("reduplicated");
                    reporter.incrCounter(MyCounters.NUM_RESOLVED_REVISITS, 1);

                    // Collect resolved records:
                    output.collect(key, new Text(rmdx.toString()));
                } else {
                    reporter.incrCounter(MyCounters.NUM_UNRESOLVED_REVISITS, 1);

                    // Collect unresolved records:
                    output.collect(key, new Text(rmdxw.toString()));
                }
            }
        } catch (JSONException e) {
            log.error("Exception in MDX reducer.", e);
            e.printStackTrace();
            reporter.incrCounter(MyCounters.NUM_ERRORS, 1);
        }
    }