List of usage examples for org.apache.hadoop.mapred.Reporter.getCounter
public abstract Counter getCounter(Enum<?> name);
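Before the full examples below, here is a minimal sketch of the typical pattern: look up a Counters.Counter by enum constant via Reporter.getCounter and increment it (or use the incrCounter shorthand). The WordCounter enum and the map body are hypothetical illustrations; only the Reporter, Counters.Counter, and OutputCollector calls come from the old org.apache.hadoop.mapred API.

    import java.io.IOException;
    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapred.Counters.Counter;
    import org.apache.hadoop.mapred.MapReduceBase;
    import org.apache.hadoop.mapred.Mapper;
    import org.apache.hadoop.mapred.OutputCollector;
    import org.apache.hadoop.mapred.Reporter;

    public class CountingMapper extends MapReduceBase
            implements Mapper<LongWritable, Text, Text, LongWritable> {

        // Hypothetical counter group, for illustration only.
        public enum WordCounter { LINES, EMPTY_LINES }

        @Override
        public void map(LongWritable key, Text value, OutputCollector<Text, LongWritable> output,
                Reporter reporter) throws IOException {
            // Look up the counter by enum constant and increment it if available.
            Counter lines = reporter.getCounter(WordCounter.LINES);
            if (lines != null) {
                lines.increment(1L);
            }
            if (value.toString().trim().isEmpty()) {
                // incrCounter is the shorthand that skips the explicit lookup.
                reporter.incrCounter(WordCounter.EMPTY_LINES, 1L);
            }
            output.collect(value, new LongWritable(1L));
        }
    }

The examples that follow show the same lookup-then-increment pattern in real mappers and reducers, sometimes guarded with a null check or a try/catch because the counter may be unavailable.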
From source file:BU.MET.CS755.SpeciesIterReducer2.java
    public void reduce(WritableComparable key, Iterator values, OutputCollector output, Reporter reporter)
            throws IOException {
        double score = 0;
        String outLinks = "";
        double oldScore = 0;

        // Counting links
        reporter.incrCounter(BU.MET.CS755.SpeciesIterDriver2.ITERATION_COUNTER.TOTAL_LINKS, 1L);

        if (iterationNumber == 1) {
            return;
        }

        while (values.hasNext()) {
            String curr = ((Text) values.next()).toString();
            int colon = curr.indexOf(":");
            int space = curr.indexOf(" ");
            int oldrank = curr.indexOf("oldrank");

            if (colon > -1) {
                String presScore = curr.substring(0, colon);
                try {
                    score += Double.parseDouble(presScore);
                    oldScore = score;
                    outLinks = curr.substring(colon + 1);
                    continue;
                } catch (Exception e) {
                    // Not a "score:links" value; fall through to the other cases.
                }
            }

            if (space > -1) {
                outLinks = curr;
            } else if (oldrank > -1) {
                oldScore = Double.parseDouble(curr.substring(oldrank + 8));
            } else {
                score += Double.parseDouble(curr);
            }
        }

        String toEmit;
        if (outLinks.length() > 0) {
            toEmit = Double.toString(score) + ":" + outLinks;
        } else {
            toEmit = Double.toString(score);
        }

        // Output the new page rank
        output.collect(key, new Text(toEmit));

        double delta = oldScore - score;

        // Check how much the new page rank has changed. If the change is less
        // than two decimal places, treat it as a converged value. If not,
        // we need to re-calculate the rank with one more iteration; inform the
        // driver about that by incrementing the iterations-needed counter.
        if ((delta > 0.009) || (delta < -0.009)) {
            Counter myCounter2 = reporter
                    .getCounter(BU.MET.CS755.SpeciesIterDriver2.ITERATION_COUNTER.ITERATIONS_NEEDED);
            if (myCounter2 != null) {
                reporter.incrCounter(BU.MET.CS755.SpeciesIterDriver2.ITERATION_COUNTER.ITERATIONS_NEEDED, 1L);
            }
        }
    }
From source file:cn.scala.es.ReportingUtils.java
License:Apache License
    private static void oldApiCounter(Reporter reporter, Enum<?> counter, long value) {
        try {
            org.apache.hadoop.mapred.Counters.Counter c = reporter.getCounter(counter);
            if (c != null) {
                c.increment(value);
            }
        } catch (Exception ex) {
            // counter unavailable
        }
    }
From source file:com.mh2c.WikipediaDumpLoaderMapper.java
License:Apache License
    /**
     * key = article content
     * value = empty string
     */
    @Override
    public void map(Text key, Text value, OutputCollector<Text, Text> output, Reporter reporter)
            throws IOException {
        try {
            // Parse the page of XML into a document
            Document doc = db.parse(new InputSource(new StringReader(key.toString())));
            // Extract the title and text (article content) from the page content
            String title = doc.getElementsByTagName("title").item(0).getTextContent();
            String text = doc.getElementsByTagName("text").item(0).getTextContent();
            // Emit the title and text pair
            output.collect(new Text(title), new Text(text));
            reporter.getCounter(Counter.ARTICLES).increment(1L);
        } catch (SAXException e) {
            throw new IOException(e);
        }
    }
From source file:mapreducecountersoldapi.MaxCountMapperOldApi.java
    @Override
    public void map(LongWritable key, Text value, OutputCollector<Text, LongWritable> output, Reporter reporter)
            throws IOException {
        String line[] = value.toString().split(",");
        String identificador = line[0];
        String tiempo = line[1];
        String producto = line[2];
        String categoria = line[3];

        switch (categoria) {
        case "0":
            reporter.getCounter(Categoria.Cat0).increment(1);
            break;
        case "1":
            reporter.getCounter(Categoria.Cat1).increment(1);
            break;
        case "2":
            reporter.getCounter(Categoria.Cat2).increment(1);
            break;
        case "3":
            reporter.getCounter(Categoria.Cat3).increment(1);
            break;
        case "5":
            reporter.getCounter(Categoria.Cat5).increment(1);
            break;
        case "S":
            reporter.getCounter(Categoria.CatS).increment(1);
            break;
        }

        DateFormat df = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSS");
        try {
            Date tiempos = df.parse(tiempo);
            output.collect(new Text(identificador), new LongWritable(tiempos.getTime()));
        } catch (Exception e) {
            // context.write(new Text(identificador), new LongWritable(new Date().getTime()));
            System.out.println("Error converting timestamp");
        }
    }
From source file:org.apache.mahout.clustering.lda.cvb.PriorTrainingReducer.java
License:Apache License
    @Override
    public void reduce(IntWritable docId, Iterator<VectorWritable> vectors,
            OutputCollector<IntWritable, VectorWritable> out, Reporter reporter) throws IOException {
        if (this.reporter == null) {
            this.reporter = reporter;
        }
        Counter docCounter = reporter.getCounter(Counters.DOCS);
        docCounter.increment(1);
        Vector topicVector = null;
        Vector document = null;
        while (vectors.hasNext()) {
            VectorWritable v = vectors.next();
            /*
             * NOTE: we are susceptible to the pathological case of numTerms == numTopics (which should
             * never happen, as that would generate a horrible topic model), because we identify which
             * vector is the "prior" and which is the document by document.size() == numTerms
             */
            if (v.get().size() == numTerms) {
                document = v.get();
            } else {
                topicVector = v.get();
            }
        }
        if (document == null) {
            if (topicVector != null) {
                reporter.getCounter(Counters.UNUSED_PRIORS).increment(1);
            }
            reporter.getCounter(Counters.SKIPPED_DOC_IDS).increment(1);
            return;
        } else if (topicVector == null && onlyLabeledDocs) {
            reporter.getCounter(Counters.SKIPPED_DOC_IDS).increment(1);
            return;
        } else {
            if (topicVector == null) {
                topicVector = new DenseVector(numTopics).assign(1.0 / numTopics);
            } else {
                if (reporter.getCounter(Counters.DOCS_WITH_PRIORS).getCounter() % 100 == 0) {
                    long docsWithPriors = reporter.getCounter(Counters.DOCS_WITH_PRIORS).getCounter();
                    long skippedDocs = reporter.getCounter(Counters.SKIPPED_DOC_IDS).getCounter();
                    long total = reporter.getCounter(Counters.DOCS).getCounter();
                    log.info("Processed {} docs total, {} with priors, skipped {} docs",
                            new Object[] { total, docsWithPriors, skippedDocs });
                }
                reporter.getCounter(Counters.DOCS_WITH_PRIORS).increment(1);
            }
            modelTrainer.trainSync(document, topicVector, true, 1);
            multipleOutputs.getCollector(DOC_TOPICS, reporter).collect(docId, new VectorWritable(topicVector));
            reporter.getCounter(Counters.USED_DOCS).increment(1);
        }
    }
From source file:org.elasticsearch.hadoop.mr.ReportingUtils.java
License:Apache License
    private static void oldApiCounter(Reporter reporter, Enum<?> counter, long value) {
        org.apache.hadoop.mapred.Counters.Counter c = reporter.getCounter(counter);
        if (c != null) {
            c.increment(value);
        }
    }
From source file:org.warcbase.index.IndexerReducer.java
License:Apache License
    @Override
    public void reduce(IntWritable key, Iterator<WritableSolrRecord> values, OutputCollector<Text, Text> output,
            Reporter reporter) throws IOException {
        SolrRecord solr;
        // Get the shard number, but counting from 1 instead of 0:
        int shard = key.get() + 1;

        // For indexing into HDFS, set up a new server per key:
        initEmbeddedServer(shard);

        // Go through the documents for this shard:
        long cnt = 0;
        while (values.hasNext()) {
            solr = values.next().getSolrRecord();
            cnt++;

            docs.add(solr.getSolrDocument());
            // Have we exceeded the batchSize?
            checkSubmission(docs, batchSize, reporter);

            // Occasionally update application-level status:
            if ((cnt % 1000) == 0) {
                reporter.setStatus(SHARD_PREFIX + shard + ": processed " + cnt + ", dropped "
                        + reporter.getCounter(MyCounters.NUM_DROPPED_RECORDS).getValue());
            }
        }
        try {
            // If we have at least one document unsubmitted, make sure we submit it.
            checkSubmission(docs, 1, reporter);

            // If we are indexing to HDFS, shut the shard down:
            // Commit, and block until the changes have been flushed.
            solrServer.commit(true, false);
            solrServer.shutdown();
        } catch (Exception e) {
            LOG.error("ERROR on commit: " + e);
            e.printStackTrace();
        }
    }
From source file:uk.bl.wa.hadoop.datasets.WARCDatasetReducer.java
License:Open Source License
    @Override
    public void reduce(Text key, Iterator<Text> values, OutputCollector<Text, Text> output, Reporter reporter)
            throws IOException {
        Text wsr;
        // Go through the documents for this shard:
        long noValues = 0;
        while (values.hasNext()) {
            wsr = values.next();
            output.collect(key, wsr);
            noValues++;

            // Occasionally update application-level status:
            if ((noValues % 1000) == 0) {
                reporter.setStatus("Processed " + noValues + ", dropped "
                        + reporter.getCounter(MyCounters.NUM_DROPPED_RECORDS).getValue());
            }
        }
    }
From source file:uk.bl.wa.hadoop.indexer.WARCIndexerReducer.java
License:Open Source License
    @Override
    public void reduce(IntWritable key, Iterator<WritableSolrRecord> values, OutputCollector<Text, Text> output,
            Reporter reporter) throws IOException {
        WctEnricher wct;
        WritableSolrRecord wsr;
        SolrRecord solr;
        // Get the slice number, but counting from 1 instead of 0:
        int slice = key.get() + 1;

        // Go through the documents for this shard:
        long noValues = 0;
        while (values.hasNext()) {
            wsr = values.next();
            solr = wsr.getSolrRecord();
            noValues++;

            // Add additional metadata for WCT Instances.
            if (solr.containsKey(WctFields.WCT_INSTANCE_ID)) {
                wct = new WctEnricher(key.toString());
                wct.addWctMetadata(solr);
            }

            if (!dummyRun) {
                docs.add(solr.getSolrDocument());
                // Have we exceeded the batchSize?
                checkSubmission(docs, batchSize, reporter);
            } else {
                log.info("DUMMY_RUN: Skipping addition of doc: " + solr.getField("id").getFirstValue());
            }

            // Occasionally update application-level status:
            if ((noValues % 1000) == 0) {
                reporter.setStatus(this.shardPrefix + slice + ": processed " + noValues + ", dropped "
                        + reporter.getCounter(MyCounters.NUM_DROPPED_RECORDS).getValue());
            }

            if (this.exportXml && solr.getSolrDocument().getFieldValue(SolrFields.SOLR_URL_TYPE) != null
                    && solr.getSolrDocument().getFieldValue(SolrFields.SOLR_URL_TYPE)
                            .equals(SolrFields.SOLR_URL_TYPE_SLASHPAGE)) {
                output.collect(new Text(""),
                        new Text(MetadataBuilder.SolrDocumentToElement(solr.getSolrDocument())));
            }
        }
        try {
            /*
             * If we have at least one document unsubmitted, make sure we submit it.
             */
            checkSubmission(docs, 1, reporter);

            // If we are indexing to HDFS, shut the shard down:
            if (useEmbeddedServer) {
                // Commit, and block until the changes have been flushed.
                solrServer.commit(true, false);
                // And shut it down.
                solrServer.shutdown();
            }
        } catch (Exception e) {
            log.error("ERROR on commit: " + e);
            e.printStackTrace();
        }
    }
From source file:uk.bl.wa.hadoop.mapreduce.mdx.MDXReduplicatingReducer.java
License:Open Source License
    @Override
    public void reduce(Text key, Iterator<Text> values, OutputCollector<Text, Text> output, Reporter reporter)
            throws IOException {
        try {
            long noValues = 0;
            String json;
            MDX mdx;
            String exemplar = null;
            List<MDX> toReduplicate = new ArrayList<MDX>();
            while (values.hasNext()) {
                json = values.next().toString();
                mdx = new MDX(json);
                noValues++;

                // Collect the revisit records:
                if (revisit.equals(mdx.getRecordType())) {
                    // Add this revisit record to the stack:
                    reporter.incrCounter(MyCounters.NUM_REVISITS, 1);
                    toReduplicate.add(mdx);
                } else {
                    // Record a response record:
                    if (exemplar == null && response.equals(mdx.getRecordType())) {
                        exemplar = json;
                    }
                    // Collect complete records:
                    output.collect(key, new Text(mdx.toString()));
                }
                // Report:
                reporter.incrCounter(MyCounters.NUM_RECORDS, 1);

                // Occasionally update status report:
                if ((noValues % 1000) == 0) {
                    reporter.setStatus("Processed " + noValues + ", of which "
                            + reporter.getCounter(MyCounters.NUM_REVISITS).getValue()
                            + " records need reduplication.");
                }
            }
            // Mid-reduce status:
            log.info("Mid-reduce: Processed " + noValues + ", of which "
                    + reporter.getCounter(MyCounters.NUM_REVISITS).getValue() + " records need reduplication.");

            // Now fix up revisits:
            for (MDX rmdxw : toReduplicate) {
                // Set outKey based on hash:
                // Text outKey = new rmdxw.getHash();

                // Handle merge:
                if (exemplar != null) {
                    // Modify the record type and merge the properties:
                    MDX rmdx = new MDX(exemplar);
                    @SuppressWarnings("unchecked")
                    Iterator<String> keys = rmdxw.keys();
                    while (keys.hasNext()) {
                        String k = keys.next();
                        rmdx.put(k, rmdxw.get(k));
                    }
                    rmdx.setRecordType("reduplicated");
                    reporter.incrCounter(MyCounters.NUM_RESOLVED_REVISITS, 1);

                    // Collect resolved records:
                    output.collect(key, new Text(rmdx.toString()));
                } else {
                    reporter.incrCounter(MyCounters.NUM_UNRESOLVED_REVISITS, 1);

                    // Collect unresolved records:
                    output.collect(key, new Text(rmdxw.toString()));
                }
            }
        } catch (JSONException e) {
            log.error("Exception in MDX reducer.", e);
            e.printStackTrace();
            reporter.incrCounter(MyCounters.NUM_ERRORS, 1);
        }
    }