List of usage examples for org.apache.hadoop.mapred Reporter setStatus
public abstract void setStatus(String status);
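Reporter.setStatus sets the human-readable status string that the framework displays for the running task attempt; because Reporter extends Progressable, calling it also signals liveness for long-running tasks. Before the real-world excerpts below, here is a minimal sketch of the typical pattern: update the status periodically from inside a map loop. The class name StatusDemoMapper, the field name processed, and the reporting interval are illustrative assumptions, not taken from any source file listed here.

import java.io.IOException;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;

// Minimal sketch with illustrative names: report a status string every 1000 records.
public class StatusDemoMapper extends MapReduceBase
        implements Mapper<LongWritable, Text, Text, LongWritable> {

    private long processed = 0;

    @Override
    public void map(LongWritable key, Text value, OutputCollector<Text, LongWritable> output,
            Reporter reporter) throws IOException {
        output.collect(value, new LongWritable(1));
        processed++;
        if (processed % 1000 == 0) {
            // Shown as the task's status in the web UI; the call also marks the task as alive.
            reporter.setStatus("processed " + processed + " records");
        }
    }
}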
From source file:StreamWikiDumpInputFormat.java
License:Apache License
public RecordReader<Text, Text> getRecordReader(final InputSplit genericSplit, JobConf job, Reporter reporter)
        throws IOException {
    // handling non-standard record reader (likely StreamXmlRecordReader)
    FileSplit split = (FileSplit) genericSplit;
    LOG.info("getRecordReader start.....split=" + split);
    reporter.setStatus(split.toString());
    // Open the file and seek to the start of the split
    FileSystem fs = split.getPath().getFileSystem(job);
    String patt = job.get(KEY_EXCLUDE_PAGE_PATTERN);
    boolean prev = job.getBoolean(KEY_PREVIOUS_REVISION, true);
    return new MyRecordReader(split, reporter, job, fs,
            patt != null && !"".equals(patt) ? Pattern.compile(patt) : null, prev);
}
From source file:StreamWikiDumpInputFormat.java
License:Apache License
private static List<Long> getPageBytes(FileSplit split, FileSystem fs,
        CompressionCodecFactory compressionCodecs, Reporter reporter) throws IOException {
    SeekableInputStream in = null;
    try {
        in = SeekableInputStream.getInstance(split, fs, compressionCodecs);
        long start = split.getStart();
        long end = start + split.getLength();
        // dead code in the original source: cin is never assigned, so this branch never runs
        InputStream cin = null;
        if (cin != null) {
            // start = cin.getAdjustedStart();
            // end = cin.getAdjustedEnd() + 1;
        }
        ByteMatcher matcher = new ByteMatcher(in, in);
        List<Long> ret = new ArrayList<Long>();
        while (true) {
            if (matcher.getPos() >= end || !matcher.readUntilMatch(pageBeginPattern, null, end)) {
                break;
            }
            ret.add(matcher.getReadBytes() - pageBeginPattern.getBytes("UTF-8").length);
            if (matcher.getPos() >= end || !matcher.readUntilMatch(pageEndPattern, null, end)) {
                System.err.println("could not find " + pageEndPattern + ", page over a split? pos="
                        + matcher.getPos() + " bytes=" + matcher.getReadBytes());
                // ret.add(end);
                break;
            }
            ret.add(matcher.getReadBytes() - pageEndPattern.getBytes("UTF-8").length);
            String report = String.format(
                    "StreamWikiDumpInputFormat: find page %6d start=%d pos=%d end=%d bytes=%d",
                    ret.size(), start, matcher.getPos(), end, matcher.getReadBytes());
            reporter.setStatus(report);
            reporter.incrCounter(WikiDumpCounters.FOUND_PAGES, 1);
            LOG.info(report);
        }
        if (ret.size() % 2 == 0) {
            ret.add(matcher.getReadBytes());
        }
        // System.err.println("getPageBytes " + ret);
        return ret;
    } finally {
        if (in != null) {
            in.close();
        }
    }
}
From source file:SleepJob.java
License:Apache License
public void map(IntWritable key, IntWritable value, OutputCollector<IntWritable, NullWritable> output,
        Reporter reporter) throws IOException {
    // it is expected that every map processes mapSleepCount number of records.
    try {
        reporter.setStatus("Sleeping... (" + (mapSleepDuration * (mapSleepCount - count)) + ") ms left");
        Thread.sleep(mapSleepDuration);
    } catch (InterruptedException ex) {
        throw (IOException) new IOException("Interrupted while sleeping").initCause(ex);
    }
    ++count;
    // output reduceSleepCount * numReduce number of random values, so that
    // each reducer will get reduceSleepCount number of keys.
    int k = key.get();
    for (int i = 0; i < value.get(); ++i) {
        output.collect(new IntWritable(k + i), NullWritable.get());
    }
}
From source file:SleepJob.java
License:Apache License
public void reduce(IntWritable key, Iterator<NullWritable> values,
        OutputCollector<NullWritable, NullWritable> output, Reporter reporter) throws IOException {
    try {
        reporter.setStatus("Sleeping... (" + (reduceSleepDuration * (reduceSleepCount - count)) + ") ms left");
        Thread.sleep(reduceSleepDuration);
    } catch (InterruptedException ex) {
        throw (IOException) new IOException("Interrupted while sleeping").initCause(ex);
    }
    count++;
}
From source file:DataJoinReducerBase.java
License:Apache License
/**
 * Re-groups the values for a key into sub-groups based on a secondary key (the input tag).
 *
 * @param key the map output key being reduced
 * @param arg1 iterator over the tagged values for this key
 * @param reporter used to report progress while large groups are consumed
 * @return a map from tag to a resettable iterator over the values carrying that tag
 */
private SortedMap<Object, ResetableIterator> regroup(Object key, Iterator arg1, Reporter reporter)
        throws IOException {
    this.numOfValues = 0;
    SortedMap<Object, ResetableIterator> retv = new TreeMap<Object, ResetableIterator>();
    TaggedMapOutput aRecord = null;
    while (arg1.hasNext()) {
        this.numOfValues += 1;
        if (this.numOfValues % 100 == 0) {
            reporter.setStatus("key: " + key.toString() + " numOfValues: " + this.numOfValues);
        }
        if (this.numOfValues > this.maxNumOfValuesPerGroup) {
            arg1.next(); // consume and discard the value so the iterator still advances past the cap
            continue;
        }
        aRecord = ((TaggedMapOutput) arg1.next()).clone(job);
        Text tag = aRecord.getTag();
        ResetableIterator data = retv.get(tag);
        if (data == null) {
            data = createResetableIterator();
            retv.put(tag, data);
        }
        data.add(aRecord);
    }
    if (this.numOfValues > this.largestNumOfValues) {
        this.largestNumOfValues = numOfValues;
        LOG.info("key: " + key.toString() + " this.largestNumOfValues: " + this.largestNumOfValues);
    }
    return retv;
}
From source file:DataJoinReducerBase.java
License:Apache License
/**
 * Subclasses can override this method to perform additional filtering
 * and/or other processing logic before a value is collected.
 *
 * @param key the output key
 * @param aRecord the tagged record to emit; nothing is collected when null
 * @param output collector for the joined output
 * @param reporter used to update the task status
 * @throws IOException
 */
protected void collect(Object key, TaggedMapOutput aRecord, OutputCollector output, Reporter reporter)
        throws IOException {
    this.collected += 1;
    addLongValue("collectedCount", 1);
    if (aRecord != null) {
        output.collect(key, aRecord.getData());
        reporter.setStatus("key: " + key.toString() + " collected: " + collected);
        addLongValue("actuallyCollectedCount", 1);
    }
}
From source file:SleepJobWithArray.java
License:Apache License
public void map(IntWritable key, IntWritable value, OutputCollector<IntWritable, NullWritable> output,
        Reporter reporter) throws IOException {
    if (initBigArray) {
        // Yes, I should use log4j :-/
        System.out.println("Requesting array of " + bigArraySize);
        int[] foo = new int[bigArraySize];
    }
    // it is expected that every map processes mapSleepCount number of records.
    try {
        reporter.setStatus("Sleeping... (" + (mapSleepDuration * (mapSleepCount - count)) + ") ms left");
        Thread.sleep(mapSleepDuration);
    } catch (InterruptedException ex) {
        throw (IOException) new IOException("Interrupted while sleeping").initCause(ex);
    }
    ++count;
    // output reduceSleepCount * numReduce number of random values, so that
    // each reducer will get reduceSleepCount number of keys.
    int k = key.get();
    for (int i = 0; i < value.get(); ++i) {
        output.collect(new IntWritable(k + i), NullWritable.get());
    }
}
From source file:alluxio.client.hadoop.AbstractIOMapper.java
License:Apache License
/**
 * Map file name and offset into statistical data.
 * <p>
 * The map task is to get the <tt>key</tt>, which contains the file name, and the <tt>value</tt>,
 * which is the offset within the file.
 *
 * The parameters are passed to the abstract method
 * {@link #doIO(Reporter, String, long)}, which performs the io operation,
 * usually read or write data, and then
 * {@link #collectStats(OutputCollector, String, long, Object)} is called
 * to prepare stat data for a subsequent reducer.
 */
@Override
public void map(Text key, LongWritable value, OutputCollector<Text, Text> output, Reporter reporter)
        throws IOException {
    String name = key.toString();
    long longValue = value.get();
    reporter.setStatus("starting " + name + " ::host = " + mHostname);
    mStream = getIOStream(name);
    T statValue = null;
    long tStart = System.currentTimeMillis();
    try {
        statValue = doIO(reporter, name, longValue);
    } finally {
        if (mStream != null) {
            mStream.close();
        }
    }
    long tEnd = System.currentTimeMillis();
    long execTime = tEnd - tStart;
    collectStats(output, name, execTime, statValue);
    reporter.setStatus("finished " + name + " ::host = " + mHostname);
}
From source file:alluxio.client.hadoop.AccumulatingReducer.java
License:Apache License
/**
 * This method accumulates values based on their type.
 *
 * @param key the type of values
 * @param values the values to accumulate
 * @param output collects the result of accumulating
 * @param reporter used to report progress and update status information
 */
public void reduce(Text key, Iterator<Text> values, OutputCollector<Text, Text> output, Reporter reporter)
        throws IOException {
    String field = key.toString();
    reporter.setStatus("starting " + field + " ::host = " + mHostname);
    // concatenate strings
    if (field.startsWith(VALUE_TYPE_STRING)) {
        StringBuilder sSum = new StringBuilder();
        while (values.hasNext()) {
            sSum.append(values.next().toString()).append(";");
        }
        output.collect(key, new Text(sSum.toString()));
        reporter.setStatus("finished " + field + " ::host = " + mHostname);
        return;
    }
    // sum float values
    if (field.startsWith(VALUE_TYPE_FLOAT)) {
        float fSum = 0;
        while (values.hasNext()) {
            fSum += Float.parseFloat(values.next().toString());
        }
        output.collect(key, new Text(String.valueOf(fSum)));
        reporter.setStatus("finished " + field + " ::host = " + mHostname);
        return;
    }
    // sum long values
    if (field.startsWith(VALUE_TYPE_LONG)) {
        long lSum = 0;
        while (values.hasNext()) {
            lSum += Long.parseLong(values.next().toString());
        }
        output.collect(key, new Text(String.valueOf(lSum)));
    }
    reporter.setStatus("finished " + field + " ::host = " + mHostname);
}
From source file:alluxio.hadoop.fs.AccumulatingReducer.java
License:Apache License
public void reduce(Text key, Iterator<Text> values, OutputCollector<Text, Text> output, Reporter reporter)
        throws IOException {
    String field = key.toString();
    reporter.setStatus("starting " + field + " ::host = " + mHostname);
    // concatenate strings
    if (field.startsWith(VALUE_TYPE_STRING)) {
        StringBuffer sSum = new StringBuffer();
        while (values.hasNext()) {
            sSum.append(values.next().toString()).append(";");
        }
        output.collect(key, new Text(sSum.toString()));
        reporter.setStatus("finished " + field + " ::host = " + mHostname);
        return;
    }
    // sum float values
    if (field.startsWith(VALUE_TYPE_FLOAT)) {
        float fSum = 0;
        while (values.hasNext()) {
            fSum += Float.parseFloat(values.next().toString());
        }
        output.collect(key, new Text(String.valueOf(fSum)));
        reporter.setStatus("finished " + field + " ::host = " + mHostname);
        return;
    }
    // sum long values
    if (field.startsWith(VALUE_TYPE_LONG)) {
        long lSum = 0;
        while (values.hasNext()) {
            lSum += Long.parseLong(values.next().toString());
        }
        output.collect(key, new Text(String.valueOf(lSum)));
    }
    reporter.setStatus("finished " + field + " ::host = " + mHostname);
}