Example usage for org.apache.hadoop.mapred Reporter setStatus

Introduction

On this page you can find example usage for org.apache.hadoop.mapred Reporter setStatus.

Prototype

public abstract void setStatus(String status);

Document

Set the status description for the task.
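
Before the examples below, here is a minimal sketch of typical usage: an old-API mapper that periodically updates its task status via Reporter.setStatus. The class name LineCounterMapper and the update interval are illustrative assumptions, not taken from any of the source files on this page.

import java.io.IOException;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;

public class LineCounterMapper extends MapReduceBase
        implements Mapper<LongWritable, Text, Text, IntWritable> {

    private long processed = 0;

    public void map(LongWritable key, Text value, OutputCollector<Text, IntWritable> output,
            Reporter reporter) throws IOException {
        processed++;
        // Update the status string shown in the task UI; doing this only every
        // 1000 records keeps the reporting overhead low.
        if (processed % 1000 == 0) {
            reporter.setStatus("processed " + processed + " records");
        }
        output.collect(new Text("lines"), new IntWritable(1));
    }
}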

Usage

From source file:StreamWikiDumpInputFormat.java

License:Apache License

public RecordReader<Text, Text> getRecordReader(final InputSplit genericSplit, JobConf job, Reporter reporter)
        throws IOException {
    // handling non-standard record reader (likely StreamXmlRecordReader)
    FileSplit split = (FileSplit) genericSplit;
    LOG.info("getRecordReader start.....split=" + split);
    reporter.setStatus(split.toString());

    // Open the file and seek to the start of the split
    FileSystem fs = split.getPath().getFileSystem(job);
    String patt = job.get(KEY_EXCLUDE_PAGE_PATTERN);
    boolean prev = job.getBoolean(KEY_PREVIOUS_REVISION, true);
    return new MyRecordReader(split, reporter, job, fs,
            patt != null && !"".equals(patt) ? Pattern.compile(patt) : null, prev);
}

From source file:StreamWikiDumpInputFormat.java

License:Apache License

private static List<Long> getPageBytes(FileSplit split, FileSystem fs,
        CompressionCodecFactory compressionCodecs, Reporter reporter) throws IOException {
    SeekableInputStream in = null;
    try {
        in = SeekableInputStream.getInstance(split, fs, compressionCodecs);
        long start = split.getStart();
        long end = start + split.getLength();
        InputStream cin = null;
        if (cin != null) {
            // Note: cin is never assigned in this snippet, so this branch is dead code.
            // start = cin.getAdjustedStart();
            // end = cin.getAdjustedEnd() + 1;
        }
        ByteMatcher matcher = new ByteMatcher(in, in);
        List<Long> ret = new ArrayList<Long>();
        while (true) {
            if (matcher.getPos() >= end || !matcher.readUntilMatch(pageBeginPattern, null, end)) {
                break;
            }
            ret.add(matcher.getReadBytes() - pageBeginPattern.getBytes("UTF-8").length);
            if (matcher.getPos() >= end || !matcher.readUntilMatch(pageEndPattern, null, end)) {
                System.err.println("could not find " + pageEndPattern + ", page over a split?  pos="
                        + matcher.getPos() + " bytes=" + matcher.getReadBytes());
                // ret.add(end);
                break;
            }
            ret.add(matcher.getReadBytes() - pageEndPattern.getBytes("UTF-8").length);
            String report = String.format(
                    "StreamWikiDumpInputFormat: find page %6d start=%d pos=%d end=%d bytes=%d", ret.size(),
                    start, matcher.getPos(), end, matcher.getReadBytes());
            reporter.setStatus(report);
            reporter.incrCounter(WikiDumpCounters.FOUND_PAGES, 1);
            LOG.info(report);
        }
        if (ret.size() % 2 == 0) {
            ret.add(matcher.getReadBytes());
        }
        // System.err.println("getPageBytes " + ret);//!
        return ret;
    } finally {
        if (in != null) {
            in.close();
        }
    }
}

From source file:SleepJob.java

License:Apache License

public void map(IntWritable key, IntWritable value, OutputCollector<IntWritable, NullWritable> output,
        Reporter reporter) throws IOException {

    //it is expected that every map processes mapSleepCount number of records. 
    try {
        reporter.setStatus("Sleeping... (" + (mapSleepDuration * (mapSleepCount - count)) + ") ms left");
        Thread.sleep(mapSleepDuration);
    } catch (InterruptedException ex) {
        throw (IOException) new IOException("Interrupted while sleeping").initCause(ex);
    }
    ++count;
    // output reduceSleepCount * numReduce number of random values, so that
    // each reducer will get reduceSleepCount number of keys.
    int k = key.get();
    for (int i = 0; i < value.get(); ++i) {
        output.collect(new IntWritable(k + i), NullWritable.get());
    }
}

From source file:SleepJob.java

License:Apache License

public void reduce(IntWritable key, Iterator<NullWritable> values,
        OutputCollector<NullWritable, NullWritable> output, Reporter reporter) throws IOException {
    try {
        reporter.setStatus("Sleeping... (" + (reduceSleepDuration * (reduceSleepCount - count)) + ") ms left");
        Thread.sleep(reduceSleepDuration);

    } catch (InterruptedException ex) {
        throw (IOException) new IOException("Interrupted while sleeping").initCause(ex);
    }
    count++;
}

From source file:DataJoinReducerBase.java

License:Apache License

/**
 * This is the function that re-groups values for a key into sub-groups based
 * on a secondary key (input tag).
 *
 * @param key the map output key for the current group
 * @param arg1 iterator over the tagged values associated with the key
 * @param reporter used to report progress and update the task status
 * @return the values of the group, re-grouped by their input tag
 * @throws IOException
 */
private SortedMap<Object, ResetableIterator> regroup(Object key, Iterator arg1, Reporter reporter)
        throws IOException {
    this.numOfValues = 0;
    SortedMap<Object, ResetableIterator> retv = new TreeMap<Object, ResetableIterator>();
    TaggedMapOutput aRecord = null;
    while (arg1.hasNext()) {
        this.numOfValues += 1;
        if (this.numOfValues % 100 == 0) {
            reporter.setStatus("key: " + key.toString() + " numOfValues: " + this.numOfValues);
        }
        if (this.numOfValues > this.maxNumOfValuesPerGroup) {
            continue;
        }
        aRecord = ((TaggedMapOutput) arg1.next()).clone(job);
        Text tag = aRecord.getTag();
        ResetableIterator data = retv.get(tag);
        if (data == null) {
            data = createResetableIterator();
            retv.put(tag, data);
        }
        data.add(aRecord);
    }
    if (this.numOfValues > this.largestNumOfValues) {
        this.largestNumOfValues = numOfValues;
        LOG.info("key: " + key.toString() + " this.largestNumOfValues: " + this.largestNumOfValues);
    }
    return retv;
}

From source file:DataJoinReducerBase.java

License:Apache License

/**
 * The subclass can override this method to perform additional filtering
 * and/or other processing logic before a value is collected.
 *
 * @param key the output key
 * @param aRecord the tagged record to collect
 * @param output the collector that receives the final output
 * @param reporter used to report progress and update the task status
 * @throws IOException
 */
protected void collect(Object key, TaggedMapOutput aRecord, OutputCollector output, Reporter reporter)
        throws IOException {
    this.collected += 1;
    addLongValue("collectedCount", 1);
    if (aRecord != null) {
        output.collect(key, aRecord.getData());
        reporter.setStatus("key: " + key.toString() + " collected: " + collected);
        addLongValue("actuallyCollectedCount", 1);
    }
}

From source file:SleepJobWithArray.java

License:Apache License

public void map(IntWritable key, IntWritable value, OutputCollector<IntWritable, NullWritable> output,
        Reporter reporter) throws IOException {

    if (initBigArray) {
        // Yes, I should use log4j :-/
        System.out.println("Requesting array of " + bigArraySize);
        // Allocate a large array to exercise task memory; the reference is intentionally unused.
        int[] foo = new int[bigArraySize];
    }
    //it is expected that every map processes mapSleepCount number of records. 
    try {
        reporter.setStatus("Sleeping... (" + (mapSleepDuration * (mapSleepCount - count)) + ") ms left");
        Thread.sleep(mapSleepDuration);
    } catch (InterruptedException ex) {
        throw (IOException) new IOException("Interrupted while sleeping").initCause(ex);
    }
    ++count;
    // output reduceSleepCount * numReduce number of random values, so that
    // each reducer will get reduceSleepCount number of keys.
    int k = key.get();
    for (int i = 0; i < value.get(); ++i) {
        output.collect(new IntWritable(k + i), NullWritable.get());
    }
}

From source file:alluxio.client.hadoop.AbstractIOMapper.java

License:Apache License

/**
 * Map file name and offset into statistical data.
 * <p>
 * The map task is to get the <tt>key</tt>, which contains the file name, and the <tt>value</tt>,
 * which is the offset within the file.
 *
 * The parameters are passed to the abstract method
 * {@link #doIO(Reporter, String,long)}, which performs the io operation,
 * usually read or write data, and then
 * {@link #collectStats(OutputCollector, String,long, Object)} is called
 * to prepare stat data for a subsequent reducer.
 */
@Override
public void map(Text key, LongWritable value, OutputCollector<Text, Text> output, Reporter reporter)
        throws IOException {
    String name = key.toString();
    long longValue = value.get();

    reporter.setStatus("starting " + name + " ::host = " + mHostname);

    mStream = getIOStream(name);
    T statValue = null;
    long tStart = System.currentTimeMillis();
    try {
        statValue = doIO(reporter, name, longValue);
    } finally {
        if (mStream != null) {
            mStream.close();
        }
    }
    long tEnd = System.currentTimeMillis();
    long execTime = tEnd - tStart;
    collectStats(output, name, execTime, statValue);

    reporter.setStatus("finished " + name + " ::host = " + mHostname);
}

From source file:alluxio.client.hadoop.AccumulatingReducer.java

License:Apache License

/**
 * This method accumulates values based on their type.
 *
 * @param key the type of values
 * @param values the values to accumulate
 * @param output collect the result of accumulating
 * @param reporter to report progress and update status information
 */
public void reduce(Text key, Iterator<Text> values, OutputCollector<Text, Text> output, Reporter reporter)
        throws IOException {
    String field = key.toString();

    reporter.setStatus("starting " + field + " ::host = " + mHostname);

    // concatenate strings
    if (field.startsWith(VALUE_TYPE_STRING)) {
        StringBuilder sSum = new StringBuilder();
        while (values.hasNext()) {
            sSum.append(values.next().toString()).append(";");
        }
        output.collect(key, new Text(sSum.toString()));
        reporter.setStatus("finished " + field + " ::host = " + mHostname);
        return;
    }
    // sum float values
    if (field.startsWith(VALUE_TYPE_FLOAT)) {
        float fSum = 0;
        while (values.hasNext()) {
            fSum += Float.parseFloat(values.next().toString());
        }
        output.collect(key, new Text(String.valueOf(fSum)));
        reporter.setStatus("finished " + field + " ::host = " + mHostname);
        return;
    }
    // sum long values
    if (field.startsWith(VALUE_TYPE_LONG)) {
        long lSum = 0;
        while (values.hasNext()) {
            lSum += Long.parseLong(values.next().toString());
        }
        output.collect(key, new Text(String.valueOf(lSum)));
    }
    reporter.setStatus("finished " + field + " ::host = " + mHostname);
}

From source file:alluxio.hadoop.fs.AccumulatingReducer.java

License:Apache License

public void reduce(Text key, Iterator<Text> values, OutputCollector<Text, Text> output, Reporter reporter)
        throws IOException {
    String field = key.toString();

    reporter.setStatus("starting " + field + " ::host = " + mHostname);

    // concatenate strings
    if (field.startsWith(VALUE_TYPE_STRING)) {
        StringBuffer sSum = new StringBuffer();
        while (values.hasNext()) {
            sSum.append(values.next().toString()).append(";");
        }
        output.collect(key, new Text(sSum.toString()));
        reporter.setStatus("finished " + field + " ::host = " + mHostname);
        return;
    }
    // sum float values
    if (field.startsWith(VALUE_TYPE_FLOAT)) {
        float fSum = 0;
        while (values.hasNext()) {
            fSum += Float.parseFloat(values.next().toString());
        }
        output.collect(key, new Text(String.valueOf(fSum)));
        reporter.setStatus("finished " + field + " ::host = " + mHostname);
        return;
    }
    // sum long values
    if (field.startsWith(VALUE_TYPE_LONG)) {
        long lSum = 0;
        while (values.hasNext()) {
            lSum += Long.parseLong(values.next().toString());
        }
        output.collect(key, new Text(String.valueOf(lSum)));
    }
    reporter.setStatus("finished " + field + " ::host = " + mHostname);
}