Example usage for org.apache.hadoop.mapred Reporter setStatus

Introduction

On this page you can find example usage for org.apache.hadoop.mapred Reporter setStatus.

Prototype

public abstract void setStatus(String status);

Document

Set the status description for the task.
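
Before the examples below, here is a minimal sketch of typical usage: an old-API mapper that periodically updates its task status via Reporter.setStatus. The class name LineCounterMapper and the update interval are illustrative assumptions, not taken from any of the source files on this page.

import java.io.IOException;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;

public class LineCounterMapper extends MapReduceBase
        implements Mapper<LongWritable, Text, Text, IntWritable> {

    private long processed = 0;

    public void map(LongWritable key, Text value, OutputCollector<Text, IntWritable> output,
            Reporter reporter) throws IOException {
        processed++;
        // Update the status string shown in the task UI; doing this only every
        // 1000 records keeps the reporting overhead low.
        if (processed % 1000 == 0) {
            reporter.setStatus("processed " + processed + " records");
        }
        output.collect(new Text("lines"), new IntWritable(1));
    }
}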

Usage

From source file:StreamWikiDumpInputFormat.java

License:Apache License

public RecordReader<Text, Text> getRecordReader(final InputSplit genericSplit, JobConf job, Reporter reporter)
        throws IOException {
    // handling non-standard record reader (likely StreamXmlRecordReader)
    FileSplit split = (FileSplit) genericSplit;
    LOG.info("getRecordReader start.....split=" + split);
    reporter.setStatus(split.toString());

    // Open the file and seek to the start of the split
    FileSystem fs = split.getPath().getFileSystem(job);
    String patt = job.get(KEY_EXCLUDE_PAGE_PATTERN);
    boolean prev = job.getBoolean(KEY_PREVIOUS_REVISION, true);
    return new MyRecordReader(split, reporter, job, fs,
            patt != null && !"".equals(patt) ? Pattern.compile(patt) : null, prev);
}

From source file:StreamWikiDumpInputFormat.java

License:Apache License

private static List<Long> getPageBytes(FileSplit split, FileSystem fs,
        CompressionCodecFactory compressionCodecs, Reporter reporter) throws IOException {
    SeekableInputStream in = null;
    try {
        in = SeekableInputStream.getInstance(split, fs, compressionCodecs);
        long start = split.getStart();
        long end = start + split.getLength();
        InputStream cin = null;
        if (cin != null) {
            // Note: cin is never assigned in this snippet, so this branch is dead code.
            // start = cin.getAdjustedStart();
            // end = cin.getAdjustedEnd() + 1;
        }
        ByteMatcher matcher = new ByteMatcher(in, in);
        List<Long> ret = new ArrayList<Long>();
        while (true) {
            if (matcher.getPos() >= end || !matcher.readUntilMatch(pageBeginPattern, null, end)) {
                break;
            }
            ret.add(matcher.getReadBytes() - pageBeginPattern.getBytes("UTF-8").length);
            if (matcher.getPos() >= end || !matcher.readUntilMatch(pageEndPattern, null, end)) {
                System.err.println("could not find " + pageEndPattern + ", page over a split?  pos="
                        + matcher.getPos() + " bytes=" + matcher.getReadBytes());
                // ret.add(end);
                break;
            }
            ret.add(matcher.getReadBytes() - pageEndPattern.getBytes("UTF-8").length);
            String report = String.format(
                    "StreamWikiDumpInputFormat: find page %6d start=%d pos=%d end=%d bytes=%d", ret.size(),
                    start, matcher.getPos(), end, matcher.getReadBytes());
            reporter.setStatus(report);
            reporter.incrCounter(WikiDumpCounters.FOUND_PAGES, 1);
            LOG.info(report);
        }
        if (ret.size() % 2 == 0) {
            ret.add(matcher.getReadBytes());
        }
        // System.err.println("getPageBytes " + ret);//!
        return ret;
    } finally {
        if (in != null) {
            in.close();
        }
    }
}

From source file:SleepJob.java

License:Apache License

public void map(IntWritable key, IntWritable value, OutputCollector<IntWritable, NullWritable> output,
        Reporter reporter) throws IOException {

    //it is expected that every map processes mapSleepCount number of records. 
    try {
        reporter.setStatus("Sleeping... (" + (mapSleepDuration * (mapSleepCount - count)) + ") ms left");
        Thread.sleep(mapSleepDuration);
    } catch (InterruptedException ex) {
        throw (IOException) new IOException("Interrupted while sleeping").initCause(ex);
    }
    ++count;
    // output reduceSleepCount * numReduce number of random values, so that
    // each reducer will get reduceSleepCount number of keys.
    int k = key.get();
    for (int i = 0; i < value.get(); ++i) {
        output.collect(new IntWritable(k + i), NullWritable.get());
    }
}

From source file:SleepJob.java

License:Apache License

public void reduce(IntWritable key, Iterator<NullWritable> values,
        OutputCollector<NullWritable, NullWritable> output, Reporter reporter) throws IOException {
    try {
        reporter.setStatus("Sleeping... (" + (reduceSleepDuration * (reduceSleepCount - count)) + ") ms left");
        Thread.sleep(reduceSleepDuration);

    } catch (InterruptedException ex) {
        throw (IOException) new IOException("Interrupted while sleeping").initCause(ex);
    }
    count++;
}

From source file:DataJoinReducerBase.java

License:Apache License

/**
 * This is the function that re-groups values for a key into sub-groups based
 * on a secondary key (input tag).
 *
 * @param key the map output key for the current group
 * @param arg1 iterator over the tagged values associated with the key
 * @param reporter used to report progress and update the task status
 * @return the values of the group, re-grouped by their input tag
 * @throws IOException
 */
private SortedMap<Object, ResetableIterator> regroup(Object key, Iterator arg1, Reporter reporter)
        throws IOException {
    this.numOfValues = 0;
    SortedMap<Object, ResetableIterator> retv = new TreeMap<Object, ResetableIterator>();
    TaggedMapOutput aRecord = null;
    while (arg1.hasNext()) {
        this.numOfValues += 1;
        if (this.numOfValues % 100 == 0) {
            reporter.setStatus("key: " + key.toString() + " numOfValues: " + this.numOfValues);
        }
        if (this.numOfValues > this.maxNumOfValuesPerGroup) {
            continue;
        }
        aRecord = ((TaggedMapOutput) arg1.next()).clone(job);
        Text tag = aRecord.getTag();
        ResetableIterator data = retv.get(tag);
        if (data == null) {
            data = createResetableIterator();
            retv.put(tag, data);
        }
        data.add(aRecord);
    }
    if (this.numOfValues > this.largestNumOfValues) {
        this.largestNumOfValues = numOfValues;
        LOG.info("key: " + key.toString() + " this.largestNumOfValues: " + this.largestNumOfValues);
    }
    return retv;
}

From source file:DataJoinReducerBase.java

License:Apache License

/**
 * The subclass can override this method to perform additional filtering
 * and/or other processing logic before a value is collected.
 *
 * @param key the output key
 * @param aRecord the tagged record to collect
 * @param output the collector that receives the final output
 * @param reporter used to report progress and update the task status
 * @throws IOException
 */
protected void collect(Object key, TaggedMapOutput aRecord, OutputCollector output, Reporter reporter)
        throws IOException {
    this.collected += 1;
    addLongValue("collectedCount", 1);
    if (aRecord != null) {
        output.collect(key, aRecord.getData());
        reporter.setStatus("key: " + key.toString() + " collected: " + collected);
        addLongValue("actuallyCollectedCount", 1);
    }
}

From source file:SleepJobWithArray.java

License:Apache License

public void map(IntWritable key, IntWritable value, OutputCollector<IntWritable, NullWritable> output,
        Reporter reporter) throws IOException {

    if (initBigArray) {
        // Yes, I should use log4j :-/
        System.out.println("Requesting array of " + bigArraySize);
        // Allocate a large array to exercise task memory; the reference is intentionally unused.
        int[] foo = new int[bigArraySize];
    }
    //it is expected that every map processes mapSleepCount number of records. 
    try {
        reporter.setStatus("Sleeping... (" + (mapSleepDuration * (mapSleepCount - count)) + ") ms left");
        Thread.sleep(mapSleepDuration);
    } catch (InterruptedException ex) {
        throw (IOException) new IOException("Interrupted while sleeping").initCause(ex);
    }
    ++count;
    // output reduceSleepCount * numReduce number of random values, so that
    // each reducer will get reduceSleepCount number of keys.
    int k = key.get();
    for (int i = 0; i < value.get(); ++i) {
        output.collect(new IntWritable(k + i), NullWritable.get());
    }
}

From source file:alluxio.client.hadoop.AbstractIOMapper.java

License:Apache License

/**
 * Map file name and offset into statistical data.
 * <p>
 * The map task is to get the <tt>key</tt>, which contains the file name, and the <tt>value</tt>,
 * which is the offset within the file.
 *
 * The parameters are passed to the abstract method
 * {@link #doIO(Reporter, String,long)}, which performs the io operation,
 * usually read or write data, and then
 * {@link #collectStats(OutputCollector, String,long, Object)} is called
 * to prepare stat data for a subsequent reducer.
 */
@Override
public void map(Text key, LongWritable value, OutputCollector<Text, Text> output, Reporter reporter)
        throws IOException {
    String name = key.toString();
    long longValue = value.get();

    reporter.setStatus("starting " + name + " ::host = " + mHostname);

    mStream = getIOStream(name);
    T statValue = null;
    long tStart = System.currentTimeMillis();
    try {
        statValue = doIO(reporter, name, longValue);
    } finally {
        if (mStream != null) {
            mStream.close();
        }
    }
    long tEnd = System.currentTimeMillis();
    long execTime = tEnd - tStart;
    collectStats(output, name, execTime, statValue);

    reporter.setStatus("finished " + name + " ::host = " + mHostname);
}

From source file:alluxio.client.hadoop.AccumulatingReducer.java

License:Apache License

/**
 * This method accumulates values based on their type.
 *
 * @param key the type of values
 * @param values the values to accumulate
 * @param output collect the result of accumulating
 * @param reporter to report progress and update status information
 */
public void reduce(Text key, Iterator<Text> values, OutputCollector<Text, Text> output, Reporter reporter)
        throws IOException {
    String field = key.toString();

    reporter.setStatus("starting " + field + " ::host = " + mHostname);

    // concatenate strings
    if (field.startsWith(VALUE_TYPE_STRING)) {
        StringBuilder sSum = new StringBuilder();
        while (values.hasNext()) {
            sSum.append(values.next().toString()).append(";");
        }
        output.collect(key, new Text(sSum.toString()));
        reporter.setStatus("finished " + field + " ::host = " + mHostname);
        return;
    }
    // sum float values
    if (field.startsWith(VALUE_TYPE_FLOAT)) {
        float fSum = 0;
        while (values.hasNext()) {
            fSum += Float.parseFloat(values.next().toString());
        }
        output.collect(key, new Text(String.valueOf(fSum)));
        reporter.setStatus("finished " + field + " ::host = " + mHostname);
        return;
    }
    // sum long values
    if (field.startsWith(VALUE_TYPE_LONG)) {
        long lSum = 0;
        while (values.hasNext()) {
            lSum += Long.parseLong(values.next().toString());
        }
        output.collect(key, new Text(String.valueOf(lSum)));
    }
    reporter.setStatus("finished " + field + " ::host = " + mHostname);
}

From source file:alluxio.hadoop.fs.AccumulatingReducer.java

License:Apache License

public void reduce(Text key, Iterator<Text> values, OutputCollector<Text, Text> output, Reporter reporter)
        throws IOException {
    String field = key.toString();

    reporter.setStatus("starting " + field + " ::host = " + mHostname);

    // concatenate strings
    if (field.startsWith(VALUE_TYPE_STRING)) {
        StringBuffer sSum = new StringBuffer();
        while (values.hasNext()) {
            sSum.append(values.next().toString()).append(";");
        }
        output.collect(key, new Text(sSum.toString()));
        reporter.setStatus("finished " + field + " ::host = " + mHostname);
        return;
    }
    // sum float values
    if (field.startsWith(VALUE_TYPE_FLOAT)) {
        float fSum = 0;
        while (values.hasNext()) {
            fSum += Float.parseFloat(values.next().toString());
        }
        output.collect(key, new Text(String.valueOf(fSum)));
        reporter.setStatus("finished " + field + " ::host = " + mHostname);
        return;
    }
    // sum long values
    if (field.startsWith(VALUE_TYPE_LONG)) {
        long lSum = 0;
        while (values.hasNext()) {
            lSum += Long.parseLong(values.next().toString());
        }
        output.collect(key, new Text(String.valueOf(lSum)));
    }
    reporter.setStatus("finished " + field + " ::host = " + mHostname);
}