Example usage for org.apache.hadoop.io LongWritable set

Introduction

On this page you can find example usages of org.apache.hadoop.io.LongWritable.set.

Prototype

public void set(long value) 

Document

Set the value of this LongWritable.
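
As a quick illustration (a minimal sketch of my own, not taken from the projects below), set(long) lets a single LongWritable instance be reused for many values instead of allocating a new object per record:

import org.apache.hadoop.io.LongWritable;

public class LongWritableSetExample {
    public static void main(String[] args) {
        LongWritable value = new LongWritable();    // wraps 0 initially
        for (long i = 1; i <= 3; i++) {
            value.set(i);                           // overwrite the wrapped long in place
            System.out.println(value.get());        // prints 1, 2, 3
        }
    }
}

Reusing one instance this way is the common pattern in the examples below, where a single writable is filled via set and repeatedly appended to a SequenceFile or emitted per record.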

Usage

From source file: org.apache.nutch.crawl.CrawlDbReader.java

License: Apache License

public void processStatJob(String crawlDb, Configuration config, boolean sort) throws IOException {

    if (LOG.isInfoEnabled()) {
        LOG.info("CrawlDb statistics start: " + crawlDb);
    }

    Path tmpFolder = new Path(crawlDb, "stat_tmp" + System.currentTimeMillis());

    JobConf job = new NutchJob(config);
    job.setJobName("stats " + crawlDb);
    job.setBoolean("db.reader.stats.sort", sort);

    FileInputFormat.addInputPath(job, new Path(crawlDb, CrawlDb.CURRENT_NAME));
    job.setInputFormat(SequenceFileInputFormat.class);

    job.setMapperClass(CrawlDbStatMapper.class);
    job.setCombinerClass(CrawlDbStatCombiner.class);
    job.setReducerClass(CrawlDbStatReducer.class);

    FileOutputFormat.setOutputPath(job, tmpFolder);
    job.setOutputFormat(SequenceFileOutputFormat.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);

    // https://issues.apache.org/jira/browse/NUTCH-1029
    job.setBoolean("mapreduce.fileoutputcommitter.marksuccessfuljobs", false);

    JobClient.runJob(job);

    // reading the result
    FileSystem fileSystem = FileSystem.get(config);
    SequenceFile.Reader[] readers = SequenceFileOutputFormat.getReaders(config, tmpFolder);

    Text key = new Text();
    LongWritable value = new LongWritable();

    TreeMap<String, LongWritable> stats = new TreeMap<String, LongWritable>();
    for (int i = 0; i < readers.length; i++) {
        SequenceFile.Reader reader = readers[i];
        while (reader.next(key, value)) {
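            // aggregate per key: "scx" keeps the maximum, "scn" the minimum, all other keys are summed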
            String k = key.toString();
            LongWritable val = stats.get(k);
            if (val == null) {
                val = new LongWritable();
                if (k.equals("scx"))
                    val.set(Long.MIN_VALUE);
                if (k.equals("scn"))
                    val.set(Long.MAX_VALUE);
                stats.put(k, val);
            }
            if (k.equals("scx")) {
                if (val.get() < value.get())
                    val.set(value.get());
            } else if (k.equals("scn")) {
                if (val.get() > value.get())
                    val.set(value.get());
            } else {
                val.set(val.get() + value.get());
            }
        }
        reader.close();
    }

    if (LOG.isInfoEnabled()) {
        LOG.info("Statistics for CrawlDb: " + crawlDb);
        LongWritable totalCnt = stats.get("T");
        stats.remove("T");
        LOG.info("TOTAL urls:\t" + totalCnt.get());
        for (Map.Entry<String, LongWritable> entry : stats.entrySet()) {
            String k = entry.getKey();
            LongWritable val = entry.getValue();
            if (k.equals("scn")) {
                LOG.info("min score:\t" + (float) (val.get() / 1000.0f));
            } else if (k.equals("scx")) {
                LOG.info("max score:\t" + (float) (val.get() / 1000.0f));
            } else if (k.equals("sct")) {
                LOG.info("avg score:\t" + (float) ((((double) val.get()) / totalCnt.get()) / 1000.0));
            } else if (k.startsWith("status")) {
                String[] st = k.split(" ");
                int code = Integer.parseInt(st[1]);
                if (st.length > 2)
                    LOG.info("   " + st[2] + " :\t" + val);
                else
                    LOG.info(st[0] + " " + code + " (" + CrawlDatum.getStatusName((byte) code) + "):\t" + val);
            } else
                LOG.info(k + ":\t" + val);
        }
    }
    // removing the tmp folder
    fileSystem.delete(tmpFolder, true);
    if (LOG.isInfoEnabled()) {
        LOG.info("CrawlDb statistics: done");
    }

}

From source file: org.apache.nutch.crawl.TestMapWritable.java

License: Apache License

public void testPerformance() throws Exception {
    FileSystem fs = FileSystem.get(configuration);
    Path file = new Path(System.getProperty("java.io.tmpdir"), "mapTestFile");
    fs.delete(file);
    org.apache.hadoop.io.SequenceFile.Writer writer = SequenceFile.createWriter(fs, configuration, file,
            IntWritable.class, MapWritable.class);
    // write map
    System.out.println("start writing map's");
    long start = System.currentTimeMillis();
    IntWritable key = new IntWritable();
    MapWritable map = new MapWritable();
    LongWritable mapValue = new LongWritable();
    for (int i = 0; i < 1000000; i++) {
        key.set(i);
        mapValue.set(i);
        map.put(key, mapValue);
        writer.append(key, map);
    }
    long needed = System.currentTimeMillis() - start;
    writer.close();
    System.out.println("needed time for writing map's: " + needed);

    // read map

    org.apache.hadoop.io.SequenceFile.Reader reader = new SequenceFile.Reader(fs, file, configuration);
    System.out.println("start reading map's");
    start = System.currentTimeMillis();
    while (reader.next(key, map)) {

    }
    reader.close();
    needed = System.currentTimeMillis() - start;
    System.out.println("needed time for reading map's: " + needed);
    fs.delete(file);

    // Text
    System.out.println("start writing Text's");
    writer = SequenceFile.createWriter(fs, configuration, file, IntWritable.class, Text.class);
    // write Text values
    start = System.currentTimeMillis();
    key = new IntWritable();
    Text value = new Text();
    String s = "15726:15726";
    for (int i = 0; i < 1000000; i++) {
        key.set(i);
        value.set(s);
        writer.append(key, value);
    }
    needed = System.currentTimeMillis() - start;
    writer.close();
    System.out.println("needed time for writing Text's: " + needed);

    // read Text values
    System.out.println("start reading Text's");
    reader = new SequenceFile.Reader(fs, file, configuration);
    start = System.currentTimeMillis();
    while (reader.next(key, value)) {

    }
    needed = System.currentTimeMillis() - start;
    System.out.println("needed time for reading Text: " + needed);
    fs.delete(file);
}

From source file: org.apache.nutch.crawl.WebTableReader.java

License: Apache License

@Override
public Map<String, Object> run(Map<String, Object> args) throws Exception {
    Path tmpFolder = new Path(getConf().get("mapred.temp.dir", ".") + "stat_tmp" + System.currentTimeMillis());

    numJobs = 1;
    currentJob = new NutchJob(getConf(), "db_stats");

    currentJob.getConfiguration().setBoolean("mapreduce.fileoutputcommitter.marksuccessfuljobs", false);

    Boolean sort = (Boolean) args.get(Nutch.ARG_SORT);
    if (sort == null)
        sort = Boolean.FALSE;
    currentJob.getConfiguration().setBoolean("db.reader.stats.sort", sort);

    DataStore<String, WebPage> store = StorageUtils.createWebStore(currentJob.getConfiguration(), String.class,
            WebPage.class);
    Query<String, WebPage> query = store.newQuery();

    // remove the __g__dirty field since it is not stored
    String[] fields = Arrays.copyOfRange(WebPage._ALL_FIELDS, 1, WebPage._ALL_FIELDS.length);
    query.setFields(fields);

    GoraMapper.initMapperJob(currentJob, query, store, Text.class, LongWritable.class, WebTableStatMapper.class,
            null, true);

    currentJob.setCombinerClass(WebTableStatCombiner.class);
    currentJob.setReducerClass(WebTableStatReducer.class);

    FileOutputFormat.setOutputPath(currentJob, tmpFolder);

    currentJob.setOutputFormatClass(SequenceFileOutputFormat.class);

    currentJob.setOutputKeyClass(Text.class);
    currentJob.setOutputValueClass(LongWritable.class);
    FileSystem fileSystem = FileSystem.get(getConf());

    try {
        currentJob.waitForCompletion(true);
    } finally {
        ToolUtil.recordJobStatus(null, currentJob, results);
        if (!currentJob.isSuccessful()) {
            fileSystem.delete(tmpFolder, true);
            return results;
        }
    }

    Text key = new Text();
    LongWritable value = new LongWritable();

    SequenceFile.Reader[] readers = org.apache.hadoop.mapred.SequenceFileOutputFormat.getReaders(getConf(),
            tmpFolder);

    TreeMap<String, LongWritable> stats = new TreeMap<String, LongWritable>();
    for (int i = 0; i < readers.length; i++) {
        SequenceFile.Reader reader = readers[i];
        while (reader.next(key, value)) {
            String k = key.toString();
            LongWritable val = stats.get(k);
            if (val == null) {
                val = new LongWritable();
                if (k.equals("scx"))
                    val.set(Long.MIN_VALUE);
                if (k.equals("scn"))
                    val.set(Long.MAX_VALUE);
                stats.put(k, val);
            }
            if (k.equals("scx")) {
                if (val.get() < value.get())
                    val.set(value.get());
            } else if (k.equals("scn")) {
                if (val.get() > value.get())
                    val.set(value.get());
            } else {
                val.set(val.get() + value.get());
            }
        }
        reader.close();
    }

    LongWritable totalCnt = stats.get("T");
    if (totalCnt == null)
        totalCnt = new LongWritable(0);
    stats.remove("T");
    results.put("TOTAL urls", totalCnt.get());
    for (Map.Entry<String, LongWritable> entry : stats.entrySet()) {
        String k = entry.getKey();
        LongWritable val = entry.getValue();
        if (k.equals("scn")) {
            results.put("min score", (val.get() / 1000.0f));
        } else if (k.equals("scx")) {
            results.put("max score", (val.get() / 1000.0f));
        } else if (k.equals("sct")) {
            results.put("avg score", (float) ((((double) val.get()) / totalCnt.get()) / 1000.0));
        } else if (k.startsWith("status")) {
            String[] st = k.split(" ");
            int code = Integer.parseInt(st[1]);
            if (st.length > 2)
                results.put(st[2], val.get());
            else
                results.put(st[0] + " " + code + " (" + CrawlStatus.getName((byte) code) + ")", val.get());
        } else
            results.put(k, val.get());
    }
    // removing the tmp folder
    fileSystem.delete(tmpFolder, true);

    return results;
}

From source file: org.apache.nutch.mapreduce.WebTableReader.java

License: Apache License

@Override
protected void doRun(Map<String, Object> args) throws Exception {
    Path tmpFolder = new Path(getConf().get("mapred.temp.dir", ".") + "stat_tmp" + System.currentTimeMillis());

    currentJob.getConfiguration().setBoolean("mapreduce.fileoutputcommitter.marksuccessfuljobs", false);

    Boolean sort = (Boolean) args.get(Nutch.ARG_SORT);
    if (sort == null)
        sort = Boolean.FALSE;
    currentJob.getConfiguration().setBoolean("db.reader.stats.sort", sort);

    DataStore<String, WebPage> store = StorageUtils.createWebStore(currentJob.getConfiguration(), String.class,
            WebPage.class);
    Query<String, WebPage> query = store.newQuery();

    // remove the __g__dirty field since it is not stored
    String[] fields = Arrays.copyOfRange(WebPage._ALL_FIELDS, 1, WebPage._ALL_FIELDS.length);
    query.setFields(fields);

    GoraMapper.initMapperJob(currentJob, query, store, Text.class, LongWritable.class, WebTableStatMapper.class,
            null, true);

    currentJob.setCombinerClass(WebTableStatCombiner.class);
    currentJob.setReducerClass(WebTableStatReducer.class);

    FileOutputFormat.setOutputPath(currentJob, tmpFolder);

    currentJob.setOutputFormatClass(SequenceFileOutputFormat.class);

    currentJob.setOutputKeyClass(Text.class);
    currentJob.setOutputValueClass(LongWritable.class);
    FileSystem fileSystem = FileSystem.get(getConf());

    try {
        currentJob.waitForCompletion(true);
    } finally {
        if (!currentJob.isSuccessful()) {
            fileSystem.delete(tmpFolder, true);
            return;
        }
    }

    Text key = new Text();
    LongWritable value = new LongWritable();

    SequenceFile.Reader[] readers = org.apache.hadoop.mapred.SequenceFileOutputFormat.getReaders(getConf(),
            tmpFolder);

    TreeMap<String, LongWritable> stats = new TreeMap<String, LongWritable>();
    for (int i = 0; i < readers.length; i++) {
        SequenceFile.Reader reader = readers[i];
        while (reader.next(key, value)) {
            String k = key.toString();
            LongWritable val = stats.get(k);
            if (val == null) {
                val = new LongWritable();
                if (k.equals("scx"))
                    val.set(Long.MIN_VALUE);
                if (k.equals("scn"))
                    val.set(Long.MAX_VALUE);
                stats.put(k, val);
            }
            if (k.equals("scx")) {
                if (val.get() < value.get())
                    val.set(value.get());
            } else if (k.equals("scn")) {
                if (val.get() > value.get())
                    val.set(value.get());
            } else {
                val.set(val.get() + value.get());
            }
        }
        reader.close();
    }

    LongWritable totalCnt = stats.get("T");
    if (totalCnt == null)
        totalCnt = new LongWritable(0);
    stats.remove("T");
    results.put("TOTAL urls", totalCnt.get());
    for (Map.Entry<String, LongWritable> entry : stats.entrySet()) {
        String k = entry.getKey();
        LongWritable val = entry.getValue();
        if (k.equals("scn")) {
            results.put("min score", (val.get() / 1000.0f));
        } else if (k.equals("scx")) {
            results.put("max score", (val.get() / 1000.0f));
        } else if (k.equals("sct")) {
            results.put("avg score", (float) ((((double) val.get()) / totalCnt.get()) / 1000.0));
        } else if (k.startsWith("status")) {
            String[] st = k.split(" ");
            int code = Integer.parseInt(st[1]);
            if (st.length > 2)
                results.put(st[2], val.get());
            else
                results.put(st[0] + " " + code + " (" + CrawlStatus.getName((byte) code) + ")", val.get());
        } else
            results.put(k, val.get());
    }
    // removing the tmp folder
    fileSystem.delete(tmpFolder, true);
}

From source file: org.apache.orc.mapred.OrcMapredRecordReader.java

License: Apache License

static LongWritable nextLong(ColumnVector vector, int row, Object previous) {
    if (vector.isRepeating) {
        row = 0;
    }
    if (vector.noNulls || !vector.isNull[row]) {
        LongWritable result;
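        // reuse the caller-supplied writable when its type matches; otherwise allocate a new one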
        if (previous == null || previous.getClass() != LongWritable.class) {
            result = new LongWritable();
        } else {
            result = (LongWritable) previous;
        }
        result.set(((LongColumnVector) vector).vector[row]);
        return result;
    } else {
        return null;
    }
}

From source file: org.apache.orc.mapred.TestOrcOutputFormat.java

License: Apache License

/**
 * Test the case where the top level isn't a struct, but a long.
 */
@Test
public void testLongRoot() throws Exception {
    conf.set("mapreduce.task.attempt.id", "attempt_20160101_0001_m_000001_0");
    conf.setOutputCommitter(NullOutputCommitter.class);
    conf.set(OrcConf.COMPRESS.getAttribute(), "SNAPPY");
    conf.setInt(OrcConf.ROW_INDEX_STRIDE.getAttribute(), 1000);
    conf.setInt(OrcConf.BUFFER_SIZE.getAttribute(), 64 * 1024);
    conf.set(OrcConf.WRITE_FORMAT.getAttribute(), "0.11");
    final String typeStr = "bigint";
    OrcConf.MAPRED_OUTPUT_SCHEMA.setString(conf, typeStr);
    FileOutputFormat.setOutputPath(conf, workDir);
    TypeDescription type = TypeDescription.fromString(typeStr);
    LongWritable value = new LongWritable();
    NullWritable nada = NullWritable.get();
    RecordWriter<NullWritable, LongWritable> writer = new OrcOutputFormat<LongWritable>().getRecordWriter(fs,
            conf, "long.orc", Reporter.NULL);
    for (long lo = 0; lo < 2000; ++lo) {
        value.set(lo);
        writer.write(nada, value);
    }
    writer.close(Reporter.NULL);

    Path path = new Path(workDir, "long.orc");
    Reader file = OrcFile.createReader(path, OrcFile.readerOptions(conf));
    assertEquals(CompressionKind.SNAPPY, file.getCompressionKind());
    assertEquals(2000, file.getNumberOfRows());
    assertEquals(1000, file.getRowIndexStride());
    assertEquals(64 * 1024, file.getCompressionSize());
    assertEquals(OrcFile.Version.V_0_11, file.getFileVersion());
    FileSplit split = new FileSplit(path, 0, 100000, new String[0]);
    RecordReader<NullWritable, LongWritable> reader = new OrcInputFormat<LongWritable>().getRecordReader(split,
            conf, Reporter.NULL);
    nada = reader.createKey();
    value = reader.createValue();
    for (long lo = 0; lo < 2000; ++lo) {
        assertEquals(true, reader.next(nada, value));
        assertEquals(lo, value.get());
    }
    assertEquals(false, reader.next(nada, value));
}

From source file: org.apache.sysml.runtime.io.FrameWriterBinaryBlock.java

License: Apache License

/**
 * Internal primitive to write a block-aligned row range of a frame to a single sequence file, 
 * which is used for both single- and multi-threaded writers (for consistency). 
 *
 * @param path file path
 * @param job job configuration
 * @param fs file system
 * @param src frame block
 * @param blen block length
 * @param rl lower row
 * @param ru upper row
 * @throws DMLRuntimeException if DMLRuntimeException occurs
 * @throws IOException if IOException occurs
 */
@SuppressWarnings("deprecation")
protected static void writeBinaryBlockFrameToSequenceFile(Path path, JobConf job, FileSystem fs, FrameBlock src,
        int blen, int rl, int ru) throws DMLRuntimeException, IOException {
    //1) create sequence file writer 
    SequenceFile.Writer writer = null;
    writer = new SequenceFile.Writer(fs, job, path, LongWritable.class, FrameBlock.class);

    try {
        //2) reblock and write
        LongWritable index = new LongWritable();
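        // sequence file keys are 1-based block row indexes (see index.set(1) and index.set(bi + 1) below)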

        if (src.getNumRows() <= blen) //opt for single block
        {
            //directly write single block
            index.set(1);
            writer.append(index, src);
        } else //general case
        {
            //initialize blocks for reuse (at most 4 different blocks required)
            FrameBlock[] blocks = createFrameBlocksForReuse(src.getSchema(), src.getColumnNames(),
                    src.getNumRows());

            //create and write subblocks of frame
            for (int bi = rl; bi < ru; bi += blen) {
                int len = Math.min(blen, src.getNumRows() - bi);

                //get reuse frame block and copy subpart to block (incl meta on first)
                FrameBlock block = getFrameBlockForReuse(blocks);
                src.sliceOperations(bi, bi + len - 1, 0, src.getNumColumns() - 1, block);
                if (bi == 0) //first block
                    block.setColumnMetadata(src.getColumnMetadata());

                //append block to sequence file
                index.set(bi + 1);
                writer.append(index, block);
            }
        }
    } finally {
        IOUtilFunctions.closeSilently(writer);
    }
}

From source file: org.apache.tez.mapreduce.input.TestMultiMRInput.java

License: Apache License

public static LinkedHashMap<LongWritable, Text> createInputData(FileSystem fs, Path workDir, JobConf job,
        String filename, long startKey, long numKeys) throws IOException {
    LinkedHashMap<LongWritable, Text> data = new LinkedHashMap<LongWritable, Text>();
    Path file = new Path(workDir, filename);
    LOG.info("Generating data at path: " + file);
    // create a file with length entries
    @SuppressWarnings("deprecation")
    SequenceFile.Writer writer = SequenceFile.createWriter(fs, job, file, LongWritable.class, Text.class);
    try {
        Random r = new Random(System.currentTimeMillis());
        LongWritable key = new LongWritable();
        Text value = new Text();
        for (long i = startKey; i < numKeys; i++) {
            key.set(i);
            value.set(Integer.toString(r.nextInt(10000)));
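            // copy key/value into fresh objects for the map, since the writable instances are reused each iteration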
            data.put(new LongWritable(key.get()), new Text(value.toString()));
            writer.append(key, value);
            LOG.info("<k, v> : <" + key.get() + ", " + value + ">");
        }
    } finally {
        writer.close();
    }
    return data;
}

From source file: org.apache.tez.mapreduce.processor.MapUtils.java

License: Apache License

private static InputSplit createInputSplit(FileSystem fs, Path workDir, JobConf job, Path file)
        throws IOException {
    FileInputFormat.setInputPaths(job, workDir);

    LOG.info("Generating data at path: " + file);
    // create a file with length entries
    @SuppressWarnings("deprecation")
    SequenceFile.Writer writer = SequenceFile.createWriter(fs, job, file, LongWritable.class, Text.class);
    try {
        Random r = new Random(System.currentTimeMillis());
        LongWritable key = new LongWritable();
        Text value = new Text();
        for (int i = 10; i > 0; i--) {
            key.set(r.nextInt(1000));
            value.set(Integer.toString(i));
            writer.append(key, value);
            LOG.info("<k, v> : <" + key.get() + ", " + value + ">");
        }
    } finally {
        writer.close();
    }

    SequenceFileInputFormat<LongWritable, Text> format = new SequenceFileInputFormat<LongWritable, Text>();
    InputSplit[] splits = format.getSplits(job, 1);
    System.err.println("#split = " + splits.length + " ; " + "#locs = " + splits[0].getLocations().length + "; "
            + "loc = " + splits[0].getLocations()[0] + "; " + "off = " + splits[0].getLength() + "; "
            + "file = " + ((FileSplit) splits[0]).getPath());
    return splits[0];
}

From source file: org.apache.tez.runtime.library.common.writers.TestUnorderedPartitionedKVWriter.java

License: Apache License

private void baseTest(int numRecords, int numPartitions, Set<Integer> skippedPartitions, boolean shouldCompress)
        throws IOException, InterruptedException {
    PartitionerForTest partitioner = new PartitionerForTest();
    ApplicationId appId = ApplicationId.newInstance(10000, 1);
    TezCounters counters = new TezCounters();
    String uniqueId = UUID.randomUUID().toString();
    OutputContext outputContext = createMockOutputContext(counters, appId, uniqueId);

    Configuration conf = createConfiguration(outputContext, IntWritable.class, LongWritable.class,
            shouldCompress, -1);
    CompressionCodec codec = null;
    if (shouldCompress) {
        codec = new DefaultCodec();
        ((Configurable) codec).setConf(conf);
    }

    int numOutputs = numPartitions;
    long availableMemory = 2048;
    int numRecordsWritten = 0;

    Map<Integer, Multimap<Integer, Long>> expectedValues = new HashMap<Integer, Multimap<Integer, Long>>();
    for (int i = 0; i < numOutputs; i++) {
        expectedValues.put(i, LinkedListMultimap.<Integer, Long>create());
    }

    UnorderedPartitionedKVWriter kvWriter = new UnorderedPartitionedKVWriterForTest(outputContext, conf,
            numOutputs, availableMemory);

    int sizePerBuffer = kvWriter.sizePerBuffer;
    int sizePerRecord = 4 + 8; // IntW + LongW
    int sizePerRecordWithOverhead = sizePerRecord + 12; // Record + META_OVERHEAD

    IntWritable intWritable = new IntWritable();
    LongWritable longWritable = new LongWritable();
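    // a single IntWritable/LongWritable pair is reused across all records via set()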
    for (int i = 0; i < numRecords; i++) {
        intWritable.set(i);
        longWritable.set(i);
        int partition = partitioner.getPartition(intWritable, longWritable, numOutputs);
        if (skippedPartitions != null && skippedPartitions.contains(partition)) {
            continue;
        }
        expectedValues.get(partition).put(intWritable.get(), longWritable.get());
        kvWriter.write(intWritable, longWritable);
        numRecordsWritten++;
    }
    List<Event> events = kvWriter.close();

    int recordsPerBuffer = sizePerBuffer / sizePerRecordWithOverhead;
    int numExpectedSpills = numRecordsWritten / recordsPerBuffer;

    verify(outputContext, never()).fatalError(any(Throwable.class), any(String.class));

    // Verify the status of the buffers
    if (numExpectedSpills == 0) {
        assertEquals(1, kvWriter.numInitializedBuffers);
    } else {
        assertTrue(kvWriter.numInitializedBuffers > 1);
    }
    assertNull(kvWriter.currentBuffer);
    assertEquals(0, kvWriter.availableBuffers.size());

    // Verify the counters
    TezCounter outputRecordBytesCounter = counters.findCounter(TaskCounter.OUTPUT_BYTES);
    TezCounter outputRecordsCounter = counters.findCounter(TaskCounter.OUTPUT_RECORDS);
    TezCounter outputBytesWithOverheadCounter = counters.findCounter(TaskCounter.OUTPUT_BYTES_WITH_OVERHEAD);
    TezCounter fileOutputBytesCounter = counters.findCounter(TaskCounter.OUTPUT_BYTES_PHYSICAL);
    TezCounter spilledRecordsCounter = counters.findCounter(TaskCounter.SPILLED_RECORDS);
    TezCounter additionalSpillBytesWritternCounter = counters
            .findCounter(TaskCounter.ADDITIONAL_SPILLS_BYTES_WRITTEN);
    TezCounter additionalSpillBytesReadCounter = counters.findCounter(TaskCounter.ADDITIONAL_SPILLS_BYTES_READ);
    TezCounter numAdditionalSpillsCounter = counters.findCounter(TaskCounter.ADDITIONAL_SPILL_COUNT);
    assertEquals(numRecordsWritten * sizePerRecord, outputRecordBytesCounter.getValue());
    assertEquals(numRecordsWritten, outputRecordsCounter.getValue());
    assertEquals(numRecordsWritten * sizePerRecordWithOverhead, outputBytesWithOverheadCounter.getValue());
    long fileOutputBytes = fileOutputBytesCounter.getValue();
    if (numRecordsWritten > 0) {
        assertTrue(fileOutputBytes > 0);
        if (!shouldCompress) {
            assertTrue(fileOutputBytes > outputRecordBytesCounter.getValue());
        }
    } else {
        assertEquals(0, fileOutputBytes);
    }
    assertEquals(recordsPerBuffer * numExpectedSpills, spilledRecordsCounter.getValue());
    long additionalSpillBytesWritten = additionalSpillBytesWritternCounter.getValue();
    long additionalSpillBytesRead = additionalSpillBytesReadCounter.getValue();
    if (numExpectedSpills == 0) {
        assertEquals(0, additionalSpillBytesWritten);
        assertEquals(0, additionalSpillBytesRead);
    } else {
        assertTrue(additionalSpillBytesWritten > 0);
        assertTrue(additionalSpillBytesRead > 0);
        if (!shouldCompress) {
            assertTrue(additionalSpillBytesWritten > (recordsPerBuffer * numExpectedSpills * sizePerRecord));
            assertTrue(additionalSpillBytesRead > (recordsPerBuffer * numExpectedSpills * sizePerRecord));
        }
    }
    assertTrue(additionalSpillBytesWritten == additionalSpillBytesRead);
    assertEquals(numExpectedSpills, numAdditionalSpillsCounter.getValue());

    BitSet emptyPartitionBits = null;
    // Verify the event returned
    assertEquals(1, events.size());
    assertTrue(events.get(0) instanceof CompositeDataMovementEvent);
    CompositeDataMovementEvent cdme = (CompositeDataMovementEvent) events.get(0);
    assertEquals(0, cdme.getSourceIndexStart());
    assertEquals(numOutputs, cdme.getCount());
    DataMovementEventPayloadProto eventProto = DataMovementEventPayloadProto
            .parseFrom(ByteString.copyFrom(cdme.getUserPayload()));
    assertFalse(eventProto.hasData());
    if (skippedPartitions == null && numRecordsWritten > 0) {
        assertFalse(eventProto.hasEmptyPartitions());
        emptyPartitionBits = new BitSet(numPartitions);
    } else {
        assertTrue(eventProto.hasEmptyPartitions());
        byte[] emptyPartitions = TezCommonUtils
                .decompressByteStringToByteArray(eventProto.getEmptyPartitions());
        emptyPartitionBits = TezUtilsInternal.fromByteArray(emptyPartitions);
        if (numRecordsWritten == 0) {
            assertEquals(numPartitions, emptyPartitionBits.cardinality());
        } else {
            for (Integer e : skippedPartitions) {
                assertTrue(emptyPartitionBits.get(e));
            }
            assertEquals(skippedPartitions.size(), emptyPartitionBits.cardinality());
        }
    }
    if (emptyPartitionBits.cardinality() != numPartitions) {
        assertEquals(HOST_STRING, eventProto.getHost());
        assertEquals(SHUFFLE_PORT, eventProto.getPort());
        assertEquals(uniqueId, eventProto.getPathComponent());
    } else {
        assertFalse(eventProto.hasHost());
        assertFalse(eventProto.hasPort());
        assertFalse(eventProto.hasPathComponent());
    }

    // Verify the actual data
    TezTaskOutput taskOutput = new TezTaskOutputFiles(conf, uniqueId);
    Path outputFilePath = kvWriter.finalOutPath;
    Path spillFilePath = kvWriter.finalIndexPath;

    if (numRecordsWritten > 0) {
        assertTrue(localFs.exists(outputFilePath));
        assertTrue(localFs.exists(spillFilePath));
    } else {
        return;
    }

    // Special case for 0 records.
    TezSpillRecord spillRecord = new TezSpillRecord(spillFilePath, conf);
    DataInputBuffer keyBuffer = new DataInputBuffer();
    DataInputBuffer valBuffer = new DataInputBuffer();
    IntWritable keyDeser = new IntWritable();
    LongWritable valDeser = new LongWritable();
    for (int i = 0; i < numOutputs; i++) {
        if (skippedPartitions != null && skippedPartitions.contains(i)) {
            continue;
        }
        TezIndexRecord indexRecord = spillRecord.getIndex(i);
        FSDataInputStream inStream = FileSystem.getLocal(conf).open(outputFilePath);
        inStream.seek(indexRecord.getStartOffset());
        IFile.Reader reader = new IFile.Reader(inStream, indexRecord.getPartLength(), codec, null, null, false,
                0, -1);
        while (reader.nextRawKey(keyBuffer)) {
            reader.nextRawValue(valBuffer);
            keyDeser.readFields(keyBuffer);
            valDeser.readFields(valBuffer);
            int partition = partitioner.getPartition(keyDeser, valDeser, numOutputs);
            assertTrue(expectedValues.get(partition).remove(keyDeser.get(), valDeser.get()));
        }
        inStream.close();
    }
    for (int i = 0; i < numOutputs; i++) {
        assertEquals(0, expectedValues.get(i).size());
        expectedValues.remove(i);
    }
    assertEquals(0, expectedValues.size());
}