List of usage examples for org.apache.hadoop.io.IntWritable

Constructor: public IntWritable()
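Before the extracted examples, a minimal self-contained sketch of the pattern they all rely on (the class name IntWritableDemo is invented for illustration, not taken from the sources below): the no-argument constructor builds a mutable holder, initialized to 0, that is filled later via set(int) or readFields(DataInput). This is why the examples below allocate one IntWritable outside their read/write loops and reuse it for every record.

    import java.io.ByteArrayInputStream;
    import java.io.ByteArrayOutputStream;
    import java.io.DataInputStream;
    import java.io.DataOutputStream;
    import java.io.IOException;

    import org.apache.hadoop.io.IntWritable;

    public class IntWritableDemo {
        public static void main(String[] args) throws IOException {
            // The no-arg constructor creates a mutable holder whose value defaults to 0.
            IntWritable out = new IntWritable();
            out.set(42);

            // Serialize through the Writable contract.
            ByteArrayOutputStream buffer = new ByteArrayOutputStream();
            out.write(new DataOutputStream(buffer));

            // Deserialize into a second, reusable instance. Reusing one holder across
            // a read loop, rather than allocating per record, is the idiom the reader
            // loops in the examples below follow.
            IntWritable read = new IntWritable();
            read.readFields(new DataInputStream(new ByteArrayInputStream(buffer.toByteArray())));

            System.out.println(read.get()); // prints 42
        }
    }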
From source file:com.hdfs.concat.crush.CrushPartitioner.java
License:Apache License
@Override
public void configure(JobConf job) {
    String path = job.get("crush.partition.map");
    int expPartitions = job.getNumReduceTasks();

    bucketToPartition = new HashMap<Text, Integer>(100);

    try {
        FileSystem fs = FileSystem.get(job);
        Reader reader = new Reader(fs, new Path(path), job);

        Text bucket = new Text();
        IntWritable partNum = new IntWritable();

        while (reader.next(bucket, partNum)) {
            int partNumValue = partNum.get();

            if (partNumValue < 0 || partNumValue >= expPartitions) {
                throw new IllegalArgumentException("Partition " + partNumValue + " not allowed with "
                        + expPartitions + " reduce tasks");
            }

            Integer prev = bucketToPartition.put(new Text(bucket), partNumValue);

            if (null != prev) {
                throw new IllegalArgumentException("Bucket " + bucket + " appears more than once in " + path);
            }
        }
    } catch (IOException e) {
        throw new RuntimeException("Could not read partition map from " + path, e);
    }

    if (new HashSet<Integer>(bucketToPartition.values()).size() > expPartitions) {
        throw new IllegalArgumentException(path + " contains more than " + expPartitions + " distinct partitions");
    }
}
From source file:com.hdfs.concat.crush.CrushPartitionerTest.java
License:Apache License
@Test
public void partition() throws IOException {
    Writer writer = SequenceFile.createWriter(fs, job, partitionMap, Text.class, IntWritable.class);

    Text key = new Text();
    IntWritable partNum = new IntWritable();

    key.set("bucket-1");
    partNum.set(0);
    writer.append(key, partNum);

    key.set("bucket-2");
    partNum.set(0);
    writer.append(key, partNum);

    key.set("bucket-3");
    partNum.set(1);
    writer.append(key, partNum);

    key.set("bucket-4");
    partNum.set(2);
    writer.append(key, partNum);

    key.set("bucket-5");
    partNum.set(2);
    writer.append(key, partNum);

    key.set("bucket-6");
    partNum.set(2);
    writer.append(key, partNum);

    writer.close();

    job.setNumReduceTasks(3);
    partitioner.configure(job);

    Text fileName = new Text();

    key.set("bucket-1");
    for (int file = 0; file < 4; file++) {
        fileName.set("file" + file);
        assertThat(partitioner.getPartition(key, fileName, 3), equalTo(0));
    }

    key.set("bucket-2");
    for (int file = 0; file < 4; file++) {
        fileName.set("file" + file);
        assertThat(partitioner.getPartition(key, fileName, 3), equalTo(0));
    }

    key.set("bucket-3");
    for (int file = 0; file < 4; file++) {
        fileName.set("file" + file);
        assertThat(partitioner.getPartition(key, fileName, 3), equalTo(1));
    }

    key.set("bucket-4");
    for (int file = 0; file < 4; file++) {
        fileName.set("file" + file);
        assertThat(partitioner.getPartition(key, fileName, 3), equalTo(2));
    }

    key.set("bucket-5");
    for (int file = 0; file < 4; file++) {
        fileName.set("file" + file);
        assertThat(partitioner.getPartition(key, fileName, 3), equalTo(2));
    }

    key.set("bucket-6");
    for (int file = 0; file < 4; file++) {
        fileName.set("file" + file);
        assertThat(partitioner.getPartition(key, fileName, 3), equalTo(2));
    }
}
From source file:com.hdfs.concat.crush.CrushPartitionerTest.java
License:Apache License
@Test
public void partitionWithFewerPartitionsThanReduceTasks() throws IOException {
    Writer writer = SequenceFile.createWriter(fs, job, partitionMap, Text.class, IntWritable.class);

    Text key = new Text();
    IntWritable partNum = new IntWritable();

    key.set("bucket-1");
    partNum.set(0);
    writer.append(key, partNum);

    key.set("bucket-2");
    partNum.set(0);
    writer.append(key, partNum);

    key.set("bucket-3");
    partNum.set(1);
    writer.append(key, partNum);

    key.set("bucket-4");
    partNum.set(2);
    writer.append(key, partNum);

    key.set("bucket-5");
    partNum.set(2);
    writer.append(key, partNum);

    key.set("bucket-6");
    partNum.set(2);
    writer.append(key, partNum);

    writer.close();

    job.setNumReduceTasks(40);
    partitioner.configure(job);

    Text fileName = new Text();

    key.set("bucket-1");
    for (int file = 0; file < 4; file++) {
        fileName.set("file" + file);
        assertThat(partitioner.getPartition(key, fileName, 3), equalTo(0));
    }

    key.set("bucket-2");
    for (int file = 0; file < 4; file++) {
        fileName.set("file" + file);
        assertThat(partitioner.getPartition(key, fileName, 3), equalTo(0));
    }

    key.set("bucket-3");
    for (int file = 0; file < 4; file++) {
        fileName.set("file" + file);
        assertThat(partitioner.getPartition(key, fileName, 3), equalTo(1));
    }

    key.set("bucket-4");
    for (int file = 0; file < 4; file++) {
        fileName.set("file" + file);
        assertThat(partitioner.getPartition(key, fileName, 3), equalTo(2));
    }

    key.set("bucket-5");
    for (int file = 0; file < 4; file++) {
        fileName.set("file" + file);
        assertThat(partitioner.getPartition(key, fileName, 3), equalTo(2));
    }

    key.set("bucket-6");
    for (int file = 0; file < 4; file++) {
        fileName.set("file" + file);
        assertThat(partitioner.getPartition(key, fileName, 3), equalTo(2));
    }
}
From source file:com.hdfs.concat.crush.CrushPartitionerTest.java
License:Apache License
@Test
public void noDupes() throws IOException {
    Writer writer = SequenceFile.createWriter(fs, job, partitionMap, Text.class, IntWritable.class);

    Text key = new Text();
    IntWritable value = new IntWritable();

    key.set("bucket-1");
    value.set(0);
    writer.append(key, value);

    key.set("bucket-2");
    value.set(0);
    writer.append(key, value);

    key.set("bucket-2");
    value.set(1);
    writer.append(key, value);

    writer.close();

    job.setNumReduceTasks(3);

    try {
        partitioner.configure(job);
        fail();
    } catch (IllegalArgumentException e) {
        if (!e.getMessage().contains("bucket-2")) {
            throw e;
        }
    }
}
From source file:com.hdfs.concat.crush.CrushPartitionerTest.java
License:Apache License
@Test
public void partitionTooLow() throws IOException {
    Writer writer = SequenceFile.createWriter(fs, job, partitionMap, Text.class, IntWritable.class);

    Text key = new Text();
    IntWritable partNum = new IntWritable();

    key.set("bucket-1");
    partNum.set(0);
    writer.append(key, partNum);

    key.set("bucket-2");
    partNum.set(0);
    writer.append(key, partNum);

    key.set("bucket-4");
    partNum.set(2);
    writer.append(key, partNum);

    key.set("bucket-5");
    partNum.set(2);
    writer.append(key, partNum);

    key.set("bucket-6");
    partNum.set(-1);
    writer.append(key, partNum);

    writer.close();

    job.setNumReduceTasks(3);

    try {
        partitioner.configure(job);
        fail("No such thing as a negative partition");
    } catch (IllegalArgumentException e) {
        if (!e.getMessage().contains("Partition -1")) {
            throw e;
        }
    }
}
From source file:com.hdfs.concat.crush.CrushPartitionerTest.java
License:Apache License
@Test
public void partitionTooHigh() throws IOException {
    Writer writer = SequenceFile.createWriter(fs, job, partitionMap, Text.class, IntWritable.class);

    Text key = new Text();
    IntWritable partNum = new IntWritable();

    key.set("bucket-1");
    partNum.set(0);
    writer.append(key, partNum);

    key.set("bucket-2");
    partNum.set(0);
    writer.append(key, partNum);

    key.set("bucket-4");
    partNum.set(2);
    writer.append(key, partNum);

    key.set("bucket-5");
    partNum.set(2);
    writer.append(key, partNum);

    key.set("bucket-6");
    partNum.set(3);
    writer.append(key, partNum);

    writer.close();

    job.setNumReduceTasks(3);

    try {
        partitioner.configure(job);
        fail("Partition with id 3 is not allowed with 3 reduce tasks");
    } catch (IllegalArgumentException e) {
        if (!e.getMessage().contains("Partition 3")) {
            throw e;
        }
    }
}
From source file:com.hdfs.concat.crush.CrushStandAloneSequenceFileTest.java
License:Apache License
private void verifyCrushOutput(File crushOutput, int[]... keyCounts) throws IOException {
    List<String> actual = new ArrayList<String>();

    Text text = new Text();
    IntWritable value = new IntWritable();

    Reader reader = new Reader(FileSystem.get(job), new Path(crushOutput.getAbsolutePath()), job);

    while (reader.next(text, value)) {
        actual.add(format("%s\t%d", text, value.get()));
    }

    reader.close();

    int expLines = 0;
    List<List<String>> expected = new ArrayList<List<String>>();

    for (int[] keyCount : keyCounts) {
        int key = keyCount[0];
        int count = keyCount[1];

        List<String> lines = new ArrayList<String>();
        expected.add(lines);

        for (int i = 0, j = 0; i < count; i++, j = j == 9 ? 0 : j + 1) {
            String line = format("%d\t%d", key, j);
            lines.add(line);
        }

        expLines += count;
    }

    /*
     * Make sure each file's data is contiguous in the crush output file.
     */
    for (List<String> list : expected) {
        int idx = actual.indexOf(list.get(0));
        assertThat(idx, greaterThanOrEqualTo(0));

        assertThat(actual.subList(idx, idx + list.size()), equalTo(list));
    }

    assertThat(actual.size(), equalTo(expLines));
}
From source file:com.hdfs.concat.crush.CrushStandAloneSequenceFileTest.java
License:Apache License
private void createFile(File dir, String fileName, int key, int count) throws IOException {
    File file = new File(dir, fileName);

    Writer writer = SequenceFile.createWriter(FileSystem.get(job), job, new Path(file.getAbsolutePath()),
            Text.class, IntWritable.class);

    Text text = new Text(Integer.toString(key));
    IntWritable value = new IntWritable();

    for (int i = 0, j = 0; i < count; i++, j = j == 9 ? 0 : j + 1) {
        value.set(j);
        writer.append(text, value);
    }

    writer.close();
}
From source file:com.hdfs.concat.crush.CrushStandAloneSequenceFileTest.java
License:Apache License
private void verifyFile(File dir, String fileName, int key, int count) throws IOException {
    File file = new File(dir, fileName);

    Reader reader = new Reader(FileSystem.get(job), new Path(file.getAbsolutePath()), job);

    int i = 0;
    int actual = 0;

    Text text = new Text();
    IntWritable value = new IntWritable();

    while (reader.next(text, value)) {
        assertThat(text.toString(), equalTo(Integer.toString(key)));
        assertThat(value.get(), equalTo(i));

        if (i == 9) {
            i = 0;
        } else {
            i++;
        }

        actual++;
    }

    reader.close();

    assertThat(actual, equalTo(count));
}
From source file:com.hdfs.concat.crush.CrushTest.java
License:Apache License
@Test
public void bucketing() throws Exception {
    File in = tmp.newFolder("in");

    Counters expectedCounters = new Counters();
    List<String> expectedBucketFiles = new ArrayList<String>();

    /*
     * Create a hierarchy of directories. Directories are distinguished by a trailing slash in these comments.
     *
     * 1/
     *   1.1/
     *     file1  10 bytes
     *     file2  20 bytes
     *     file3  30 bytes
     *     file4  41 bytes
     *     file5  15 bytes
     *     file6  30 bytes
     *     file7  20 bytes
     *   1.2/
     *     file1  20 bytes
     *     file2  10 bytes
     *   1.3/
     * 2/
     *   file1  70 bytes
     *   file2  30 bytes
     *   file3  25 bytes
     *   file4  30 bytes
     *   file5  35 bytes
     *   2.1/
     *     file1  10 bytes
     *   2.2/
     *     file1  25 bytes
     *     file2  15 bytes
     *     file3  35 bytes
     *   2.3/
     *     file1  41 bytes
     *     file2  10 bytes
     *   2.4/
     *     2.4.1/
     *       file1 100 bytes
     *       file2  30 bytes
     *     2.4.2/
     *       file1  20 bytes
     *       file2  20 bytes
     *       file3  10 bytes
     */

    /*
     * in contains 2 dirs and no files so it is skipped.
     *
     * in/
     *   1/
     *   2/
     */
    expectedCounters.incrCounter(MapperCounter.DIRS_FOUND, 1);
    expectedCounters.incrCounter(MapperCounter.DIRS_SKIPPED, 1);

    tmp.newFolder("in/1");
    File dir2 = tmp.newFolder("in/2");

    /*
     * in/1 contains three dirs and no files so it is skipped.
     *
     * in/
     *   1/
     *     1.1/
     *     1.2/
     *     1.3/
     */
    expectedCounters.incrCounter(MapperCounter.DIRS_FOUND, 1);
    expectedCounters.incrCounter(MapperCounter.DIRS_SKIPPED, 1);

    File dir1_1 = tmp.newFolder("in/1/1.1");
    File dir1_2 = tmp.newFolder("in/1/1.2");
    tmp.newFolder("in/1/1.3");

    /*
     * in/2 contains five files and four dirs.
     *
     * in/
     *   2/
     *     file1 70 bytes
     *     file2 30 bytes
     *     file3 25 bytes
     *     file4 30 bytes
     *     file5 35 bytes
     *     2.1/
     *     2.2/
     *     2.3/
     *     2.4/
     *
     *   0          1          2
     *   file5 35   file2 30   file4 30
     *              file3 25
     *
     * Buckets 0 and 2 have a single file each so they are ignored.
     */
    expectedCounters.incrCounter(MapperCounter.DIRS_FOUND, 1);
    expectedCounters.incrCounter(MapperCounter.DIRS_ELIGIBLE, 1);

    expectedCounters.incrCounter(MapperCounter.FILES_FOUND, 5);
    expectedCounters.incrCounter(MapperCounter.FILES_ELIGIBLE, 2);
    expectedCounters.incrCounter(MapperCounter.FILES_SKIPPED, 3);

    File dir2_1 = tmp.newFolder("in/2/2.1");
    File dir2_2 = tmp.newFolder("in/2/2.2");
    File dir2_3 = tmp.newFolder("in/2/2.3");
    tmp.newFolder("in/2/2.4");

    createFile(dir2, "file1", 70);
    createFile(dir2, "file2", 30);
    createFile(dir2, "file3", 25);
    createFile(dir2, "file4", 30);
    createFile(dir2, "file5", 35);

    expectedBucketFiles.add(format("%s\t%s", dir2.getAbsolutePath() + "-1", new File(dir2, "file2").getAbsolutePath()));
    expectedBucketFiles.add(format("%s\t%s", dir2.getAbsolutePath() + "-1", new File(dir2, "file3").getAbsolutePath()));

    /*
     * in/1/1.1 contains seven files and no dirs.
     *
     * in/
     *   1/
     *     1.1/
     *       file1 10 bytes
     *       file2 20 bytes
     *       file3 30 bytes
     *       file4 41 bytes
     *       file5 15 bytes
     *       file6 30 bytes
     *       file7 20 bytes
     *
     *   0          1          2
     *   file3 30   file6 30   file2 20
     *   file5 15   file1 10   file7 20
     *
     * file4 is > 50 * 0.8 so it is ignored.
     */
    expectedCounters.incrCounter(MapperCounter.DIRS_FOUND, 1);
    expectedCounters.incrCounter(MapperCounter.DIRS_ELIGIBLE, 1);

    expectedCounters.incrCounter(MapperCounter.FILES_FOUND, 7);
    expectedCounters.incrCounter(MapperCounter.FILES_ELIGIBLE, 6);
    expectedCounters.incrCounter(MapperCounter.FILES_SKIPPED, 1);

    createFile(dir1_1, "file1", 10);
    createFile(dir1_1, "file2", 20);
    createFile(dir1_1, "file3", 30);
    createFile(dir1_1, "file4", 41);
    createFile(dir1_1, "file5", 15);
    createFile(dir1_1, "file6", 30);
    createFile(dir1_1, "file7", 20);

    expectedBucketFiles.add(format("%s\t%s", dir1_1.getAbsolutePath() + "-0", new File(dir1_1, "file3").getAbsolutePath()));
    expectedBucketFiles.add(format("%s\t%s", dir1_1.getAbsolutePath() + "-0", new File(dir1_1, "file5").getAbsolutePath()));
    expectedBucketFiles.add(format("%s\t%s", dir1_1.getAbsolutePath() + "-1", new File(dir1_1, "file6").getAbsolutePath()));
    expectedBucketFiles.add(format("%s\t%s", dir1_1.getAbsolutePath() + "-1", new File(dir1_1, "file1").getAbsolutePath()));
    expectedBucketFiles.add(format("%s\t%s", dir1_1.getAbsolutePath() + "-2", new File(dir1_1, "file2").getAbsolutePath()));
    expectedBucketFiles.add(format("%s\t%s", dir1_1.getAbsolutePath() + "-2", new File(dir1_1, "file7").getAbsolutePath()));

    /*
     * in/1/1.2 contains two files.
     *
     * in/
     *   1/
     *     1.2/
     *       file1 20 bytes
     *       file2 10 bytes
     */
    expectedCounters.incrCounter(MapperCounter.DIRS_FOUND, 1);
    expectedCounters.incrCounter(MapperCounter.DIRS_ELIGIBLE, 1);

    expectedCounters.incrCounter(MapperCounter.FILES_FOUND, 2);
    expectedCounters.incrCounter(MapperCounter.FILES_ELIGIBLE, 2);

    createFile(dir1_2, "file1", 20);
    createFile(dir1_2, "file2", 10);

    expectedBucketFiles.add(format("%s\t%s", dir1_2.getAbsolutePath() + "-0", new File(dir1_2, "file1").getAbsolutePath()));
    expectedBucketFiles.add(format("%s\t%s", dir1_2.getAbsolutePath() + "-0", new File(dir1_2, "file2").getAbsolutePath()));

    /*
     * in/1/1.3 is empty.
     *
     * in/
     *   1/
     *     1.3/
     */
    expectedCounters.incrCounter(MapperCounter.DIRS_FOUND, 1);
    expectedCounters.incrCounter(MapperCounter.DIRS_SKIPPED, 1);

    tmp.newFolder("in/1/1.3");

    /*
     * in/2/2.1 contains one file.
     *
     * in/
     *   2/
     *     2.1/
     *       file1 10 bytes
     *
     * Single file dirs are ignored.
     */
    expectedCounters.incrCounter(MapperCounter.DIRS_FOUND, 1);
    expectedCounters.incrCounter(MapperCounter.DIRS_SKIPPED, 1);

    expectedCounters.incrCounter(MapperCounter.FILES_FOUND, 1);
    expectedCounters.incrCounter(MapperCounter.FILES_SKIPPED, 1);

    createFile(dir2_1, "file1", 10);

    /*
     * in/2/2.2 contains three files.
     *
     * in/
     *   2/
     *     2.2/
     *       file1 25 bytes
     *       file2 15 bytes
     *       file3 35 bytes
     *
     *   0          1
     *   file3 35   file1 25
     *              file2 15
     *
     * Bucket 0 with a single file is ignored.
     */
    expectedCounters.incrCounter(MapperCounter.DIRS_FOUND, 1);
    expectedCounters.incrCounter(MapperCounter.DIRS_ELIGIBLE, 1);

    expectedCounters.incrCounter(MapperCounter.FILES_FOUND, 3);
    expectedCounters.incrCounter(MapperCounter.FILES_ELIGIBLE, 2);
    expectedCounters.incrCounter(MapperCounter.FILES_SKIPPED, 1);

    createFile(dir2_2, "file1", 25);
    createFile(dir2_2, "file2", 15);
    createFile(dir2_2, "file3", 35);

    expectedBucketFiles.add(format("%s\t%s", dir2_2.getAbsolutePath() + "-1", new File(dir2_2, "file1").getAbsolutePath()));
    expectedBucketFiles.add(format("%s\t%s", dir2_2.getAbsolutePath() + "-1", new File(dir2_2, "file2").getAbsolutePath()));

    /*
     * in/2/2.3 contains 2 files.
     *
     * in/
     *   2/
     *     2.3/
     *       file1 41 bytes
     *       file2 10 bytes
     *
     * file1 is too big, leaving file2 as a single file, which is also ignored.
     */
    expectedCounters.incrCounter(MapperCounter.DIRS_FOUND, 1);
    expectedCounters.incrCounter(MapperCounter.DIRS_SKIPPED, 1);

    expectedCounters.incrCounter(MapperCounter.FILES_FOUND, 2);
    expectedCounters.incrCounter(MapperCounter.FILES_SKIPPED, 2);

    createFile(dir2_3, "file1", 41);
    createFile(dir2_3, "file2", 10);

    /*
     * in/2/2.4 contains two subdirectories and no files.
     *
     * in/
     *   2/
     *     2.4/
     *       2.4.1/
     *       2.4.2/
     */
    expectedCounters.incrCounter(MapperCounter.DIRS_FOUND, 1);
    expectedCounters.incrCounter(MapperCounter.DIRS_SKIPPED, 1);

    tmp.newFolder("in/2/2.4");

    File dir2_4_1 = tmp.newFolder("in/2/2.4/2.4.1");
    File dir2_4_2 = tmp.newFolder("in/2/2.4/2.4.2");

    /*
     * in/
     *   2/
     *     2.4/
     *       2.4.1/
     *         file1 100 bytes
     *         file2  30 bytes
     */
    expectedCounters.incrCounter(MapperCounter.DIRS_FOUND, 1);
    expectedCounters.incrCounter(MapperCounter.DIRS_SKIPPED, 1);

    expectedCounters.incrCounter(MapperCounter.FILES_FOUND, 2);
    expectedCounters.incrCounter(MapperCounter.FILES_SKIPPED, 2);

    createFile(dir2_4_1, "file1", 100);
    createFile(dir2_4_1, "file2", 30);

    /*
     * in/
     *   2/
     *     2.4/
     *       2.4.2/
     *         file1 20 bytes
     *         file2 20 bytes
     *         file3 10 bytes
     *
     *   0
     *   file1 20
     *   file2 20
     *   file3 10
     */
    expectedCounters.incrCounter(MapperCounter.DIRS_FOUND, 1);
    expectedCounters.incrCounter(MapperCounter.DIRS_ELIGIBLE, 1);

    expectedCounters.incrCounter(MapperCounter.FILES_FOUND, 3);
    expectedCounters.incrCounter(MapperCounter.FILES_ELIGIBLE, 3);

    createFile(dir2_4_2, "file1", 20);
    createFile(dir2_4_2, "file2", 20);
    createFile(dir2_4_2, "file3", 10);

    expectedBucketFiles.add(format("%s\t%s", dir2_4_2.getAbsolutePath() + "-0", new File(dir2_4_2, "file1").getAbsolutePath()));
    expectedBucketFiles.add(format("%s\t%s", dir2_4_2.getAbsolutePath() + "-0", new File(dir2_4_2, "file2").getAbsolutePath()));
    expectedBucketFiles.add(format("%s\t%s", dir2_4_2.getAbsolutePath() + "-0", new File(dir2_4_2, "file3").getAbsolutePath()));

    Crush crush = new Crush();

    crush.setConf(job);
    crush.setFileSystem(fileSystem);

    /*
     * Call these in the same order that run() does.
     */
    crush.createJobConfAndParseArgs("--compress=none", "--max-file-blocks=1", in.getAbsolutePath(),
            new File(tmp.getRoot(), "out").getAbsolutePath(), "20101124171730");
    crush.writeDirs();

    /*
     * Verify bucket contents.
     */
    List<String> actualBucketFiles = new ArrayList<String>();

    Text key = new Text();
    Text value = new Text();

    Reader reader = new Reader(FileSystem.get(job), crush.getBucketFiles(), job);

    while (reader.next(key, value)) {
        actualBucketFiles.add(format("%s\t%s", key, value));
    }

    reader.close();

    Collections.sort(expectedBucketFiles);
    Collections.sort(actualBucketFiles);

    assertThat(actualBucketFiles, equalTo(expectedBucketFiles));

    /*
     * Verify the partition map.
     */
    Reader partitionMapReader = new Reader(FileSystem.get(job), crush.getPartitionMap(), job);

    IntWritable partNum = new IntWritable();

    Map<String, Integer> actualPartitions = new HashMap<String, Integer>();

    while (partitionMapReader.next(key, partNum)) {
        actualPartitions.put(key.toString(), partNum.get());
    }

    partitionMapReader.close();

    /*
     * These crush files need to be allocated into 5 partitions:
     *
     * in/2-1            55 bytes
     * in/1/1.1-0        45 bytes
     * in/1/1.1-2        40 bytes
     * in/1/1.1-1        40 bytes
     * in/1/1.2-0        30 bytes
     * in/2/2.2-1        40 bytes
     * in/2/2.4/2.4.2-0  50 bytes
     *
     *   0           1                     2               3               4
     *   in/2-1 55   in/2/2.4/2.4.2-0 50   in/1/1.1-0 45   in/1/1.1-2 40   in/1/1.1-1 40
     *                                                     in/2/2.2-1 40   in/1/1.2-0 30
     */
    Map<String, Integer> expectedPartitions = new HashMap<String, Integer>();

    // TODO: this may not be deterministic due to jvm/hashmap/filesystem
    expectedPartitions.put(dir2.getAbsolutePath() + "-1", 0);
    expectedPartitions.put(dir2_4_2.getAbsolutePath() + "-0", 1);
    expectedPartitions.put(dir1_1.getAbsolutePath() + "-0", 2);
    expectedPartitions.put(dir1_1.getAbsolutePath() + "-2", 4);
    expectedPartitions.put(dir2_2.getAbsolutePath() + "-1", 3);
    expectedPartitions.put(dir1_1.getAbsolutePath() + "-1", 3);
    expectedPartitions.put(dir1_2.getAbsolutePath() + "-0", 4);

    assertThat(actualPartitions, equalTo(expectedPartitions));

    /*
     * Verify counters.
     */
    Counters actualCounters = new Counters();

    DataInputStream countersStream = FileSystem.get(job).open(crush.getCounters());

    actualCounters.readFields(countersStream);

    countersStream.close();

    assertThat(actualCounters, equalTo(expectedCounters));
}