Example usage for org.apache.hadoop.io IntWritable IntWritable

List of usage examples for org.apache.hadoop.io IntWritable IntWritable

Introduction

On this page you can find example usage of the org.apache.hadoop.io.IntWritable no-argument constructor.

Prototype

public IntWritable() 
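
A minimal usage sketch (not taken from the indexed source files below, shown purely for illustration): the no-argument constructor creates an empty IntWritable that is populated later with set(int) and read back with get(), which is why the examples that follow reuse a single instance as a buffer while iterating SequenceFile records.

import org.apache.hadoop.io.IntWritable;

public class IntWritableExample {
    public static void main(String[] args) {
        // Create an empty IntWritable with the no-argument constructor.
        IntWritable count = new IntWritable();

        // Populate the wrapper and read the int value back.
        count.set(42);
        System.out.println(count.get()); // prints 42

        // The same instance can be reset and reused, e.g. as the value
        // buffer passed to SequenceFile.Reader.next(key, count).
        count.set(7);
        System.out.println(count.get()); // prints 7
    }
}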

Source Link

Usage

From source file:com.hdfs.concat.crush.CrushPartitioner.java

License:Apache License

@Override
public void configure(JobConf job) {
    String path = job.get("crush.partition.map");
    int expPartitions = job.getNumReduceTasks();

    bucketToPartition = new HashMap<Text, Integer>(100);

    try {
        FileSystem fs = FileSystem.get(job);

        Reader reader = new Reader(fs, new Path(path), job);

        Text bucket = new Text();
        IntWritable partNum = new IntWritable();

        while (reader.next(bucket, partNum)) {
            int partNumValue = partNum.get();

            if (partNumValue < 0 || partNumValue >= expPartitions) {
                throw new IllegalArgumentException(
                        "Partition " + partNumValue + " not allowed with " + expPartitions + " reduce tasks");
            }

            Integer prev = bucketToPartition.put(new Text(bucket), partNumValue);

            if (null != prev) {
                throw new IllegalArgumentException("Bucket " + bucket + " appears more than once in " + path);
            }
        }
    } catch (IOException e) {
        throw new RuntimeException("Could not read partition map from " + path, e);
    }

    if (new HashSet<Integer>(bucketToPartition.values()).size() > expPartitions) {
        throw new IllegalArgumentException(
                path + " contains more than " + expPartitions + " distinct partitions");
    }
}

From source file:com.hdfs.concat.crush.CrushPartitionerTest.java

License:Apache License

@Test
public void partition() throws IOException {

    Writer writer = SequenceFile.createWriter(fs, job, partitionMap, Text.class, IntWritable.class);

    Text key = new Text();
    IntWritable partNum = new IntWritable();

    key.set("bucket-1");
    partNum.set(0);
    writer.append(key, partNum);

    key.set("bucket-2");
    partNum.set(0);
    writer.append(key, partNum);

    key.set("bucket-3");
    partNum.set(1);
    writer.append(key, partNum);

    key.set("bucket-4");
    partNum.set(2);
    writer.append(key, partNum);

    key.set("bucket-5");
    partNum.set(2);
    writer.append(key, partNum);

    key.set("bucket-6");
    partNum.set(2);
    writer.append(key, partNum);

    writer.close();

    job.setNumReduceTasks(3);

    partitioner.configure(job);

    Text fileName = new Text();

    key.set("bucket-1");

    for (int file = 0; file < 4; file++) {
        fileName.set("file" + file);
        assertThat(partitioner.getPartition(key, fileName, 3), equalTo(0));
    }

    key.set("bucket-2");

    for (int file = 0; file < 4; file++) {
        fileName.set("file" + file);
        assertThat(partitioner.getPartition(key, fileName, 3), equalTo(0));
    }

    key.set("bucket-3");

    for (int file = 0; file < 4; file++) {
        fileName.set("file" + file);
        assertThat(partitioner.getPartition(key, fileName, 3), equalTo(1));
    }

    key.set("bucket-4");

    for (int file = 0; file < 4; file++) {
        fileName.set("file" + file);
        assertThat(partitioner.getPartition(key, fileName, 3), equalTo(2));
    }

    key.set("bucket-5");

    for (int file = 0; file < 4; file++) {
        fileName.set("file" + file);
        assertThat(partitioner.getPartition(key, fileName, 3), equalTo(2));
    }

    key.set("bucket-6");

    for (int file = 0; file < 4; file++) {
        fileName.set("file" + file);
        assertThat(partitioner.getPartition(key, fileName, 3), equalTo(2));
    }
}

From source file:com.hdfs.concat.crush.CrushPartitionerTest.java

License:Apache License

@Test
public void partitionWithFewerPartitionsThanReduceTasks() throws IOException {

    Writer writer = SequenceFile.createWriter(fs, job, partitionMap, Text.class, IntWritable.class);

    Text key = new Text();
    IntWritable partNum = new IntWritable();

    key.set("bucket-1");
    partNum.set(0);
    writer.append(key, partNum);

    key.set("bucket-2");
    partNum.set(0);
    writer.append(key, partNum);

    key.set("bucket-3");
    partNum.set(1);
    writer.append(key, partNum);

    key.set("bucket-4");
    partNum.set(2);
    writer.append(key, partNum);

    key.set("bucket-5");
    partNum.set(2);
    writer.append(key, partNum);

    key.set("bucket-6");
    partNum.set(2);
    writer.append(key, partNum);

    writer.close();

    job.setNumReduceTasks(40);

    partitioner.configure(job);

    Text fileName = new Text();

    key.set("bucket-1");

    for (int file = 0; file < 4; file++) {
        fileName.set("file" + file);
        assertThat(partitioner.getPartition(key, fileName, 3), equalTo(0));
    }

    key.set("bucket-2");

    for (int file = 0; file < 4; file++) {
        fileName.set("file" + file);
        assertThat(partitioner.getPartition(key, fileName, 3), equalTo(0));
    }

    key.set("bucket-3");

    for (int file = 0; file < 4; file++) {
        fileName.set("file" + file);
        assertThat(partitioner.getPartition(key, fileName, 3), equalTo(1));
    }

    key.set("bucket-4");

    for (int file = 0; file < 4; file++) {
        fileName.set("file" + file);
        assertThat(partitioner.getPartition(key, fileName, 3), equalTo(2));
    }

    key.set("bucket-5");

    for (int file = 0; file < 4; file++) {
        fileName.set("file" + file);
        assertThat(partitioner.getPartition(key, fileName, 3), equalTo(2));
    }

    key.set("bucket-6");

    for (int file = 0; file < 4; file++) {
        fileName.set("file" + file);
        assertThat(partitioner.getPartition(key, fileName, 3), equalTo(2));
    }
}

From source file:com.hdfs.concat.crush.CrushPartitionerTest.java

License:Apache License

@Test
public void noDupes() throws IOException {

    Writer writer = SequenceFile.createWriter(fs, job, partitionMap, Text.class, IntWritable.class);

    Text key = new Text();
    IntWritable value = new IntWritable();

    key.set("bucket-1");
    value.set(0);
    writer.append(key, value);

    key.set("bucket-2");
    value.set(0);
    writer.append(key, value);

    key.set("bucket-2");
    value.set(1);
    writer.append(key, value);

    writer.close();

    job.setNumReduceTasks(3);

    try {
        partitioner.configure(job);
        fail();
    } catch (IllegalArgumentException e) {
        if (!e.getMessage().contains("bucket-2")) {
            throw e;
        }
    }
}

From source file:com.hdfs.concat.crush.CrushPartitionerTest.java

License:Apache License

@Test
public void partitionTooLow() throws IOException {

    Writer writer = SequenceFile.createWriter(fs, job, partitionMap, Text.class, IntWritable.class);

    Text key = new Text();
    IntWritable partNum = new IntWritable();

    key.set("bucket-1");
    partNum.set(0);
    writer.append(key, partNum);

    key.set("bucket-2");
    partNum.set(0);
    writer.append(key, partNum);

    key.set("bucket-4");
    partNum.set(2);
    writer.append(key, partNum);

    key.set("bucket-5");
    partNum.set(2);
    writer.append(key, partNum);

    key.set("bucket-6");
    partNum.set(-1);
    writer.append(key, partNum);

    writer.close();

    job.setNumReduceTasks(3);

    try {
        partitioner.configure(job);
        fail("No such thing as a negitave partition");
    } catch (IllegalArgumentException e) {
        if (!e.getMessage().contains("Partition -1")) {
            throw e;
        }
    }
}

From source file:com.hdfs.concat.crush.CrushPartitionerTest.java

License:Apache License

@Test
public void partitionTooHigh() throws IOException {

    Writer writer = SequenceFile.createWriter(fs, job, partitionMap, Text.class, IntWritable.class);

    Text key = new Text();
    IntWritable partNum = new IntWritable();

    key.set("bucket-1");
    partNum.set(0);
    writer.append(key, partNum);

    key.set("bucket-2");
    partNum.set(0);
    writer.append(key, partNum);

    key.set("bucket-4");
    partNum.set(2);
    writer.append(key, partNum);

    key.set("bucket-5");
    partNum.set(2);
    writer.append(key, partNum);

    key.set("bucket-6");
    partNum.set(3);
    writer.append(key, partNum);

    writer.close();

    job.setNumReduceTasks(3);

    try {
        partitioner.configure(job);
        fail("Parition with id 3 is not allowed with 3 reduce tasks");
    } catch (IllegalArgumentException e) {
        if (!e.getMessage().contains("Partition 3")) {
            throw e;
        }
    }
}

From source file:com.hdfs.concat.crush.CrushStandAloneSequenceFileTest.java

License:Apache License

private void verifyCrushOutput(File crushOutput, int[]... keyCounts) throws IOException {

    List<String> actual = new ArrayList<String>();

    Text text = new Text();
    IntWritable value = new IntWritable();

    Reader reader = new Reader(FileSystem.get(job), new Path(crushOutput.getAbsolutePath()), job);

    while (reader.next(text, value)) {
        actual.add(format("%s\t%d", text, value.get()));
    }

    reader.close();

    int expLines = 0;
    List<List<String>> expected = new ArrayList<List<String>>();

    for (int[] keyCount : keyCounts) {
        int key = keyCount[0];
        int count = keyCount[1];

        List<String> lines = new ArrayList<String>();
        expected.add(lines);

        for (int i = 0, j = 0; i < count; i++, j = j == 9 ? 0 : j + 1) {
            String line = format("%d\t%d", key, j);
            lines.add(line);
        }

        expLines += count;
    }

    /*
     * Make sure each file's data is contiguous in the crush output file.
     */
    for (List<String> list : expected) {
        int idx = actual.indexOf(list.get(0));

        assertThat(idx, greaterThanOrEqualTo(0));

        assertThat(actual.subList(idx, idx + list.size()), equalTo(list));
    }

    assertThat(actual.size(), equalTo(expLines));
}

From source file:com.hdfs.concat.crush.CrushStandAloneSequenceFileTest.java

License:Apache License

private void createFile(File dir, String fileName, int key, int count) throws IOException {
    File file = new File(dir, fileName);

    Writer writer = SequenceFile.createWriter(FileSystem.get(job), job, new Path(file.getAbsolutePath()),
            Text.class, IntWritable.class);

    Text text = new Text(Integer.toString(key));
    IntWritable value = new IntWritable();

    for (int i = 0, j = 0; i < count; i++, j = j == 9 ? 0 : j + 1) {
        value.set(j);

        writer.append(text, value);
    }

    writer.close();
}

From source file:com.hdfs.concat.crush.CrushStandAloneSequenceFileTest.java

License:Apache License

private void verifyFile(File dir, String fileName, int key, int count) throws IOException {
    File file = new File(dir, fileName);

    Reader reader = new Reader(FileSystem.get(job), new Path(file.getAbsolutePath()), job);

    int i = 0;
    int actual = 0;

    Text text = new Text();
    IntWritable value = new IntWritable();

    while (reader.next(text, value)) {
        assertThat(text.toString(), equalTo(Integer.toString(key)));
        assertThat(value.get(), equalTo(i));

        if (i == 9) {
            i = 0;
        } else {
            i++;
        }

        actual++;
    }

    reader.close();

    assertThat(actual, equalTo(count));
}

From source file:com.hdfs.concat.crush.CrushTest.java

License:Apache License

@Test
public void bucketing() throws Exception {
    File in = tmp.newFolder("in");

    Counters expectedCounters = new Counters();
    List<String> expectedBucketFiles = new ArrayList<String>();

    /*
     * Create a hierarchy of directories. Directories are distinguished by a trailing slash in these comments.
     *
     *   1/
     *         1.1/
     *               file1 10 bytes
     *               file2 20 bytes
     *               file3 30 bytes
     *               file4 41 bytes
     *               file5 15 bytes
     *               file6 30 bytes
     *               file7   20 bytes
     *         1.2/
     *               file1 20 bytes
     *               file2 10 bytes
     *         1.3/
     *   2/
     *         file1 70 bytes
     *         file2 30 bytes
     *         file3 25 bytes
     *         file4 30 bytes
     *         file5 35 bytes
     *         2.1/
     *               file1 10 bytes
     *         2.2/
     *               file1 25 bytes
     *               file2 15 bytes
     *               file3 35 bytes
     *         2.3/
     *               file1 41 bytes
     *               file2 10 bytes
     *         2.4/
     *               2.4.1/
     *                     file1 100 bytes
     *                     file2   30 bytes
     *               2.4.2/
     *                     file1 20 bytes
     *                     file2 20 bytes
     *                     file3 10 bytes
     */

    /*
     * in contains 2 dirs and no files so it is skipped.
     *
     *    in/
     *          1/
     *          2/
     */
    expectedCounters.incrCounter(MapperCounter.DIRS_FOUND, 1);
    expectedCounters.incrCounter(MapperCounter.DIRS_SKIPPED, 1);

    tmp.newFolder("in/1");
    File dir2 = tmp.newFolder("in/2");

    /*
     * in/1 contains three dirs and no files so it is skipped.
     *
     *    in/
     *          1/
     *                1.1/
     *                1.2/
     *                1.3/
     */
    expectedCounters.incrCounter(MapperCounter.DIRS_FOUND, 1);
    expectedCounters.incrCounter(MapperCounter.DIRS_SKIPPED, 1);

    File dir1_1 = tmp.newFolder("in/1/1.1");
    File dir1_2 = tmp.newFolder("in/1/1.2");
    tmp.newFolder("in/1/1.3");

    /*
     * in/2 contains five files and four dirs.
     *
     *    in/
     *          2/
     *               file1 70 bytes
     *               file2 30 bytes
     *               file3 25 bytes
     *               file4 30 bytes
     *               file5 35 bytes
     *                2.1/
     *                2.2/
     *                2.3/
     *                2.4/
     *
     *    0                  1                  2
     *    file5   35      file2 30      file4 30
     *                      file3 25
     *
     * Buckets 0 and 2 have a single file each so they are ignored.
     */
    expectedCounters.incrCounter(MapperCounter.DIRS_FOUND, 1);
    expectedCounters.incrCounter(MapperCounter.DIRS_ELIGIBLE, 1);

    expectedCounters.incrCounter(MapperCounter.FILES_FOUND, 5);
    expectedCounters.incrCounter(MapperCounter.FILES_ELIGIBLE, 2);
    expectedCounters.incrCounter(MapperCounter.FILES_SKIPPED, 3);

    File dir2_1 = tmp.newFolder("in/2/2.1");
    File dir2_2 = tmp.newFolder("in/2/2.2");
    File dir2_3 = tmp.newFolder("in/2/2.3");
    tmp.newFolder("in/2/2.4");

    createFile(dir2, "file1", 70);
    createFile(dir2, "file2", 30);
    createFile(dir2, "file3", 25);
    createFile(dir2, "file4", 30);
    createFile(dir2, "file5", 35);

    expectedBucketFiles
            .add(format("%s   %s", dir2.getAbsolutePath() + "-1", new File(dir2, "file2").getAbsolutePath()));
    expectedBucketFiles
            .add(format("%s   %s", dir2.getAbsolutePath() + "-1", new File(dir2, "file3").getAbsolutePath()));

    /*
     * in/1/1.1 contains seven files and no dirs.
     *
     *    in/
     *          1/
     *                1.1/
     *                     file1 10 bytes
     *                     file2 20 bytes
     *                     file3 30 bytes
     *                     file4 41 bytes
     *                     file5 15 bytes
     *                     file6 30 bytes
     *                     file7   20 bytes
     *
     *    0                  1                  2
     *    file3 30      file6 30      file2 20
     *    file5 15      file1 10      file7 20
     *
     * file4 is > 50 * 0.8 so it is ignored.
     */
    expectedCounters.incrCounter(MapperCounter.DIRS_FOUND, 1);
    expectedCounters.incrCounter(MapperCounter.DIRS_ELIGIBLE, 1);

    expectedCounters.incrCounter(MapperCounter.FILES_FOUND, 7);
    expectedCounters.incrCounter(MapperCounter.FILES_ELIGIBLE, 6);
    expectedCounters.incrCounter(MapperCounter.FILES_SKIPPED, 1);

    createFile(dir1_1, "file1", 10);
    createFile(dir1_1, "file2", 20);
    createFile(dir1_1, "file3", 30);
    createFile(dir1_1, "file4", 41);
    createFile(dir1_1, "file5", 15);
    createFile(dir1_1, "file6", 30);
    createFile(dir1_1, "file7", 20);

    expectedBucketFiles.add(
            format("%s   %s", dir1_1.getAbsolutePath() + "-0", new File(dir1_1, "file3").getAbsolutePath()));
    expectedBucketFiles.add(
            format("%s   %s", dir1_1.getAbsolutePath() + "-0", new File(dir1_1, "file5").getAbsolutePath()));
    expectedBucketFiles.add(
            format("%s   %s", dir1_1.getAbsolutePath() + "-1", new File(dir1_1, "file6").getAbsolutePath()));
    expectedBucketFiles.add(
            format("%s   %s", dir1_1.getAbsolutePath() + "-1", new File(dir1_1, "file1").getAbsolutePath()));
    expectedBucketFiles.add(
            format("%s   %s", dir1_1.getAbsolutePath() + "-2", new File(dir1_1, "file2").getAbsolutePath()));
    expectedBucketFiles.add(
            format("%s   %s", dir1_1.getAbsolutePath() + "-2", new File(dir1_1, "file7").getAbsolutePath()));

    /*
     * in/1/1.2 contains two files.
     *
     *    in/
     *          1/
     *                1.2/
     *                     file1 20 bytes
     *                     file2 10 bytes
     */
    expectedCounters.incrCounter(MapperCounter.DIRS_FOUND, 1);
    expectedCounters.incrCounter(MapperCounter.DIRS_ELIGIBLE, 1);

    expectedCounters.incrCounter(MapperCounter.FILES_FOUND, 2);
    expectedCounters.incrCounter(MapperCounter.FILES_ELIGIBLE, 2);

    createFile(dir1_2, "file1", 20);
    createFile(dir1_2, "file2", 10);

    expectedBucketFiles.add(
            format("%s   %s", dir1_2.getAbsolutePath() + "-0", new File(dir1_2, "file1").getAbsolutePath()));
    expectedBucketFiles.add(
            format("%s   %s", dir1_2.getAbsolutePath() + "-0", new File(dir1_2, "file2").getAbsolutePath()));

    /*
     * in/1/1.3 is empty.
     *
     *    in/
     *          1/
     *                1.3/
     */
    expectedCounters.incrCounter(MapperCounter.DIRS_FOUND, 1);
    expectedCounters.incrCounter(MapperCounter.DIRS_SKIPPED, 1);

    tmp.newFolder("in/1/1.3");

    /*
     * in/2/2.1 contains one file.
     *
     *    in/
     *          2/
     *                2.1/
     *                     file1 10 bytes
     *
     * Single file dirs are ignored.
     */
    expectedCounters.incrCounter(MapperCounter.DIRS_FOUND, 1);
    expectedCounters.incrCounter(MapperCounter.DIRS_SKIPPED, 1);

    expectedCounters.incrCounter(MapperCounter.FILES_FOUND, 1);
    expectedCounters.incrCounter(MapperCounter.FILES_SKIPPED, 1);

    createFile(dir2_1, "file1", 10);

    /*
     * in/2/2.2 contains three files.
     *
     *    in/
     *          2/
     *                2.2/
     *                     file1 25 bytes
     *                     file2 15 bytes
     *                     file3 35 bytes
     *
     *    0                  1
     *    file3 35      file1 25
     *                      file2 15
     *
     * Bucket 0 with a single file is ignored.
     */
    expectedCounters.incrCounter(MapperCounter.DIRS_FOUND, 1);
    expectedCounters.incrCounter(MapperCounter.DIRS_ELIGIBLE, 1);

    expectedCounters.incrCounter(MapperCounter.FILES_FOUND, 3);
    expectedCounters.incrCounter(MapperCounter.FILES_ELIGIBLE, 2);
    expectedCounters.incrCounter(MapperCounter.FILES_SKIPPED, 1);

    createFile(dir2_2, "file1", 25);
    createFile(dir2_2, "file2", 15);
    createFile(dir2_2, "file3", 35);

    expectedBucketFiles.add(
            format("%s   %s", dir2_2.getAbsolutePath() + "-1", new File(dir2_2, "file1").getAbsolutePath()));
    expectedBucketFiles.add(
            format("%s   %s", dir2_2.getAbsolutePath() + "-1", new File(dir2_2, "file2").getAbsolutePath()));

    /*
     * in/2/2.3 contains 2 files.
     *
     *    in/
     *          2/
     *                2.3/
     *                     file1 41 bytes
     *                     file2 10 bytes
     *
     * file1 is too big, leaving file2 as a single file, which is also ignored.
     */
    expectedCounters.incrCounter(MapperCounter.DIRS_FOUND, 1);
    expectedCounters.incrCounter(MapperCounter.DIRS_SKIPPED, 1);

    expectedCounters.incrCounter(MapperCounter.FILES_FOUND, 2);
    expectedCounters.incrCounter(MapperCounter.FILES_SKIPPED, 2);

    createFile(dir2_3, "file1", 41);
    createFile(dir2_3, "file2", 10);

    /*
     * in/2/2.4 contains two subdirectories and no files.
     *
     *    in/
     *          2/
     *               2.4/
     *                     2.4.1/
     *                     2.4.2/
     */
    expectedCounters.incrCounter(MapperCounter.DIRS_FOUND, 1);
    expectedCounters.incrCounter(MapperCounter.DIRS_SKIPPED, 1);

    tmp.newFolder("in/2/2.4");

    File dir2_4_1 = tmp.newFolder("in/2/2.4/2.4.1");
    File dir2_4_2 = tmp.newFolder("in/2/2.4/2.4.2");

    /*
     *    in/
     *          2/
     *               2.4/
     *                     2.4.1/
     *                           file1 100 bytes
     *                           file2   30 bytes
     */
    expectedCounters.incrCounter(MapperCounter.DIRS_FOUND, 1);
    expectedCounters.incrCounter(MapperCounter.DIRS_SKIPPED, 1);

    expectedCounters.incrCounter(MapperCounter.FILES_FOUND, 2);
    expectedCounters.incrCounter(MapperCounter.FILES_SKIPPED, 2);

    createFile(dir2_4_1, "file1", 100);
    createFile(dir2_4_1, "file2", 30);

    /*
     *    in/
     *          2/
     *               2.4/
     *                     2.4.2/
     *                           file1 20 bytes
     *                           file2 20 bytes
     *                           file3 10 bytes
     *   0
     *   file1 20
     *   file2 20
     *   file3 10
     */
    expectedCounters.incrCounter(MapperCounter.DIRS_FOUND, 1);
    expectedCounters.incrCounter(MapperCounter.DIRS_ELIGIBLE, 1);

    expectedCounters.incrCounter(MapperCounter.FILES_FOUND, 3);
    expectedCounters.incrCounter(MapperCounter.FILES_ELIGIBLE, 3);

    createFile(dir2_4_2, "file1", 20);
    createFile(dir2_4_2, "file2", 20);
    createFile(dir2_4_2, "file3", 10);

    expectedBucketFiles.add(format("%s   %s", dir2_4_2.getAbsolutePath() + "-0",
            new File(dir2_4_2, "file1").getAbsolutePath()));
    expectedBucketFiles.add(format("%s   %s", dir2_4_2.getAbsolutePath() + "-0",
            new File(dir2_4_2, "file2").getAbsolutePath()));
    expectedBucketFiles.add(format("%s   %s", dir2_4_2.getAbsolutePath() + "-0",
            new File(dir2_4_2, "file3").getAbsolutePath()));

    Crush crush = new Crush();

    crush.setConf(job);
    crush.setFileSystem(fileSystem);

    /*
     * Call these in the same order that run() does.
     */
    crush.createJobConfAndParseArgs("--compress=none", "--max-file-blocks=1", in.getAbsolutePath(),
            new File(tmp.getRoot(), "out").getAbsolutePath(), "20101124171730");
    crush.writeDirs();

    /*
     * Verify bucket contents.
     */

    List<String> actualBucketFiles = new ArrayList<String>();

    Text key = new Text();
    Text value = new Text();

    Reader reader = new Reader(FileSystem.get(job), crush.getBucketFiles(), job);

    while (reader.next(key, value)) {
        actualBucketFiles.add(format("%s\t%s", key, value));
    }

    reader.close();

    Collections.sort(expectedBucketFiles);
    Collections.sort(actualBucketFiles);

    assertThat(actualBucketFiles, equalTo(expectedBucketFiles));

    /*
     * Verify the partition map.
     */
    Reader partitionMapReader = new Reader(FileSystem.get(job), crush.getPartitionMap(), job);

    IntWritable partNum = new IntWritable();

    Map<String, Integer> actualPartitions = new HashMap<String, Integer>();

    while (partitionMapReader.next(key, partNum)) {
        actualPartitions.put(key.toString(), partNum.get());
    }

    partitionMapReader.close();

    /*
     * These crush files need to allocated into 5 partitions:
     *
     * in/2-1                  55 bytes
     * in/1/1.1-0            45 bytes
     * in/1/1.1-2            40 bytes
     * in/1/1.1-1            40 bytes
     * in/1/1.2-0            30 bytes
     * in/2/2.2-1            40 bytes
     * in/2/2.4/2.4.2-0   50 bytes
     *
     *    0                     1                                 2                        3                        4
     *    in/2-1 55         in/2/2.4/2.4.2-0   50   in/1/1.1-0   45   in/1/1.1-2   40   in/1/1.1-1   40
     *                                                                                  in/2/2.2-1   40   in/1/1.2-0   39
     */
    Map<String, Integer> expectedPartitions = new HashMap<String, Integer>();

    //TODO: this may not be deterministic due to jvm/hashmap/filesystem
    expectedPartitions.put(dir2.getAbsolutePath() + "-1", 0);
    expectedPartitions.put(dir2_4_2.getAbsolutePath() + "-0", 1);
    expectedPartitions.put(dir1_1.getAbsolutePath() + "-0", 2);
    expectedPartitions.put(dir1_1.getAbsolutePath() + "-2", 4);
    expectedPartitions.put(dir2_2.getAbsolutePath() + "-1", 3);
    expectedPartitions.put(dir1_1.getAbsolutePath() + "-1", 3);
    expectedPartitions.put(dir1_2.getAbsolutePath() + "-0", 4);

    assertThat(actualPartitions, equalTo(expectedPartitions));

    /*
     * Verify counters.
     */
    Counters actualCounters = new Counters();

    DataInputStream countersStream = FileSystem.get(job).open(crush.getCounters());

    actualCounters.readFields(countersStream);

    countersStream.close();

    assertThat(actualCounters, equalTo(expectedCounters));
}