Example usage for org.apache.hadoop.io IntWritable IntWritable

Introduction

On this page you can find example usages of the no-argument IntWritable() constructor from org.apache.hadoop.io.

Prototype

public IntWritable() 
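
A minimal sketch of this constructor in use (standard Hadoop API): the value defaults to 0 and is normally filled in later with set(), which is why Writables keep a no-argument constructor in the first place; the framework instantiates them reflectively and then deserializes into them.

import org.apache.hadoop.io.IntWritable;

public class IntWritableBasics {
    public static void main(String[] args) {
        IntWritable iw = new IntWritable(); // value defaults to 0
        iw.set(42);                         // populate after construction
        System.out.println(iw.get());                          // 42
        System.out.println(iw.compareTo(new IntWritable(41))); // positive: 42 > 41
    }
}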

Usage

From source file:com.ibm.bi.dml.runtime.matrix.data.TaggedInt.java

License:Open Source License

public TaggedInt() {
    tag = -1;
    base = new IntWritable();
}
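
The no-argument constructor above exists for the same reason as IntWritable's: Hadoop creates Writable instances reflectively during deserialization and then populates them. A hedged sketch of what the framework effectively does (conf, a Configuration, and in, a DataInput, are assumed to be in scope):

IntWritable base = org.apache.hadoop.util.ReflectionUtils.newInstance(IntWritable.class, conf);
base.readFields(in); // fill the freshly constructed instance from the stream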

From source file:com.ibm.bi.dml.runtime.util.MapReduceTool.java

License:Open Source License

public static double[] pickValueWeight(String dir, NumItemsByEachReducerMetaData metadata, double p,
        boolean average) throws IOException {
    long[] counts = metadata.getNumItemsArray();
    long[] ranges = new long[counts.length];
    ranges[0] = counts[0];
    for (int i = 1; i < counts.length; i++)
        ranges[i] = ranges[i - 1] + counts[i];

    long total = ranges[ranges.length - 1];

    // do averaging only if it was requested and the total weight is even
    average = average && (total % 2 == 0);

    int currentPart = 0;
    double cum_weight = 0;
    long pos = (long) Math.ceil(total * p);
    while (ranges[currentPart] < pos) {
        currentPart++;
        cum_weight += ranges[currentPart];
    }
    int offset;
    if (currentPart > 0)
        offset = (int) (pos - ranges[currentPart - 1] - 1);
    else
        offset = (int) pos - 1;

    FileSystem fs = FileSystem.get(_rJob);
    Path path = new Path(dir);
    FileStatus[] files = fs.listStatus(path);
    Path fileToRead = null;
    for (FileStatus file : files)
        if (file.getPath().toString().endsWith(Integer.toString(currentPart))) {
            fileToRead = file.getPath();
            break;
        }

    if (fileToRead == null)
        throw new RuntimeException("cannot read partition " + currentPart);

    FSDataInputStream currentStream = fs.open(fileToRead);
    DoubleWritable readKey = new DoubleWritable();
    IntWritable readValue = new IntWritable();

    boolean contain0s = false;
    long numZeros = 0;
    if (currentPart == metadata.getPartitionOfZero()) {
        contain0s = true;
        numZeros = metadata.getNumberOfZero();
    }
    ReadWithZeros reader = new ReadWithZeros(currentStream, contain0s, numZeros);

    int numRead = 0;
    while (numRead <= offset) {
        reader.readNextKeyValuePairs(readKey, readValue);
        numRead += readValue.get();
        cum_weight += readValue.get();
    }

    double ret = readKey.get();
    if (average) {
        if (numRead <= offset + 1) {
            reader.readNextKeyValuePairs(readKey, readValue);
            cum_weight += readValue.get();
            ret = (ret + readKey.get()) / 2;
        }
    }
    currentStream.close();
    return new double[] { ret, (average ? -1 : readValue.get()), (average ? -1 : cum_weight) };
}
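
A hedged usage sketch: p is the quantile position as a fraction of the total weight, so 0.5 with average=true asks for a weighted median (dir and meta are assumed to come from a prior sort job):

double[] r = MapReduceTool.pickValueWeight(dir, meta, 0.5, true);
double median = r[0]; // r[1] (count) and r[2] (cumulative weight) are -1 when averaging was applied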

From source file:com.jeffy.fbds.SequenceFileWriter.java

License:Apache License

public static void main(String[] args) throws IOException {
    String uri = args[0];
    Configuration conf = new Configuration();
    Path path = new Path(uri);
    IntWritable key = new IntWritable();
    Text value = new Text();
    try (SequenceFile.Writer writer = SequenceFile.createWriter(conf, Writer.file(path),
            Writer.keyClass(key.getClass()), Writer.valueClass(value.getClass()))) {
        for (int i = 0; i < 100; i++) {
            key.set(100 - i);
            value.set(DATA[i % DATA.length]);
            System.out.printf("[%s]\t%s\t%s\n", writer.getLength(), key, value);
            writer.append(key, value);
        }
    }
}
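
The read side is symmetric. A minimal companion sketch (not part of the original source, which defines the DATA array elsewhere in the file) that prints back what the writer above produced, reusing one IntWritable and one Text across all records:

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;

public class SequenceFileReadDemo {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        Path path = new Path(args[0]);
        try (SequenceFile.Reader reader = new SequenceFile.Reader(conf,
                SequenceFile.Reader.file(path))) {
            IntWritable key = new IntWritable(); // reused for every record
            Text value = new Text();
            while (reader.next(key, value)) {    // returns false at end of file
                System.out.printf("[%s]\t%s%n", key, value);
            }
        }
    }
}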

From source file:com.jfolson.hive.serde.RBaseSerDe.java

License:Apache License

protected void serializeField(Object o, ObjectInspector oi, Object reuse) throws IOException {
    //LOG.info("Serializing hive type: "+oi.getTypeName());
    //LOG.info("Serializing category: "+oi.getCategory().toString());
    if (o == null) {
        tbOut.writeNull();
        return;
    }
    switch (oi.getCategory()) {
    case PRIMITIVE: {
        PrimitiveObjectInspector poi = (PrimitiveObjectInspector) oi;
        //LOG.info("Serializing primitive: "+poi.getPrimitiveCategory().toString());
        switch (poi.getPrimitiveCategory()) {
        case VOID: {
            return;
        }
        case BINARY: {
            BinaryObjectInspector boi = (BinaryObjectInspector) poi;
            TypedBytesWritable bytes = reuse == null ? new TypedBytesWritable() : (TypedBytesWritable) reuse;
            BytesWritable bytesWrite = boi.getPrimitiveWritableObject(o);
            if (bytesWrite != null) {
                bytes.set(bytesWrite);
                if (!RType.isValid(bytes)) {
                    LOG.error("Invalid typedbytes detected with type: " + RType.getType(bytes).code);
                    bytes.setValue(new Buffer(bytesWrite.getBytes(), 0, bytesWrite.getLength()));
                }
                //LOG.info("Writing binary primitive with class: "+bytes.getClass().getName());
                tbOut.write(bytes);
            }

            return;
        }
        case BOOLEAN: {
            BooleanObjectInspector boi = (BooleanObjectInspector) poi;
            BooleanWritable r = reuse == null ? new BooleanWritable() : (BooleanWritable) reuse;
            r.set(boi.get(o));
            tbOut.write(r);
            return;
        }
        case BYTE: {
            ByteObjectInspector boi = (ByteObjectInspector) poi;
            ByteWritable r = reuse == null ? new ByteWritable() : (ByteWritable) reuse;
            r.set(boi.get(o));
            tbOut.write(r);
            return;
        }
        case SHORT: {
            ShortObjectInspector spoi = (ShortObjectInspector) poi;
            ShortWritable r = reuse == null ? new ShortWritable() : (ShortWritable) reuse;
            r.set(spoi.get(o));
            tbOut.write(r);
            return;
        }
        case INT: {
            IntObjectInspector ioi = (IntObjectInspector) poi;
            IntWritable r = reuse == null ? new IntWritable() : (IntWritable) reuse;
            r.set(ioi.get(o));
            tbOut.write(r);
            return;
        }
        case LONG: {
            LongObjectInspector loi = (LongObjectInspector) poi;
            LongWritable r = reuse == null ? new LongWritable() : (LongWritable) reuse;
            r.set(loi.get(o));
            tbOut.write(r);
            return;
        }
        case FLOAT: {
            FloatObjectInspector foi = (FloatObjectInspector) poi;
            FloatWritable r = reuse == null ? new FloatWritable() : (FloatWritable) reuse;
            r.set(foi.get(o));
            tbOut.write(r);
            return;
        }
        case DOUBLE:
            DoubleObjectInspector doi = (DoubleObjectInspector) poi;
            DoubleWritable r = reuse == null ? new DoubleWritable() : (DoubleWritable) reuse;
            r.set(doi.get(o));
            tbOut.write(r);
            return;
        case STRING: {
            StringObjectInspector soi = (StringObjectInspector) poi;
            Text t = soi.getPrimitiveWritableObject(o);
            tbOut.write(t);
            return;
        }
        default: {
            throw new RuntimeException("Unrecognized type: " + poi.getPrimitiveCategory());
        }
        }
    }
    case LIST: {
        ListObjectInspector loi = (ListObjectInspector) oi;
        ObjectInspector elemOI = loi.getListElementObjectInspector();
        List l = loi.getList(o);
        // Don't use array (typecode: 144) until everything supports NA values in typedbytes
        if (false) {//(elemOI.getCategory()==ObjectInspector.Category.PRIMITIVE){
            tbOut.writeArray(l, (PrimitiveObjectInspector) elemOI);
        } else {
            tbOut.writeVector(l, (PrimitiveObjectInspector) elemOI);
        }
        return;
    }
    case MAP:
    case STRUCT: {
        // For complex object, serialize to JSON format
        String s = SerDeUtils.getJSONString(o, oi);
        Text t = reuse == null ? new Text() : (Text) reuse;

        // convert to Text and write it
        t.set(s);
        tbOut.write(t);
        return;
    }
    default: {
        throw new RuntimeException("Unrecognized type: " + oi.getCategory());
    }
    }
}
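
Both this serializer and the RTypedBytesSerDe variant below lean on the standard Writable reuse idiom visible in every primitive case above: allocate a container only when the caller did not supply one, then overwrite it in place for each record. Isolated as a hedged sketch (process() is a hypothetical consumer):

IntWritable r = null;
for (int v : new int[] { 1, 2, 3 }) {
    if (r == null) {
        r = new IntWritable(); // allocate once
    }
    r.set(v);                  // then mutate in place, avoiding per-record garbage
    process(r);                // hypothetical consumer of the reused instance
}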

From source file:com.jfolson.hive.serde.RTypedBytesSerDe.java

License:Apache License

private void serializeField(Object o, ObjectInspector oi, Object reuse) throws IOException {
    //LOG.info("Serializing hive type: "+oi.getTypeName());
    //LOG.info("Serializing category: "+oi.getCategory().toString());
    if (o == null) {
        tbOut.writeNull();
        return;
    }
    switch (oi.getCategory()) {
    case PRIMITIVE: {
        PrimitiveObjectInspector poi = (PrimitiveObjectInspector) oi;
        //LOG.info("Serializing primitive: "+poi.getPrimitiveCategory().toString());
        switch (poi.getPrimitiveCategory()) {
        case VOID: {
            return;
        }
        case BINARY: {
            BinaryObjectInspector boi = (BinaryObjectInspector) poi;
            TypedBytesWritable bytes = reuse == null ? new TypedBytesWritable() : (TypedBytesWritable) reuse;
            BytesWritable bytesWrite = boi.getPrimitiveWritableObject(o);
            if (bytesWrite != null) {
                bytes.set(bytesWrite);
                if (!RType.isValid(bytes)) {
                    LOG.error("Invalid typedbytes detected with type: " + RType.getType(bytes).code);
                    bytes.setValue(new Buffer(bytesWrite.getBytes(), 0, bytesWrite.getLength()));
                }
                //LOG.info("Writing binary primitive with class: "+bytes.getClass().getName());
                tbOut.write(bytes);
            }

            return;
        }
        case BOOLEAN: {
            BooleanObjectInspector boi = (BooleanObjectInspector) poi;
            BooleanWritable r = reuse == null ? new BooleanWritable() : (BooleanWritable) reuse;
            r.set(boi.get(o));
            tbOut.write(r);
            return;
        }
        case BYTE: {
            ByteObjectInspector boi = (ByteObjectInspector) poi;
            ByteWritable r = reuse == null ? new ByteWritable() : (ByteWritable) reuse;
            r.set(boi.get(o));
            tbOut.write(r);
            return;
        }
        case SHORT: {
            ShortObjectInspector spoi = (ShortObjectInspector) poi;
            ShortWritable r = reuse == null ? new ShortWritable() : (ShortWritable) reuse;
            r.set(spoi.get(o));
            tbOut.write(r);
            return;
        }
        case INT: {
            IntObjectInspector ioi = (IntObjectInspector) poi;
            IntWritable r = reuse == null ? new IntWritable() : (IntWritable) reuse;
            r.set(ioi.get(o));
            tbOut.write(r);
            return;
        }
        case LONG: {
            LongObjectInspector loi = (LongObjectInspector) poi;
            LongWritable r = reuse == null ? new LongWritable() : (LongWritable) reuse;
            r.set(loi.get(o));
            tbOut.write(r);
            return;
        }
        case FLOAT: {
            FloatObjectInspector foi = (FloatObjectInspector) poi;
            FloatWritable r = reuse == null ? new FloatWritable() : (FloatWritable) reuse;
            r.set(foi.get(o));
            tbOut.write(r);
            return;
        }
        case DOUBLE:
            DoubleObjectInspector doi = (DoubleObjectInspector) poi;
            DoubleWritable r = reuse == null ? new DoubleWritable() : (DoubleWritable) reuse;
            r.set(doi.get(o));
            tbOut.write(r);
            return;
        case STRING: {
            StringObjectInspector soi = (StringObjectInspector) poi;
            Text t = soi.getPrimitiveWritableObject(o);
            tbOut.write(t);
            return;
        }
        default: {
            throw new RuntimeException("Unrecognized type: " + poi.getPrimitiveCategory());
        }
        }
    }
    case LIST: {
        ListObjectInspector loi = (ListObjectInspector) oi;
        ObjectInspector elemOI = loi.getListElementObjectInspector();
        List l = loi.getList(o);
        if (false) {//(elemOI.getCategory()==ObjectInspector.Category.PRIMITIVE){
            tbOut.writeArray(l, (PrimitiveObjectInspector) elemOI);
        } else {
            tbOut.writeVector(l, (PrimitiveObjectInspector) elemOI);
        }
        return;
    }
    case MAP:
    case STRUCT: {
        // For complex object, serialize to JSON format
        String s = SerDeUtils.getJSONString(o, oi);
        Text t = reuse == null ? new Text() : (Text) reuse;

        // convert to Text and write it
        t.set(s);
        tbOut.write(t);
        return;
    }
    default: {
        throw new RuntimeException("Unrecognized type: " + oi.getCategory());
    }
    }
}

From source file:com.jfolson.hive.serde.RTypedBytesWritableInput.java

License:Apache License

public IntWritable readInt(IntWritable iw) throws IOException {
    if (iw == null) {
        iw = new IntWritable();
    }
    int val = in.readInt();
    if (val == RType.NA_INTEGER) {
        return null;
    }
    iw.set(val);
    return iw;
}
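
Note the contract: readInt() maps R's NA_INTEGER sentinel to a Java null rather than to a reused IntWritable, so callers must null-check the return value instead of trusting the instance they passed in. A hedged call-site sketch (input is assumed to be an RTypedBytesWritableInput):

IntWritable iw = input.readInt(new IntWritable());
if (iw == null) {
    // the stream held R's NA_INTEGER; there is no usable int value
} else {
    int v = iw.get();
}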

From source file:com.linkedin.cubert.io.CompactWritablesDeserializer.java

License:Open Source License

private static final WritableComparable<?> createWritable(DataType type) {
    switch (type) {
    case BOOLEAN:
        return new BooleanWritable();
    case BYTE:
        return new ByteWritable();
    case INT:
        return new IntWritable();
    case LONG:
        return new LongWritable();
    case FLOAT:
        return new FloatWritable();
    case DOUBLE:
        return new DoubleWritable();
    case STRING:
        return new Text();
    default:
        return null;
    }
}
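
A typical consumer of such a factory allocates one Writable per field and then repeatedly deserializes into it. A hedged sketch (DataType is this project's enum; in is an assumed java.io.DataInput):

WritableComparable<?> w = createWritable(DataType.INT); // a fresh IntWritable
if (w != null) {
    w.readFields(in);                // fill it from the stream
    int v = ((IntWritable) w).get(); // narrow back to the concrete type
}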

From source file:com.liveramp.hank.hadoop.KeyAndPartitionWritable.java

License:Apache License

public KeyAndPartitionWritable() {
    key = new BytesWritable();
    partition = new IntWritable();
}
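
Composite Writables like this one usually delegate serialization to their members in declaration order. A hedged sketch of the companion methods such a class would carry (the actual implementation may differ):

public void write(DataOutput out) throws IOException {
    key.write(out);        // BytesWritable member first
    partition.write(out);  // then the IntWritable member
}

public void readFields(DataInput in) throws IOException {
    key.readFields(in);
    partition.readFields(in);
}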

From source file:com.m6d.filecrush.crush.Crush.java

License:Apache License

void writeDirs() throws IOException {

    print(Verbosity.INFO, "\nUsing temporary directory " + tmpDir.toUri().getPath() + "\n");

    FileStatus status = fs.getFileStatus(srcDir);

    Path tmpIn = new Path(tmpDir, "in");

    bucketFiles = new Path(tmpIn, "dirs");
    partitionMap = new Path(tmpIn, "partition-map");
    counters = new Path(tmpIn, "counters");

    skippedFiles = new HashSet<String>();
    removableFiles = new HashSet<String>();

    /*
     * Prefer the path returned by the status because it is always fully qualified.
     */
    List<Path> dirs = asList(status.getPath());

    Text key = new Text();
    Text value = new Text();

    Bucketer partitionBucketer = new Bucketer(maxTasks, 0, false);
    partitionBucketer.reset("partition-map");

    jobCounters = new Counters();
    int fileCount = 0;

    //Path bucketFile = new Path(tmpIn, "dirs_" + fileCount++);
    Writer writer = SequenceFile.createWriter(fs, job, bucketFiles, Text.class, Text.class,
            CompressionType.BLOCK);

    try {
        while (!dirs.isEmpty()) {
            List<Path> nextLevel = new LinkedList<Path>();

            for (Path dir : dirs) {
                String dirPath = dir.toUri().getPath();
                print(Verbosity.INFO, "\n\n[" + dirPath + "]");

                jobCounters.incrCounter(MapperCounter.DIRS_FOUND, 1);

                FileStatus[] contents = fs.listStatus(dir, new PathFilter() {
                    @Override
                    public boolean accept(Path testPath) {
                        if (ignoredFilesMatcher == null)
                            return true;
                        ignoredFilesMatcher.reset(testPath.toUri().getPath());
                        boolean ignores = ignoredFilesMatcher.matches();
                        if (ignores)
                            LOG.info("Ignoring file " + testPath);
                        return !ignores;
                    }

                });

                if (contents == null || contents.length == 0) {
                    print(Verbosity.INFO, "\n  Directory is empty");

                    jobCounters.incrCounter(MapperCounter.DIRS_SKIPPED, 1);
                } else {
                    List<FileStatus> crushables = new ArrayList<FileStatus>(contents.length);
                    Set<String> uncrushedFiles = new HashSet<String>(contents.length);

                    long crushableBytes = 0;

                    /*
                     * Queue sub directories for subsequent inspection and examine the files in this directory.
                     */
                    for (FileStatus content : contents) {
                        Path path = content.getPath();

                        if (content.isDir()) {
                            nextLevel.add(path);
                        } else {
                            String filePath = path.toUri().getPath();
                            boolean skipFile = false;
                            if (skippedFilesMatcher != null) {
                                skippedFilesMatcher.reset(filePath);
                                if (skippedFilesMatcher.matches()) {
                                    skipFile = true;
                                }
                            }

                            boolean changed = uncrushedFiles.add(filePath);
                            assert changed : path.toUri().getPath();
                            long fileLength = content.getLen();

                            if (!skipFile && fileLength <= maxEligibleSize) {
                                if (removeEmptyFiles && fileLength == 0)
                                    removableFiles.add(filePath);
                                else {
                                    crushables.add(content);
                                    crushableBytes += fileLength;
                                }
                            }
                        }
                    }

                    /*
                     * We found a directory with data in it. Make sure we know how to name the crush output file and then increment the
                     * number of files we found.
                     */
                    if (!uncrushedFiles.isEmpty()) {
                        if (-1 == findMatcher(dir)) {
                            throw new IllegalArgumentException(
                                    "Could not find matching regex for directory: " + dir);
                        }

                        jobCounters.incrCounter(MapperCounter.FILES_FOUND, uncrushedFiles.size());
                    }

                    if (0 == crushableBytes) {
                        print(Verbosity.INFO, "\n  Directory has no crushable files");

                        jobCounters.incrCounter(MapperCounter.DIRS_SKIPPED, 1);
                    } else {
                        /*
                         * We found files to consider for crushing.
                         */
                        long nBlocks = crushableBytes / dfsBlockSize;

                        if (nBlocks * dfsBlockSize != crushableBytes) {
                            nBlocks++;
                        }

                        /*
                         * maxFileBlocks will be huge in v1 mode, which will lead to one bucket per directory.
                         */
                        long dirBuckets = nBlocks / maxFileBlocks;
                        if (dirBuckets * maxFileBlocks != nBlocks) {
                            dirBuckets++;
                        }

                        if (dirBuckets > Integer.MAX_VALUE) {
                            throw new AssertionError("Too many buckets: " + dirBuckets);
                        }

                        Bucketer directoryBucketer = new Bucketer((int) dirBuckets, excludeSingleFileDirs);
                        directoryBucketer.reset(getPathPart(dir));

                        for (FileStatus file : crushables) {
                            directoryBucketer.add(new FileStatusHasSize(file));
                        }

                        List<Bucket> crushFiles = directoryBucketer.createBuckets();
                        if (crushFiles.isEmpty()) {
                            jobCounters.incrCounter(MapperCounter.DIRS_SKIPPED, 1);
                            print(Verbosity.INFO, "\n  Directory skipped");
                        } else {
                            nBuckets += crushFiles.size();
                            jobCounters.incrCounter(MapperCounter.DIRS_ELIGIBLE, 1);
                            print(Verbosity.INFO, "\n  Generating " + crushFiles.size() + " output files");

                            /*
                             * Write out the mapping between a bucket and a file.
                             */
                            for (Bucket crushFile : crushFiles) {
                                String bucketId = crushFile.name();

                                List<String> filesInBucket = crushFile.contents();

                                print(Verbosity.INFO,
                                        format("\n  Output %s will include %,d input bytes from %,d files",
                                                bucketId, crushFile.size(), filesInBucket.size()));

                                key.set(bucketId);

                                for (String f : filesInBucket) {
                                    boolean changed = uncrushedFiles.remove(f);
                                    assert changed : f;

                                    pathMatcher.reset(f);
                                    pathMatcher.matches();

                                    value.set(pathMatcher.group(5));

                                    /*
                                     * Write one row per file to maximize the number of mappers
                                     */
                                    writer.append(key, value);

                                    /*
                                     * Print the input file with four leading spaces.
                                     */
                                    print(Verbosity.VERBOSE, "\n    " + f);
                                }

                                jobCounters.incrCounter(MapperCounter.FILES_ELIGIBLE, filesInBucket.size());

                                partitionBucketer.add(crushFile);
                            }
                        }
                    }

                    if (!removableFiles.isEmpty()) {
                        print(Verbosity.INFO, "\n  Marked " + removableFiles.size() + " files for removal");

                        for (String removable : removableFiles) {
                            uncrushedFiles.remove(removable);
                            print(Verbosity.VERBOSE, "\n    " + removable);
                        }

                        jobCounters.incrCounter(MapperCounter.FILES_REMOVED, removableFiles.size());
                    }

                    if (!uncrushedFiles.isEmpty()) {
                        print(Verbosity.INFO, "\n  Skipped " + uncrushedFiles.size() + " files");

                        for (String uncrushed : uncrushedFiles) {
                            print(Verbosity.VERBOSE, "\n    " + uncrushed);
                        }

                        jobCounters.incrCounter(MapperCounter.FILES_SKIPPED, uncrushedFiles.size());
                    }

                    skippedFiles.addAll(uncrushedFiles);
                }
            }

            dirs = nextLevel;
        }
    } finally {
        writer.close();
    }

    /*
     * Now that we have processed all the directories, write the partition map.
     */
    List<Bucket> partitions = partitionBucketer.createBuckets();
    assert partitions.size() <= maxTasks;

    writer = SequenceFile.createWriter(fs, job, partitionMap, Text.class, IntWritable.class);
    IntWritable partNum = new IntWritable();
    int totalReducers = 0;
    for (Bucket partition : partitions) {
        String partitionName = partition.name();

        int p = Integer.parseInt(partitionName.substring(partitionName.lastIndexOf('-') + 1));
        partNum.set(p);

        if (partition.contents().size() > 0)
            totalReducers++;

        for (String bucketId : partition.contents()) {
            key.set(bucketId);
            writer.append(key, partNum);
        }
    }
    writer.close();

    print(Verbosity.INFO, "\n\nNumber of allocated reducers = " + totalReducers);
    job.setInt("mapreduce.job.reduces", totalReducers);

    DataOutputStream countersStream = fs.create(this.counters);
    jobCounters.write(countersStream);
    countersStream.close();
}

From source file:com.m6d.filecrush.crush.CrushTest.java

License:Apache License

@Test
public void bucketing() throws Exception {
    File in = tmp.newFolder("in");

    Counters expectedCounters = new Counters();
    List<String> expectedBucketFiles = new ArrayList<String>();

    /*
     * Create a hierarchy of directories. Directories are distinguished by a trailing slash in these comments.
     *
     *   1/
     *         1.1/
     *               file1 10 bytes
     *               file2 20 bytes
     *               file3 30 bytes
     *               file4 41 bytes
     *               file5 15 bytes
     *               file6 30 bytes
     *               file7 20 bytes
     *         1.2/
     *               file1 20 bytes
     *               file2 10 bytes
     *         1.3/
     *   2/
     *         file1 70 bytes
     *         file2 30 bytes
     *         file3 25 bytes
     *         file4 30 bytes
     *         file5 35 bytes
     *         2.1/
     *               file1 10 bytes
     *         2.2/
     *               file1 25 bytes
     *               file2 15 bytes
     *               file3 35 bytes
     *         2.3/
     *               file1 41 bytes
     *               file2 10 bytes
     *         2.4/
     *               2.4.1/
     *                     file1 100 bytes
     *                     file2   30 bytes
     *               2.4.2/
     *                     file1 20 bytes
     *                     file2 20 bytes
     *                     file3 10 bytes
     */

    /*
     * in contains 2 dirs and no files so it is skipped.
     *
     *    in/
     *          1/
     *          2/
     */
    expectedCounters.incrCounter(MapperCounter.DIRS_FOUND, 1);
    expectedCounters.incrCounter(MapperCounter.DIRS_SKIPPED, 1);

    tmp.newFolder("in/1");
    File dir2 = tmp.newFolder("in/2");

    /*
     * in/1 contains three dirs and no files so it is skipped.
     *
     *    in/
     *          1/
     *                1.1/
     *                1.2/
     *                1.3/
     */
    expectedCounters.incrCounter(MapperCounter.DIRS_FOUND, 1);
    expectedCounters.incrCounter(MapperCounter.DIRS_SKIPPED, 1);

    File dir1_1 = tmp.newFolder("in/1/1.1");
    File dir1_2 = tmp.newFolder("in/1/1.2");
    tmp.newFolder("in/1/1.3");

    /*
     * in/2 contains five files and four dirs.
     *
     *    in/
     *          2/
     *               file1 70 bytes
     *               file2 30 bytes
     *               file3 25 bytes
     *               file4 30 bytes
     *               file5 35 bytes
     *                2.1/
     *                2.2/
     *                2.3/
     *                2.4/
     *
     *    0             1             2
     *    file5 35      file2 30      file4 30
     *                  file3 25
     *
     * Buckets 0 and 2 have a single file each so they are ignored.
     */
    expectedCounters.incrCounter(MapperCounter.DIRS_FOUND, 1);
    expectedCounters.incrCounter(MapperCounter.DIRS_ELIGIBLE, 1);

    expectedCounters.incrCounter(MapperCounter.FILES_FOUND, 5);
    expectedCounters.incrCounter(MapperCounter.FILES_ELIGIBLE, 2);
    expectedCounters.incrCounter(MapperCounter.FILES_SKIPPED, 3);

    File dir2_1 = tmp.newFolder("in/2/2.1");
    File dir2_2 = tmp.newFolder("in/2/2.2");
    File dir2_3 = tmp.newFolder("in/2/2.3");
    tmp.newFolder("in/2/2.4");

    createFile(dir2, "file1", 70);
    createFile(dir2, "file2", 30);
    createFile(dir2, "file3", 25);
    createFile(dir2, "file4", 30);
    createFile(dir2, "file5", 35);

    expectedBucketFiles
            .add(format("%s   %s", dir2.getAbsolutePath() + "-1", new File(dir2, "file2").getAbsolutePath()));
    expectedBucketFiles
            .add(format("%s   %s", dir2.getAbsolutePath() + "-1", new File(dir2, "file3").getAbsolutePath()));

    /*
     * in/1/1.1 contains seven files and no dirs.
     *
     *    in/
     *          1/
     *                1.1/
     *                     file1 10 bytes
     *                     file2 20 bytes
     *                     file3 30 bytes
     *                     file4 41 bytes
     *                     file5 15 bytes
     *                     file6 30 bytes
     *                     file7 20 bytes
     *
     *    0             1             2
     *    file3 30      file6 30      file2 20
     *    file5 15      file1 10      file7 20
     *
     * file4 is > 50 * 0.8 so it is ignored.
     */
    expectedCounters.incrCounter(MapperCounter.DIRS_FOUND, 1);
    expectedCounters.incrCounter(MapperCounter.DIRS_ELIGIBLE, 1);

    expectedCounters.incrCounter(MapperCounter.FILES_FOUND, 7);
    expectedCounters.incrCounter(MapperCounter.FILES_ELIGIBLE, 6);
    expectedCounters.incrCounter(MapperCounter.FILES_SKIPPED, 1);

    createFile(dir1_1, "file1", 10);
    createFile(dir1_1, "file2", 20);
    createFile(dir1_1, "file3", 30);
    createFile(dir1_1, "file4", 41);
    createFile(dir1_1, "file5", 15);
    createFile(dir1_1, "file6", 30);
    createFile(dir1_1, "file7", 20);

    expectedBucketFiles.add(
            format("%s   %s", dir1_1.getAbsolutePath() + "-0", new File(dir1_1, "file3").getAbsolutePath()));
    expectedBucketFiles.add(
            format("%s   %s", dir1_1.getAbsolutePath() + "-0", new File(dir1_1, "file5").getAbsolutePath()));
    expectedBucketFiles.add(
            format("%s   %s", dir1_1.getAbsolutePath() + "-1", new File(dir1_1, "file6").getAbsolutePath()));
    expectedBucketFiles.add(
            format("%s   %s", dir1_1.getAbsolutePath() + "-1", new File(dir1_1, "file1").getAbsolutePath()));
    expectedBucketFiles.add(
            format("%s   %s", dir1_1.getAbsolutePath() + "-2", new File(dir1_1, "file2").getAbsolutePath()));
    expectedBucketFiles.add(
            format("%s   %s", dir1_1.getAbsolutePath() + "-2", new File(dir1_1, "file7").getAbsolutePath()));

    /*
     * in/1/1.2 contains two files.
     *
     *    in/
     *          1/
     *                1.2/
     *                     file1 20 bytes
     *                     file2 10 bytes
     */
    expectedCounters.incrCounter(MapperCounter.DIRS_FOUND, 1);
    expectedCounters.incrCounter(MapperCounter.DIRS_ELIGIBLE, 1);

    expectedCounters.incrCounter(MapperCounter.FILES_FOUND, 2);
    expectedCounters.incrCounter(MapperCounter.FILES_ELIGIBLE, 2);

    createFile(dir1_2, "file1", 20);
    createFile(dir1_2, "file2", 10);

    expectedBucketFiles.add(
            format("%s   %s", dir1_2.getAbsolutePath() + "-0", new File(dir1_2, "file1").getAbsolutePath()));
    expectedBucketFiles.add(
            format("%s   %s", dir1_2.getAbsolutePath() + "-0", new File(dir1_2, "file2").getAbsolutePath()));

    /*
     * in/1/1.3 is empty.
     *
     *    in/
     *          1/
     *                1.3/
     */
    expectedCounters.incrCounter(MapperCounter.DIRS_FOUND, 1);
    expectedCounters.incrCounter(MapperCounter.DIRS_SKIPPED, 1);

    tmp.newFolder("in/1/1.3");

    /*
     * in/2/2.1 contains one file.
     *
     *    in/
     *          2/
     *                2.1/
     *                     file1 10 bytes
     *
     * Single file dirs are ignored.
     */
    expectedCounters.incrCounter(MapperCounter.DIRS_FOUND, 1);
    expectedCounters.incrCounter(MapperCounter.DIRS_SKIPPED, 1);

    expectedCounters.incrCounter(MapperCounter.FILES_FOUND, 1);
    expectedCounters.incrCounter(MapperCounter.FILES_SKIPPED, 1);

    createFile(dir2_1, "file1", 10);

    /*
     * in/2/2.2 contains three files.
     *
     *    in/
     *          2/
     *                2.2/
     *                     file1 25 bytes
     *                     file2 15 bytes
     *                     file3 35 bytes
     *
     *    0             1
     *    file3 35      file1 25
     *                  file2 15
     *
     * Bucket 0 with a single file is ignored.
     */
    expectedCounters.incrCounter(MapperCounter.DIRS_FOUND, 1);
    expectedCounters.incrCounter(MapperCounter.DIRS_ELIGIBLE, 1);

    expectedCounters.incrCounter(MapperCounter.FILES_FOUND, 3);
    expectedCounters.incrCounter(MapperCounter.FILES_ELIGIBLE, 2);
    expectedCounters.incrCounter(MapperCounter.FILES_SKIPPED, 1);

    createFile(dir2_2, "file1", 25);
    createFile(dir2_2, "file2", 15);
    createFile(dir2_2, "file3", 35);

    expectedBucketFiles.add(
            format("%s   %s", dir2_2.getAbsolutePath() + "-1", new File(dir2_2, "file1").getAbsolutePath()));
    expectedBucketFiles.add(
            format("%s   %s", dir2_2.getAbsolutePath() + "-1", new File(dir2_2, "file2").getAbsolutePath()));

    /*
     * in/2/2.3 contains 2 files.
     *
     *    in/
     *          2/
     *                2.3/
     *                     file1 41 bytes
     *                     file2 10 bytes
     *
     * file1 is too big, leaving file2 as a single file, which is also ignored.
     */
    expectedCounters.incrCounter(MapperCounter.DIRS_FOUND, 1);
    expectedCounters.incrCounter(MapperCounter.DIRS_SKIPPED, 1);

    expectedCounters.incrCounter(MapperCounter.FILES_FOUND, 2);
    expectedCounters.incrCounter(MapperCounter.FILES_SKIPPED, 2);

    createFile(dir2_3, "file1", 41);
    createFile(dir2_3, "file2", 10);

    /*
     * in/2/2.4 contains two sub directories and no files.
     *
     *    in/
     *          2/
     *               2.4/
     *                     2.4.1/
     *                     2.4.2/
     */
    expectedCounters.incrCounter(MapperCounter.DIRS_FOUND, 1);
    expectedCounters.incrCounter(MapperCounter.DIRS_SKIPPED, 1);

    tmp.newFolder("in/2/2.4");

    File dir2_4_1 = tmp.newFolder("in/2/2.4/2.4.1");
    File dir2_4_2 = tmp.newFolder("in/2/2.4/2.4.2");

    /*
     *    in/
     *          2/
     *               2.4/
     *                     2.4.1/
     *                           file1 100 bytes
     *                           file2   30 bytes
     */
    expectedCounters.incrCounter(MapperCounter.DIRS_FOUND, 1);
    expectedCounters.incrCounter(MapperCounter.DIRS_SKIPPED, 1);

    expectedCounters.incrCounter(MapperCounter.FILES_FOUND, 2);
    expectedCounters.incrCounter(MapperCounter.FILES_SKIPPED, 2);

    createFile(dir2_4_1, "file1", 100);
    createFile(dir2_4_1, "file2", 30);

    /*
     *    in/
     *          2/
     *               2.4/
     *                     2.4.2/
     *                           file1 20 bytes
     *                           file2 20 bytes
     *                           file3 10 bytes
     *   0
     *   file1 20
     *   file2 20
     *   file3 10
     */
    expectedCounters.incrCounter(MapperCounter.DIRS_FOUND, 1);
    expectedCounters.incrCounter(MapperCounter.DIRS_ELIGIBLE, 1);

    expectedCounters.incrCounter(MapperCounter.FILES_FOUND, 3);
    expectedCounters.incrCounter(MapperCounter.FILES_ELIGIBLE, 3);

    createFile(dir2_4_2, "file1", 20);
    createFile(dir2_4_2, "file2", 20);
    createFile(dir2_4_2, "file3", 10);

    expectedBucketFiles.add(format("%s   %s", dir2_4_2.getAbsolutePath() + "-0",
            new File(dir2_4_2, "file1").getAbsolutePath()));
    expectedBucketFiles.add(format("%s   %s", dir2_4_2.getAbsolutePath() + "-0",
            new File(dir2_4_2, "file2").getAbsolutePath()));
    expectedBucketFiles.add(format("%s   %s", dir2_4_2.getAbsolutePath() + "-0",
            new File(dir2_4_2, "file3").getAbsolutePath()));

    Crush crush = new Crush();

    crush.setConf(job);
    crush.setFileSystem(fileSystem);

    /*
     * Call these in the same order that run() does.
     */
    crush.createJobConfAndParseArgs("--compress=none", "--max-file-blocks=1", in.getAbsolutePath(),
            new File(tmp.getRoot(), "out").getAbsolutePath(), "20101124171730");
    crush.writeDirs();

    /*
     * Verify bucket contents.
     */

    List<String> actualBucketFiles = new ArrayList<String>();

    Text key = new Text();
    Text value = new Text();

    Reader reader = new Reader(FileSystem.get(job), crush.getBucketFiles(), job);
    while (reader.next(key, value)) {
        actualBucketFiles.add(format("%s\t%s", key, value));
    }
    reader.close();

    Collections.sort(expectedBucketFiles);
    Collections.sort(actualBucketFiles);

    assertThat(actualBucketFiles, equalTo(expectedBucketFiles));

    /*
     * Verify the partition map.
     */
    Reader partitionMapReader = new Reader(FileSystem.get(job), crush.getPartitionMap(), job);

    IntWritable partNum = new IntWritable();

    Map<String, Integer> actualPartitions = new HashMap<String, Integer>();

    while (partitionMapReader.next(key, partNum)) {
        actualPartitions.put(key.toString(), partNum.get());
    }

    partitionMapReader.close();

    /*
     * These crush files need to be allocated into 5 partitions:
     *
     * in/2-1             55 bytes
     * in/1/1.1-0         45 bytes
     * in/1/1.1-2         40 bytes
     * in/1/1.1-1         40 bytes
     * in/1/1.2-0         30 bytes
     * in/2/2.2-1         40 bytes
     * in/2/2.4/2.4.2-0   50 bytes
     *
     *    0            1                     2               3               4
     *    in/2-1 55    in/2/2.4/2.4.2-0 50   in/1/1.1-0 45   in/1/1.1-1 40   in/1/1.1-2 40
     *                                                       in/2/2.2-1 40   in/1/1.2-0 30
     */
    Map<String, Integer> expectedPartitions = new HashMap<String, Integer>();

    //TODO: this may not be deterministic due to jvm/hashmap/filesystem
    expectedPartitions.put(dir2.getAbsolutePath() + "-1", 0);
    expectedPartitions.put(dir2_4_2.getAbsolutePath() + "-0", 1);
    expectedPartitions.put(dir1_1.getAbsolutePath() + "-0", 2);
    expectedPartitions.put(dir1_1.getAbsolutePath() + "-2", 4);
    expectedPartitions.put(dir2_2.getAbsolutePath() + "-1", 3);
    expectedPartitions.put(dir1_1.getAbsolutePath() + "-1", 3);
    expectedPartitions.put(dir1_2.getAbsolutePath() + "-0", 4);

    assertThat(actualPartitions, equalTo(expectedPartitions));

    /*
     * Verify counters.
     */
    Counters actualCounters = new Counters();

    DataInputStream countersStream = FileSystem.get(job).open(crush.getCounters());

    actualCounters.readFields(countersStream);

    countersStream.close();

    assertThat(actualCounters, equalTo(expectedCounters));
}