List of usage examples for org.apache.hadoop.fs.PathFilter
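Every example below follows the same idiom: implement PathFilter's single accept(Path) method and pass the filter to FileSystem.listStatus(Path, PathFilter) so that only matching entries are returned. As a primer, here is a minimal, self-contained sketch of that idiom; the directory /tmp/input and the ".gz" suffix are placeholder assumptions, not taken from any example below.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;

public class PathFilterSketch {
    public static void main(String[] args) throws IOException {
        FileSystem fs = FileSystem.get(new Configuration());
        // List only the entries whose name ends with ".gz".
        // The directory path is a placeholder for this sketch.
        FileStatus[] matches = fs.listStatus(new Path("/tmp/input"), new PathFilter() {
            @Override
            public boolean accept(Path path) {
                return path.getName().endsWith(".gz");
            }
        });
        for (FileStatus match : matches) {
            System.out.println(match.getPath());
        }
    }
}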
From source file:com.inmobi.conduit.distcp.tools.TestIntegration.java
License:Apache License
@Test
public void testJobConters() {
    try {
        Path listFile = new Path("target/tmp1/listing").makeQualified(fs);
        addEntries(listFile, "*");
        createFileForAudit("/conduit/streams/test1/2013/10/10/10/10/file1.gz");
        runTest(listFile, target, true);
        int numberOfCountersPerFile = 0;
        long sumOfCounterValues = 0;
        FileStatus[] statuses = fs.listStatus(counterOutputPath, new PathFilter() {
            public boolean accept(Path path) {
                return path.toString().contains("part");
            }
        });
        for (FileStatus status : statuses) {
            Scanner scanner = new Scanner(fs.open(status.getPath()));
            while (scanner.hasNext()) {
                String counterNameValue = null;
                try {
                    counterNameValue = scanner.next();
                    String tmp[] = counterNameValue.split(ConduitConstants.AUDIT_COUNTER_NAME_DELIMITER);
                    Assert.assertEquals(4, tmp.length);
                    Long numOfMsgs = Long.parseLong(tmp[3]);
                    numberOfCountersPerFile++;
                    sumOfCounterValues += numOfMsgs;
                } catch (Exception e) {
                    LOG.error("Counters file has malformed line with counter name = " + counterNameValue
                            + " ..skipping the line ", e);
                }
            }
        }
        // should have 2 counters per file
        Assert.assertEquals(2, numberOfCountersPerFile);
        // sum of all counter values should equal the total number of messages
        Assert.assertEquals(3, sumOfCounterValues);
        checkResult(target, 1);
    } catch (IOException e) {
        LOG.error("Exception encountered while testing distcp", e);
        Assert.fail("distcp failure");
    } finally {
        TestDistCpUtils.delete(fs, root);
    }
}
From source file:com.inmobi.conduit.local.LocalStreamServiceTest.java
License:Apache License
private void testClusterName(String configName, String currentClusterName) throws Exception {
    ConduitConfigParser parser = new ConduitConfigParser(configName);
    ConduitConfig config = parser.getConfig();
    Set<String> streamsToProcess = new HashSet<String>();
    streamsToProcess.addAll(config.getSourceStreams().keySet());
    Set<String> clustersToProcess = new HashSet<String>();
    Set<TestLocalStreamService> services = new HashSet<TestLocalStreamService>();
    Cluster currentCluster = null;
    for (SourceStream sStream : config.getSourceStreams().values()) {
        for (String cluster : sStream.getSourceClusters()) {
            clustersToProcess.add(cluster);
        }
    }
    if (currentClusterName != null) {
        currentCluster = config.getClusters().get(currentClusterName);
    }
    for (String clusterName : clustersToProcess) {
        Cluster cluster = config.getClusters().get(clusterName);
        cluster.getHadoopConf().set("mapred.job.tracker",
                super.CreateJobConf().get("mapred.job.tracker"));
        TestLocalStreamService service = new TestLocalStreamService(config, cluster, currentCluster,
                new NullCheckPointProvider(), streamsToProcess);
        services.add(service);
    }
    for (TestLocalStreamService service : services) {
        FileSystem fs = service.getFileSystem();
        service.preExecute();
        if (currentClusterName != null)
            Assert.assertEquals(service.getCurrentCluster().getName(), currentClusterName);
        // creating a job with empty input path
        Path tmpJobInputPath = new Path("/tmp/job/input/path");
        Map<FileStatus, String> fileListing = new TreeMap<FileStatus, String>();
        Set<FileStatus> trashSet = new HashSet<FileStatus>();
        // checkpointKey, CheckPointPath
        Table<String, String, String> checkpointPaths = HashBasedTable.create();
        service.createMRInput(tmpJobInputPath, fileListing, trashSet, checkpointPaths);
        Job testJobConf = service.createJob(tmpJobInputPath, 1000);
        testJobConf.waitForCompletion(true);
        int numberOfCountersPerFile = 0;
        long sumOfCounterValues = 0;
        Path outputCounterPath = new Path(new Path(service.getCluster().getTmpPath(), service.getName()),
                "counters");
        FileStatus[] statuses = fs.listStatus(outputCounterPath, new PathFilter() {
            public boolean accept(Path path) {
                return path.toString().contains("part");
            }
        });
        for (FileStatus fileSt : statuses) {
            Scanner scanner = new Scanner(fs.open(fileSt.getPath()));
            while (scanner.hasNext()) {
                String counterNameValue = null;
                try {
                    counterNameValue = scanner.next();
                    String tmp[] = counterNameValue.split(ConduitConstants.AUDIT_COUNTER_NAME_DELIMITER);
                    Assert.assertEquals(4, tmp.length);
                    Long numOfMsgs = Long.parseLong(tmp[3]);
                    numberOfCountersPerFile++;
                    sumOfCounterValues += numOfMsgs;
                } catch (Exception e) {
                    LOG.error("Counters file has malformed line with counter name =" + counterNameValue
                            + "..skipping the line", e);
                }
            }
        }
        // Should have 2 counters for each file
        Assert.assertEquals(NUMBER_OF_FILES * 2, numberOfCountersPerFile);
        // sum of all counter values should be equal to total number of messages
        Assert.assertEquals(NUMBER_OF_FILES * 3, sumOfCounterValues);
        Assert.assertEquals(testJobConf.getConfiguration().get(FS_DEFAULT_NAME_KEY),
                service.getCurrentCluster().getHadoopConf().get(FS_DEFAULT_NAME_KEY));
        Assert.assertEquals(testJobConf.getConfiguration().get(SRC_FS_DEFAULT_NAME_KEY),
                service.getCluster().getHadoopConf().get(FS_DEFAULT_NAME_KEY));
        if (currentCluster == null)
            Assert.assertEquals(testJobConf.getConfiguration().get(FS_DEFAULT_NAME_KEY),
                    testJobConf.getConfiguration().get(SRC_FS_DEFAULT_NAME_KEY));
        service.getFileSystem().delete(new Path(service.getCluster().getRootDir()), true);
    }
}
From source file:com.inmobi.databus.readers.CollectorStreamReader.java
License:Apache License
protected FileMap<CollectorFile> createFileMap() throws IOException {
    return new FileMap<CollectorFile>() {
        @Override
        protected PathFilter createPathFilter() {
            return new PathFilter() {
                @Override
                public boolean accept(Path p) {
                    if (p.getName().endsWith("_current") || p.getName().endsWith("_stats")) {
                        return false;
                    }
                    return true;
                }
            };
        }

        /*
         * prepare a fileMap with files which are beyond the stopTime
         */
        @Override
        protected void buildList() throws IOException {
            if (fsIsPathExists(streamDir)) {
                FileStatus[] fileStatuses = fsListFileStatus(streamDir, pathFilter);
                if (fileStatuses == null || fileStatuses.length == 0) {
                    LOG.info("No files in directory:" + streamDir);
                    return;
                }
                if (stopTime == null) {
                    for (FileStatus file : fileStatuses) {
                        addPath(file);
                    }
                } else {
                    for (FileStatus file : fileStatuses) {
                        Date currentTimeStamp = getDateFromCollectorFile(file.getPath().getName());
                        if (stopTime.before(currentTimeStamp)) {
                            stopListing();
                            continue;
                        }
                        addPath(file);
                    }
                }
            } else {
                LOG.info("Collector directory does not exist");
            }
        }

        @Override
        protected TreeMap<CollectorFile, FileStatus> createFilesMap() {
            return new TreeMap<CollectorFile, FileStatus>();
        }

        @Override
        protected CollectorFile getStreamFile(String fileName) {
            return CollectorFile.create(fileName);
        }

        @Override
        protected CollectorFile getStreamFile(FileStatus file) {
            return CollectorFile.create(file.getPath().getName());
        }
    };
}
From source file:com.inmobi.databus.readers.DatabusStreamWaitingReader.java
License:Apache License
@Override
protected FileMap<HadoopStreamFile> createFileMap() throws IOException {
    return new FileMap<HadoopStreamFile>() {
        @Override
        protected void buildList() throws IOException {
            buildListing(this, pathFilter);
        }

        @Override
        protected TreeMap<HadoopStreamFile, FileStatus> createFilesMap() {
            return new TreeMap<HadoopStreamFile, FileStatus>();
        }

        @Override
        protected HadoopStreamFile getStreamFile(String fileName) {
            throw new RuntimeException("Not implemented");
        }

        @Override
        protected HadoopStreamFile getStreamFile(FileStatus file) {
            return HadoopStreamFile.create(file);
        }

        @Override
        protected PathFilter createPathFilter() {
            return new PathFilter() {
                @Override
                public boolean accept(Path path) {
                    if (path.getName().startsWith("_")) {
                        return false;
                    }
                    return true;
                }
            };
        }
    };
}
From source file:com.inmobi.databus.readers.LocalStreamCollectorReader.java
License:Apache License
public FileMap<DatabusStreamFile> createFileMap() throws IOException {
    return new FileMap<DatabusStreamFile>() {
        @Override
        protected void buildList() throws IOException {
            buildListing(this, pathFilter);
        }

        @Override
        protected TreeMap<DatabusStreamFile, FileStatus> createFilesMap() {
            return new TreeMap<DatabusStreamFile, FileStatus>();
        }

        @Override
        protected DatabusStreamFile getStreamFile(String fileName) {
            return DatabusStreamFile.create(streamName, fileName);
        }

        @Override
        protected DatabusStreamFile getStreamFile(FileStatus file) {
            return DatabusStreamFile.create(streamName, file.getPath().getName());
        }

        @Override
        protected PathFilter createPathFilter() {
            return new PathFilter() {
                @Override
                public boolean accept(Path p) {
                    if (p.getName().startsWith(collector)) {
                        return true;
                    }
                    return false;
                }
            };
        }
    };
}
From source file:com.jkoolcloud.tnt4j.streams.inputs.HdfsFileLineStream.java
License:Apache License
/**
 * Searches for files matching name pattern. Name pattern also may contain path of directory, where file search
 * should be performed, e.g., C:/Tomcat/logs/localhost_access_log.*.txt. If no path is defined (just file name
 * pattern) then files are searched in {@code System.getProperty("user.dir")}. Files array is ordered by file
 * modification timestamp in descending order.
 *
 * @param path
 *            path of file
 * @param fs
 *            file system
 *
 * @return array of found files paths.
 * @throws IOException
 *             if files can't be listed by file system.
 *
 * @see FileSystem#listStatus(Path, PathFilter)
 * @see FilenameUtils#wildcardMatch(String, String, IOCase)
 */
public static Path[] searchFiles(Path path, FileSystem fs) throws IOException {
    FileStatus[] dir = fs.listStatus(path.getParent(), new PathFilter() {
        @Override
        public boolean accept(Path path) {
            String name = path.getName();
            return FilenameUtils.wildcardMatch(name, "*", IOCase.INSENSITIVE); // NON-NLS
        }
    });
    Path[] activityFiles = new Path[dir == null ? 0 : dir.length];
    if (dir != null) {
        // newest files first
        Arrays.sort(dir, new Comparator<FileStatus>() {
            @Override
            public int compare(FileStatus o1, FileStatus o2) {
                return Long.valueOf(o1.getModificationTime()).compareTo(o2.getModificationTime()) * (-1);
            }
        });
        for (int i = 0; i < dir.length; i++) {
            activityFiles[i] = dir[i].getPath();
        }
    }
    return activityFiles;
}
From source file:com.linkedin.cubert.io.rubix.RubixFile.java
License:Open Source License
public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException,
        ParseException, InstantiationException, IllegalAccessException {
    final int VERBOSE_NUM_ROWS = 4;
    Options options = new Options();
    options.addOption("h", "help", false, "shows this message");
    options.addOption("v", "verbose", false, "print summary and first few rows of each block");
    options.addOption("m", "metadata", false, "show the metadata");
    options.addOption("d", "dump", false,
            "dump the contents of the rubix file. Use -f for specifying format, and -o for specifying output location");
    options.addOption("f", "format", true, "the data format for dumping data (AVRO or TEXT). Default: TEXT");
    options.addOption("e", "extract", true,
            "Extract one rubix block matching the block id. Use -o for specifying output location");
    options.addOption("o", true, "Store the output at the specified location");
    CommandLineParser parser = new BasicParser();

    // parse the command line arguments
    CommandLine line = parser.parse(options, args);

    // show the help message
    if (line.hasOption("h")) {
        HelpFormatter formatter = new HelpFormatter();
        formatter.printHelp(
                "RubixFile <rubix file or dir> [options]\nIf no options are provided, print a summary of the blocks.",
                options);
        return;
    }

    // validate provided options
    if (line.hasOption("d") && line.hasOption("e")) {
        System.err.println("Cannot dump (-d) and extract (-e) at the same time!");
        return;
    }

    // obtain the list of rubix files
    String[] files = line.getArgs();
    if (files == null || files.length == 0) {
        System.err.println("Rubix file not specified");
        return;
    }
    Configuration conf = new JobConf();
    FileSystem fs = FileSystem.get(conf);
    Path path = new Path(files[0]);
    FileStatus[] allFiles;
    FileStatus status = fs.getFileStatus(path);
    if (status.isDir()) {
        allFiles = fs.listStatus(path, new PathFilter() {
            @Override
            public boolean accept(Path path) {
                return path.toString().contains(RubixConstants.RUBIX_EXTENSION);
            }
        });
    } else {
        allFiles = new FileStatus[] { status };
    }

    // walk over all files and extract the trailer section
    List<RubixFile<Tuple, Object>> rfiles = new ArrayList<RubixFile<Tuple, Object>>();
    for (FileStatus s : allFiles) {
        Path p = s.getPath();
        RubixFile<Tuple, Object> rfile = new RubixFile<Tuple, Object>(conf, p);
        // if printing meta data information.. exit after first file (since all files
        // have the same meta data)
        if (line.hasOption("m")) {
            rfile.getKeyData();
            System.out.println(new ObjectMapper().writer().writeValueAsString(rfile.metadataJson));
            break;
        }
        rfiles.add(rfile);
    }

    // dump the data
    if (line.hasOption("d")) {
        String format = line.getOptionValue("f");
        if (format == null)
            format = "TEXT";
        format = format.trim().toUpperCase();
        if (format.equals("AVRO")) {
            // dumpAvro(rfiles, line.getOptionValue("o"));
            throw new UnsupportedOperationException(
                    "Dumping to avro is not currently supported. Please write a Cubert (map-only) script to store data in avro format");
        } else if (format.equals("TEXT")) {
            if (line.hasOption("o")) {
                System.err.println("Dumping TEXT format data *into a file* is not currently supported");
                return;
            }
            dumpText(rfiles, line.getOptionValue("o"), Integer.MAX_VALUE);
        } else {
            System.err.println("Invalid format [" + format + "] for dumping. Please use AVRO or TEXT");
            return;
        }
    } else if (line.hasOption("e")) {
        // extract one rubix block
        long blockId = Long.parseLong(line.getOptionValue("e"));
        extract(rfiles, blockId, line.getOptionValue("o"));
    } else {
        // print summary
        dumpText(rfiles, null, line.hasOption("v") ? VERBOSE_NUM_ROWS : 0);
    }
}
From source file:com.linkedin.mapred.AvroUtils.java
License:Open Source License
public static FileStatus[] getAvroPartFiles(JobConf conf, Path outPath) throws IOException {
    Path outputPath = outPath;
    FileSystem fileSystem = outputPath.getFileSystem(conf);
    FileStatus[] partFiles = fileSystem.listStatus(outputPath, new PathFilter() {
        @Override
        public boolean accept(Path path) {
            if (path.getName().endsWith(".avro")) {
                return true;
            }
            return false;
        }
    });
    return partFiles;
}
From source file:com.m6d.filecrush.crush.Crush.java
License:Apache License
/**
 * Returns the output from {@link CrushReducer}. Each reducer writes out a mapping of source files to crush
 * output file.
 */
private List<FileStatus> getOutputMappings() throws IOException {
    try {
        FileStatus[] files = fs.listStatus(outDir, new PathFilter() {
            Matcher matcher = Pattern.compile("part-\\d+").matcher("dummy");

            @Override
            public boolean accept(Path path) {
                matcher.reset(path.getName());
                return matcher.matches();
            }
        });
        return asList(files);
    } catch (FileNotFoundException e) {
        return new LinkedList<FileStatus>();
    }
}
From source file:com.m6d.filecrush.crush.Crush.java
License:Apache License
void writeDirs() throws IOException {
    print(Verbosity.INFO, "\nUsing temporary directory " + tmpDir.toUri().getPath() + "\n");
    FileStatus status = fs.getFileStatus(srcDir);
    Path tmpIn = new Path(tmpDir, "in");
    bucketFiles = new Path(tmpIn, "dirs");
    partitionMap = new Path(tmpIn, "partition-map");
    counters = new Path(tmpIn, "counters");
    skippedFiles = new HashSet<String>();
    removableFiles = new HashSet<String>();
    /*
     * Prefer the path returned by the status because it is always fully qualified.
     */
    List<Path> dirs = asList(status.getPath());
    Text key = new Text();
    Text value = new Text();
    Bucketer partitionBucketer = new Bucketer(maxTasks, 0, false);
    partitionBucketer.reset("partition-map");
    jobCounters = new Counters();
    int fileCount = 0;
    //Path bucketFile = new Path(tmpIn, "dirs_" + fileCount++);
    Writer writer = SequenceFile.createWriter(fs, job, bucketFiles, Text.class, Text.class,
            CompressionType.BLOCK);
    try {
        while (!dirs.isEmpty()) {
            List<Path> nextLevel = new LinkedList<Path>();
            for (Path dir : dirs) {
                String dirPath = dir.toUri().getPath();
                print(Verbosity.INFO, "\n\n[" + dirPath + "]");
                jobCounters.incrCounter(MapperCounter.DIRS_FOUND, 1);
                FileStatus[] contents = fs.listStatus(dir, new PathFilter() {
                    @Override
                    public boolean accept(Path testPath) {
                        if (ignoredFilesMatcher == null)
                            return true;
                        ignoredFilesMatcher.reset(testPath.toUri().getPath());
                        boolean ignores = ignoredFilesMatcher.matches();
                        if (ignores)
                            LOG.info("Ignoring file " + testPath);
                        return !ignores;
                    }
                });
                if (contents == null || contents.length == 0) {
                    print(Verbosity.INFO, "\n Directory is empty");
                    jobCounters.incrCounter(MapperCounter.DIRS_SKIPPED, 1);
                } else {
                    List<FileStatus> crushables = new ArrayList<FileStatus>(contents.length);
                    Set<String> uncrushedFiles = new HashSet<String>(contents.length);
                    long crushableBytes = 0;
                    /*
                     * Queue sub directories for subsequent inspection and examine the files in this directory.
                     */
                    for (FileStatus content : contents) {
                        Path path = content.getPath();
                        if (content.isDir()) {
                            nextLevel.add(path);
                        } else {
                            String filePath = path.toUri().getPath();
                            boolean skipFile = false;
                            if (skippedFilesMatcher != null) {
                                skippedFilesMatcher.reset(filePath);
                                if (skippedFilesMatcher.matches()) {
                                    skipFile = true;
                                }
                            }
                            boolean changed = uncrushedFiles.add(filePath);
                            assert changed : path.toUri().getPath();
                            long fileLength = content.getLen();
                            if (!skipFile && fileLength <= maxEligibleSize) {
                                if (removeEmptyFiles && fileLength == 0)
                                    removableFiles.add(filePath);
                                else {
                                    crushables.add(content);
                                    crushableBytes += fileLength;
                                }
                            }
                        }
                    }
                    /*
                     * We found a directory with data in it. Make sure we know how to name the crush output
                     * file and then increment the number of files we found.
                     */
                    if (!uncrushedFiles.isEmpty()) {
                        if (-1 == findMatcher(dir)) {
                            throw new IllegalArgumentException(
                                    "Could not find matching regex for directory: " + dir);
                        }
                        jobCounters.incrCounter(MapperCounter.FILES_FOUND, uncrushedFiles.size());
                    }
                    if (0 == crushableBytes) {
                        print(Verbosity.INFO, "\n Directory has no crushable files");
                        jobCounters.incrCounter(MapperCounter.DIRS_SKIPPED, 1);
                    } else {
                        /*
                         * We found files to consider for crushing.
                         */
                        long nBlocks = crushableBytes / dfsBlockSize;
                        if (nBlocks * dfsBlockSize != crushableBytes) {
                            nBlocks++;
                        }
                        /*
                         * maxFileBlocks will be huge in v1 mode, which will lead to one bucket per directory.
                         */
                        long dirBuckets = nBlocks / maxFileBlocks;
                        if (dirBuckets * maxFileBlocks != nBlocks) {
                            dirBuckets++;
                        }
                        if (dirBuckets > Integer.MAX_VALUE) {
                            throw new AssertionError("Too many buckets: " + dirBuckets);
                        }
                        Bucketer directoryBucketer = new Bucketer((int) dirBuckets, excludeSingleFileDirs);
                        directoryBucketer.reset(getPathPart(dir));
                        for (FileStatus file : crushables) {
                            directoryBucketer.add(new FileStatusHasSize(file));
                        }
                        List<Bucket> crushFiles = directoryBucketer.createBuckets();
                        if (crushFiles.isEmpty()) {
                            jobCounters.incrCounter(MapperCounter.DIRS_SKIPPED, 1);
                            print(Verbosity.INFO, "\n Directory skipped");
                        } else {
                            nBuckets += crushFiles.size();
                            jobCounters.incrCounter(MapperCounter.DIRS_ELIGIBLE, 1);
                            print(Verbosity.INFO, "\n Generating " + crushFiles.size() + " output files");
                            /*
                             * Write out the mapping between a bucket and a file.
                             */
                            for (Bucket crushFile : crushFiles) {
                                String bucketId = crushFile.name();
                                List<String> filesInBucket = crushFile.contents();
                                print(Verbosity.INFO,
                                        format("\n Output %s will include %,d input bytes from %,d files",
                                                bucketId, crushFile.size(), filesInBucket.size()));
                                key.set(bucketId);
                                for (String f : filesInBucket) {
                                    boolean changed = uncrushedFiles.remove(f);
                                    assert changed : f;
                                    pathMatcher.reset(f);
                                    pathMatcher.matches();
                                    value.set(pathMatcher.group(5));
                                    /*
                                     * Write one row per file to maximize the number of mappers.
                                     */
                                    writer.append(key, value);
                                    /*
                                     * Print the input file with four leading spaces.
                                     */
                                    print(Verbosity.VERBOSE, "\n    " + f);
                                }
                                jobCounters.incrCounter(MapperCounter.FILES_ELIGIBLE, filesInBucket.size());
                                partitionBucketer.add(crushFile);
                            }
                        }
                    }
                    if (!removableFiles.isEmpty()) {
                        print(Verbosity.INFO, "\n Marked " + removableFiles.size() + " files for removal");
                        for (String removable : removableFiles) {
                            uncrushedFiles.remove(removable);
                            print(Verbosity.VERBOSE, "\n " + removable);
                        }
                        jobCounters.incrCounter(MapperCounter.FILES_REMOVED, removableFiles.size());
                    }
                    if (!uncrushedFiles.isEmpty()) {
                        print(Verbosity.INFO, "\n Skipped " + uncrushedFiles.size() + " files");
                        for (String uncrushed : uncrushedFiles) {
                            print(Verbosity.VERBOSE, "\n " + uncrushed);
                        }
                        jobCounters.incrCounter(MapperCounter.FILES_SKIPPED, uncrushedFiles.size());
                    }
                    skippedFiles.addAll(uncrushedFiles);
                }
            }
            dirs = nextLevel;
        }
    } finally {
        writer.close();
    }
    /*
     * Now that we have processed all the directories, write the partition map.
     */
    List<Bucket> partitions = partitionBucketer.createBuckets();
    assert partitions.size() <= maxTasks;
    writer = SequenceFile.createWriter(fs, job, partitionMap, Text.class, IntWritable.class);
    IntWritable partNum = new IntWritable();
    int totalReducers = 0;
    for (Bucket partition : partitions) {
        String partitionName = partition.name();
        int p = Integer.parseInt(partitionName.substring(partitionName.lastIndexOf('-') + 1));
        partNum.set(p);
        if (partition.contents().size() > 0)
            totalReducers++;
        for (String bucketId : partition.contents()) {
            key.set(bucketId);
            writer.append(key, partNum);
        }
    }
    writer.close();
    print(Verbosity.INFO, "\n\nNumber of allocated reducers = " + totalReducers);
    job.setInt("mapreduce.job.reduces", totalReducers);
    DataOutputStream countersStream = fs.create(this.counters);
    jobCounters.write(countersStream);
    countersStream.close();
}
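A closing note on style: every example above implements PathFilter as an anonymous inner class, which was required on Java 7 and earlier. Because the interface declares exactly one abstract method, accept(Path), the same filters can be written as lambdas on Java 8 or later. A sketch, mirroring the part-file filter from the Crush example above (fs and outDir are assumed to be in scope as in that example):

FileStatus[] files = fs.listStatus(outDir, path -> path.getName().matches("part-\\d+"));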