Example usage for org.apache.hadoop.fs PathFilter PathFilter

Introduction

This page collects example usages of org.apache.hadoop.fs.PathFilter from several open-source projects.

Prototype

PathFilter is an interface rather than a class with constructors: an implementation provides a single method, boolean accept(Path path), which returns true for paths that should be included in a listing. In the examples below it is instantiated as an anonymous inner class.
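
A minimal, self-contained sketch of the common pattern, passing an anonymous PathFilter to FileSystem#listStatus so that only matching files are returned. The class name PathFilterExample and the /tmp/input path are illustrative and do not come from any of the projects listed below.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;

public class PathFilterExample {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);

        // Keep only the ".avro" files in the (illustrative) input directory.
        FileStatus[] matches = fs.listStatus(new Path("/tmp/input"), new PathFilter() {
            @Override
            public boolean accept(Path path) {
                return path.getName().endsWith(".avro");
            }
        });

        for (FileStatus status : matches) {
            System.out.println(status.getPath());
        }
    }
}

Because PathFilter declares a single abstract method, the same filter can also be written as a lambda on Java 8 and later, e.g. fs.listStatus(dir, p -> p.getName().endsWith(".avro")).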

Usage

From source file:com.inmobi.conduit.distcp.tools.TestIntegration.java

License:Apache License

@Test
public void testJobConters() {
    try {
        Path listFile = new Path("target/tmp1/listing").makeQualified(fs);
        addEntries(listFile, "*");
        createFileForAudit("/conduit/streams/test1/2013/10/10/10/10/file1.gz");
        runTest(listFile, target, true);
        int numberOfCountersPerFile = 0;
        long sumOfCounterValues = 0;
        FileStatus[] statuses = fs.listStatus(counterOutputPath, new PathFilter() {
            public boolean accept(Path path) {
                return path.toString().contains("part");
            }
        });
        for (FileStatus status : statuses) {
            Scanner scanner = new Scanner(fs.open(status.getPath()));
            while (scanner.hasNext()) {
                String counterNameValue = null;
                try {
                    counterNameValue = scanner.next();
                    String tmp[] = counterNameValue.split(ConduitConstants.AUDIT_COUNTER_NAME_DELIMITER);
                    Assert.assertEquals(4, tmp.length);
                    Long numOfMsgs = Long.parseLong(tmp[3]);
                    numberOfCountersPerFile++;
                    sumOfCounterValues += numOfMsgs;
                } catch (Exception e) {
                    LOG.error("Counters file has malformed line with counter name = " + counterNameValue
                            + " ..skipping the line ", e);
                }
            }
        }
        // should have 2 counters per file
        Assert.assertEquals(2, numberOfCountersPerFile);
        // sum of all counter values should equal the total number of messages
        Assert.assertEquals(3, sumOfCounterValues);
        checkResult(target, 1);
    } catch (IOException e) {
        LOG.error("Exception encountered while testing distcp", e);
        Assert.fail("distcp failure");
    } finally {
        TestDistCpUtils.delete(fs, root);
    }
}

From source file:com.inmobi.conduit.local.LocalStreamServiceTest.java

License:Apache License

private void testClusterName(String configName, String currentClusterName) throws Exception {
    ConduitConfigParser parser = new ConduitConfigParser(configName);
    ConduitConfig config = parser.getConfig();
    Set<String> streamsToProcess = new HashSet<String>();
    streamsToProcess.addAll(config.getSourceStreams().keySet());
    Set<String> clustersToProcess = new HashSet<String>();
    Set<TestLocalStreamService> services = new HashSet<TestLocalStreamService>();
    Cluster currentCluster = null;
    for (SourceStream sStream : config.getSourceStreams().values()) {
        for (String cluster : sStream.getSourceClusters()) {
            clustersToProcess.add(cluster);
        }
    }
    if (currentClusterName != null) {
        currentCluster = config.getClusters().get(currentClusterName);
    }
    for (String clusterName : clustersToProcess) {
        Cluster cluster = config.getClusters().get(clusterName);
        cluster.getHadoopConf().set("mapred.job.tracker", super.CreateJobConf().get("mapred.job.tracker"));
        TestLocalStreamService service = new TestLocalStreamService(config, cluster, currentCluster,
                new NullCheckPointProvider(), streamsToProcess);
        services.add(service);
    }

    for (TestLocalStreamService service : services) {
        FileSystem fs = service.getFileSystem();
        service.preExecute();
        if (currentClusterName != null)
            Assert.assertEquals(service.getCurrentCluster().getName(), currentClusterName);
        // creating a job with empty input path
        Path tmpJobInputPath = new Path("/tmp/job/input/path");
        Map<FileStatus, String> fileListing = new TreeMap<FileStatus, String>();
        Set<FileStatus> trashSet = new HashSet<FileStatus>();
        // checkpointKey, CheckPointPath
        Table<String, String, String> checkpointPaths = HashBasedTable.create();
        service.createMRInput(tmpJobInputPath, fileListing, trashSet, checkpointPaths);
        Job testJobConf = service.createJob(tmpJobInputPath, 1000);
        testJobConf.waitForCompletion(true);

        int numberOfCountersPerFile = 0;
        long sumOfCounterValues = 0;
        Path outputCounterPath = new Path(new Path(service.getCluster().getTmpPath(), service.getName()),
                "counters");
        FileStatus[] statuses = fs.listStatus(outputCounterPath, new PathFilter() {
            public boolean accept(Path path) {
                return path.toString().contains("part");
            }
        });
        for (FileStatus fileSt : statuses) {
            Scanner scanner = new Scanner(fs.open(fileSt.getPath()));
            while (scanner.hasNext()) {
                String counterNameValue = null;
                try {
                    counterNameValue = scanner.next();
                    String tmp[] = counterNameValue.split(ConduitConstants.AUDIT_COUNTER_NAME_DELIMITER);
                    Assert.assertEquals(4, tmp.length);
                    Long numOfMsgs = Long.parseLong(tmp[3]);
                    numberOfCountersPerFile++;
                    sumOfCounterValues += numOfMsgs;
                } catch (Exception e) {
                    LOG.error("Counters file has malformed line with counter name =" + counterNameValue
                            + "..skipping the line", e);
                }
            }
        }
        // Should have 2 counters for each file
        Assert.assertEquals(NUMBER_OF_FILES * 2, numberOfCountersPerFile);
        // sum of all counter values should be equal to total number of messages
        Assert.assertEquals(NUMBER_OF_FILES * 3, sumOfCounterValues);

        Assert.assertEquals(testJobConf.getConfiguration().get(FS_DEFAULT_NAME_KEY),
                service.getCurrentCluster().getHadoopConf().get(FS_DEFAULT_NAME_KEY));
        Assert.assertEquals(testJobConf.getConfiguration().get(SRC_FS_DEFAULT_NAME_KEY),
                service.getCluster().getHadoopConf().get(FS_DEFAULT_NAME_KEY));
        if (currentCluster == null)
            Assert.assertEquals(testJobConf.getConfiguration().get(FS_DEFAULT_NAME_KEY),
                    testJobConf.getConfiguration().get(SRC_FS_DEFAULT_NAME_KEY));
        service.getFileSystem().delete(new Path(service.getCluster().getRootDir()), true);
    }

}

From source file:com.inmobi.databus.readers.CollectorStreamReader.java

License:Apache License

protected FileMap<CollectorFile> createFileMap() throws IOException {
    return new FileMap<CollectorFile>() {

        @Override
        protected PathFilter createPathFilter() {
            return new PathFilter() {
                @Override
                public boolean accept(Path p) {
                    if (p.getName().endsWith("_current") || p.getName().endsWith("_stats")) {
                        return false;
                    }
                    return true;
                }
            };
        }

        /*
         * prepare a fileMap, skipping files whose timestamp is beyond the stopTime
         */
        @Override
        protected void buildList() throws IOException {
            if (fsIsPathExists(streamDir)) {
                FileStatus[] fileStatuses = fsListFileStatus(streamDir, pathFilter);
                if (fileStatuses == null || fileStatuses.length == 0) {
                    LOG.info("No files in directory:" + streamDir);
                    return;
                }
                if (stopTime == null) {
                    for (FileStatus file : fileStatuses) {
                        addPath(file);
                    }
                } else {
                    for (FileStatus file : fileStatuses) {
                        Date currentTimeStamp = getDateFromCollectorFile(file.getPath().getName());
                        if (stopTime.before(currentTimeStamp)) {
                            stopListing();
                            continue;
                        }
                        addPath(file);
                    }
                }
            } else {
                LOG.info("Collector directory does not exist");
            }
        }

        @Override
        protected TreeMap<CollectorFile, FileStatus> createFilesMap() {
            return new TreeMap<CollectorFile, FileStatus>();
        }

        @Override
        protected CollectorFile getStreamFile(String fileName) {
            return CollectorFile.create(fileName);
        }

        @Override
        protected CollectorFile getStreamFile(FileStatus file) {
            return CollectorFile.create(file.getPath().getName());
        }
    };
}

From source file:com.inmobi.databus.readers.DatabusStreamWaitingReader.java

License:Apache License

@Override
protected FileMap<HadoopStreamFile> createFileMap() throws IOException {
    return new FileMap<HadoopStreamFile>() {
        @Override
        protected void buildList() throws IOException {
            buildListing(this, pathFilter);
        }

        @Override
        protected TreeMap<HadoopStreamFile, FileStatus> createFilesMap() {
            return new TreeMap<HadoopStreamFile, FileStatus>();
        }

        @Override
        protected HadoopStreamFile getStreamFile(String fileName) {
            throw new RuntimeException("Not implemented");
        }

        @Override
        protected HadoopStreamFile getStreamFile(FileStatus file) {
            return HadoopStreamFile.create(file);
        }

        @Override
        protected PathFilter createPathFilter() {
            return new PathFilter() {
                @Override
                public boolean accept(Path path) {
                    if (path.getName().startsWith("_")) {
                        return false;
                    }
                    return true;
                }
            };
        }
    };
}

From source file:com.inmobi.databus.readers.LocalStreamCollectorReader.java

License:Apache License

public FileMap<DatabusStreamFile> createFileMap() throws IOException {
    return new FileMap<DatabusStreamFile>() {
        @Override
        protected void buildList() throws IOException {
            buildListing(this, pathFilter);
        }

        @Override
        protected TreeMap<DatabusStreamFile, FileStatus> createFilesMap() {
            return new TreeMap<DatabusStreamFile, FileStatus>();
        }

        @Override
        protected DatabusStreamFile getStreamFile(String fileName) {
            return DatabusStreamFile.create(streamName, fileName);
        }

        @Override
        protected DatabusStreamFile getStreamFile(FileStatus file) {
            return DatabusStreamFile.create(streamName, file.getPath().getName());
        }

        @Override
        protected PathFilter createPathFilter() {
            return new PathFilter() {
                @Override
                public boolean accept(Path p) {
                    if (p.getName().startsWith(collector)) {
                        return true;
                    }
                    return false;
                }
            };
        }
    };
}

From source file:com.jkoolcloud.tnt4j.streams.inputs.HdfsFileLineStream.java

License:Apache License

/**
 * Searches for files matching a name pattern. The name pattern may also contain the path of the directory in which
 * the file search should be performed, e.g., C:/Tomcat/logs/localhost_access_log.*.txt. If no path is defined (just a
 * file name pattern) then files are searched in {@code System.getProperty("user.dir")}. The returned array is ordered
 * by file modification timestamp in descending order.
 *
 * @param path
 *            path of file
 * @param fs
 *            file system
 *
 * @return array of found file paths.
 * @throws IOException
 *             if files can't be listed by file system.
 *
 * @see FileSystem#listStatus(Path, PathFilter)
 * @see FilenameUtils#wildcardMatch(String, String, IOCase)
 */
public static Path[] searchFiles(Path path, FileSystem fs) throws IOException {
    FileStatus[] dir = fs.listStatus(path.getParent(), new PathFilter() {
        @Override
        public boolean accept(Path path) {
            String name = path.getName();
            return FilenameUtils.wildcardMatch(name, "*", IOCase.INSENSITIVE); // NON-NLS
        }
    });

    Path[] activityFiles = new Path[dir == null ? 0 : dir.length];
    if (dir != null) {
        Arrays.sort(dir, new Comparator<FileStatus>() {
            @Override
            public int compare(FileStatus o1, FileStatus o2) {
                return Long.valueOf(o1.getModificationTime()).compareTo(o2.getModificationTime()) * (-1);
            }
        });

        for (int i = 0; i < dir.length; i++) {
            activityFiles[i] = dir[i].getPath();
        }
    }

    return activityFiles;
}

From source file:com.linkedin.cubert.io.rubix.RubixFile.java

License:Open Source License

public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException,
        ParseException, InstantiationException, IllegalAccessException {
    final int VERBOSE_NUM_ROWS = 4;

    Options options = new Options();

    options.addOption("h", "help", false, "shows this message");
    options.addOption("v", "verbose", false, "print summary and first few rows of each block");
    options.addOption("m", "metadata", false, "show the metadata");
    options.addOption("d", "dump", false,
            "dump the contents of the rubix file. Use -f for specifying format, and -o for specifying output location");
    options.addOption("f", "format", true, "the data format for dumping data (AVRO or TEXT). Default: TEXT");
    options.addOption("e", "extract", true,
            "Extract one rubix block matching the block id. Use -o for specifying output location");
    options.addOption("o", true, "Store the output at the specified location");

    CommandLineParser parser = new BasicParser();

    // parse the command line arguments
    CommandLine line = parser.parse(options, args);

    // show the help message
    if (line.hasOption("h")) {
        HelpFormatter formatter = new HelpFormatter();
        formatter.printHelp(
                "RubixFile <rubix file or dir> [options]\nIf no options are provided, print a summary of the blocks.",
                options);
        return;
    }

    // validate provided options
    if (line.hasOption("d") && line.hasOption("e")) {
        System.err.println("Cannot dump (-d) and extract (-e) at the same time!");
        return;
    }

    // obtain the list of rubix files
    String[] files = line.getArgs();
    if (files == null || files.length == 0) {
        System.err.println("Rubix file not specified");
        return;
    }

    Configuration conf = new JobConf();
    FileSystem fs = FileSystem.get(conf);

    Path path = new Path(files[0]);
    FileStatus[] allFiles;

    FileStatus status = fs.getFileStatus(path);
    if (status.isDir()) {
        allFiles = fs.listStatus(path, new PathFilter() {
            @Override
            public boolean accept(Path path) {
                return path.toString().contains(RubixConstants.RUBIX_EXTENSION);
            }

        });
    } else {
        allFiles = new FileStatus[] { status };
    }

    // walk over all files and extract the trailer section
    List<RubixFile<Tuple, Object>> rfiles = new ArrayList<RubixFile<Tuple, Object>>();

    for (FileStatus s : allFiles) {
        Path p = s.getPath();

        RubixFile<Tuple, Object> rfile = new RubixFile<Tuple, Object>(conf, p);

        // if printing metadata information, exit after the first file (since all files
        // have the same metadata)
        if (line.hasOption("m")) {
            rfile.getKeyData();

            System.out.println(new ObjectMapper().writer().writeValueAsString(rfile.metadataJson));
            break;
        }

        rfiles.add(rfile);
    }

    // dump the data
    if (line.hasOption("d")) {
        String format = line.getOptionValue("f");
        if (format == null)
            format = "TEXT";

        format = format.trim().toUpperCase();

        if (format.equals("AVRO")) {
            // dumpAvro(rfiles, line.getOptionValue("o"));
            throw new UnsupportedOperationException(
                    "Dumping to avro is not currently supporting. Please write a Cubert (map-only) script to store data in avro format");
        } else if (format.equals("TEXT")) {
            if (line.hasOption("o")) {
                System.err.println("Dumping TEXT format data *into a file* is not currently supported");
                return;
            }
            dumpText(rfiles, line.getOptionValue("o"), Integer.MAX_VALUE);
        } else {
            System.err.println("Invalid format [" + format + "] for dumping. Please use AVRO or TEXT");
            return;
        }
    } else if (line.hasOption("e")) // extract one rubix block
    {
        long blockId = Long.parseLong(line.getOptionValue("e"));
        extract(rfiles, blockId, line.getOptionValue("o"));
    } else
    // print summary
    {
        dumpText(rfiles, null, line.hasOption("v") ? VERBOSE_NUM_ROWS : 0);
    }
}

From source file:com.linkedin.mapred.AvroUtils.java

License:Open Source License

public static FileStatus[] getAvroPartFiles(JobConf conf, Path outPath) throws IOException {
    Path outputPath = outPath;
    FileSystem fileSystem = outputPath.getFileSystem(conf);

    FileStatus[] partFiles = fileSystem.listStatus(outputPath, new PathFilter() {
        @Override
        public boolean accept(Path path) {
            if (path.getName().endsWith(".avro")) {
                return true;
            }
            return false;
        }
    });

    return partFiles;
}

From source file:com.m6d.filecrush.crush.Crush.java

License:Apache License

/**
 * Returns the output from {@link CrushReducer}. Each reducer writes out a mapping of source files to crush output file.
 */
private List<FileStatus> getOutputMappings() throws IOException {
    try {
        FileStatus[] files = fs.listStatus(outDir, new PathFilter() {
            Matcher matcher = Pattern.compile("part-\\d+").matcher("dummy");

            @Override
            public boolean accept(Path path) {
                matcher.reset(path.getName());

                return matcher.matches();
            }
        });

        return asList(files);
    } catch (FileNotFoundException e) {
        return new LinkedList<FileStatus>();
    }
}

From source file:com.m6d.filecrush.crush.Crush.java

License:Apache License

void writeDirs() throws IOException {

    print(Verbosity.INFO, "\nUsing temporary directory " + tmpDir.toUri().getPath() + "\n");

    FileStatus status = fs.getFileStatus(srcDir);

    Path tmpIn = new Path(tmpDir, "in");

    bucketFiles = new Path(tmpIn, "dirs");
    partitionMap = new Path(tmpIn, "partition-map");
    counters = new Path(tmpIn, "counters");

    skippedFiles = new HashSet<String>();
    removableFiles = new HashSet<String>();

    /*
     * Prefer the path returned by the status because it is always fully qualified.
     */
    List<Path> dirs = asList(status.getPath());

    Text key = new Text();
    Text value = new Text();

    Bucketer partitionBucketer = new Bucketer(maxTasks, 0, false);
    partitionBucketer.reset("partition-map");

    jobCounters = new Counters();
    int fileCount = 0;

    //Path bucketFile = new Path(tmpIn, "dirs_" + fileCount++);
    Writer writer = SequenceFile.createWriter(fs, job, bucketFiles, Text.class, Text.class,
            CompressionType.BLOCK);

    try {
        while (!dirs.isEmpty()) {
            List<Path> nextLevel = new LinkedList<Path>();

            for (Path dir : dirs) {
                String dirPath = dir.toUri().getPath();
                print(Verbosity.INFO, "\n\n[" + dirPath + "]");

                jobCounters.incrCounter(MapperCounter.DIRS_FOUND, 1);

                FileStatus[] contents = fs.listStatus(dir, new PathFilter() {
                    @Override
                    public boolean accept(Path testPath) {
                        if (ignoredFilesMatcher == null)
                            return true;
                        ignoredFilesMatcher.reset(testPath.toUri().getPath());
                        boolean ignores = ignoredFilesMatcher.matches();
                        if (ignores)
                            LOG.info("Ignoring file " + testPath);
                        return !ignores;
                    }

                });

                if (contents == null || contents.length == 0) {
                    print(Verbosity.INFO, "\n  Directory is empty");

                    jobCounters.incrCounter(MapperCounter.DIRS_SKIPPED, 1);
                } else {
                    List<FileStatus> crushables = new ArrayList<FileStatus>(contents.length);
                    Set<String> uncrushedFiles = new HashSet<String>(contents.length);

                    long crushableBytes = 0;

                    /*
                     * Queue sub directories for subsequent inspection and examine the files in this directory.
                     */
                    for (FileStatus content : contents) {
                        Path path = content.getPath();

                        if (content.isDir()) {
                            nextLevel.add(path);
                        } else {
                            String filePath = path.toUri().getPath();
                            boolean skipFile = false;
                            if (skippedFilesMatcher != null) {
                                skippedFilesMatcher.reset(filePath);
                                if (skippedFilesMatcher.matches()) {
                                    skipFile = true;
                                }
                            }

                            boolean changed = uncrushedFiles.add(filePath);
                            assert changed : path.toUri().getPath();
                            long fileLength = content.getLen();

                            if (!skipFile && fileLength <= maxEligibleSize) {
                                if (removeEmptyFiles && fileLength == 0)
                                    removableFiles.add(filePath);
                                else {
                                    crushables.add(content);
                                    crushableBytes += fileLength;
                                }
                            }
                        }
                    }

                    /*
                     * We found a directory with data in it. Make sure we know how to name the crush output file and then increment the
                     * number of files we found.
                     */
                    if (!uncrushedFiles.isEmpty()) {
                        if (-1 == findMatcher(dir)) {
                            throw new IllegalArgumentException(
                                    "Could not find matching regex for directory: " + dir);
                        }

                        jobCounters.incrCounter(MapperCounter.FILES_FOUND, uncrushedFiles.size());
                    }

                    if (0 == crushableBytes) {
                        print(Verbosity.INFO, "\n  Directory has no crushable files");

                        jobCounters.incrCounter(MapperCounter.DIRS_SKIPPED, 1);
                    } else {
                        /*
                         * We found files to consider for crushing.
                         */
                        long nBlocks = crushableBytes / dfsBlockSize;

                        if (nBlocks * dfsBlockSize != crushableBytes) {
                            nBlocks++;
                        }

                        /*
                         * maxFileBlocks will be huge in v1 mode, which will lead to one bucket per directory.
                         */
                        long dirBuckets = nBlocks / maxFileBlocks;
                        if (dirBuckets * maxFileBlocks != nBlocks) {
                            dirBuckets++;
                        }

                        if (dirBuckets > Integer.MAX_VALUE) {
                            throw new AssertionError("Too many buckets: " + dirBuckets);
                        }

                        Bucketer directoryBucketer = new Bucketer((int) dirBuckets, excludeSingleFileDirs);
                        directoryBucketer.reset(getPathPart(dir));

                        for (FileStatus file : crushables) {
                            directoryBucketer.add(new FileStatusHasSize(file));
                        }

                        List<Bucket> crushFiles = directoryBucketer.createBuckets();
                        if (crushFiles.isEmpty()) {
                            jobCounters.incrCounter(MapperCounter.DIRS_SKIPPED, 1);
                            print(Verbosity.INFO, "\n  Directory skipped");
                        } else {
                            nBuckets += crushFiles.size();
                            jobCounters.incrCounter(MapperCounter.DIRS_ELIGIBLE, 1);
                            print(Verbosity.INFO, "\n  Generating " + crushFiles.size() + " output files");

                            /*
                             * Write out the mapping between a bucket and a file.
                             */
                            for (Bucket crushFile : crushFiles) {
                                String bucketId = crushFile.name();

                                List<String> filesInBucket = crushFile.contents();

                                print(Verbosity.INFO,
                                        format("\n  Output %s will include %,d input bytes from %,d files",
                                                bucketId, crushFile.size(), filesInBucket.size()));

                                key.set(bucketId);

                                for (String f : filesInBucket) {
                                    boolean changed = uncrushedFiles.remove(f);
                                    assert changed : f;

                                    pathMatcher.reset(f);
                                    pathMatcher.matches();

                                    value.set(pathMatcher.group(5));

                                    /*
                                     * Write one row per file to maximize the number of mappers
                                     */
                                    writer.append(key, value);

                                    /*
                                     * Print the input file with four leading spaces.
                                     */
                                    print(Verbosity.VERBOSE, "\n    " + f);
                                }

                                jobCounters.incrCounter(MapperCounter.FILES_ELIGIBLE, filesInBucket.size());

                                partitionBucketer.add(crushFile);
                            }
                        }
                    }

                    if (!removableFiles.isEmpty()) {
                        print(Verbosity.INFO, "\n  Marked " + removableFiles.size() + " files for removal");

                        for (String removable : removableFiles) {
                            uncrushedFiles.remove(removable);
                            print(Verbosity.VERBOSE, "\n    " + removable);
                        }

                        jobCounters.incrCounter(MapperCounter.FILES_REMOVED, removableFiles.size());
                    }

                    if (!uncrushedFiles.isEmpty()) {
                        print(Verbosity.INFO, "\n  Skipped " + uncrushedFiles.size() + " files");

                        for (String uncrushed : uncrushedFiles) {
                            print(Verbosity.VERBOSE, "\n    " + uncrushed);
                        }

                        jobCounters.incrCounter(MapperCounter.FILES_SKIPPED, uncrushedFiles.size());
                    }

                    skippedFiles.addAll(uncrushedFiles);
                }
            }

            dirs = nextLevel;
        }
    } finally {
        writer.close();
    }

    /*
     * Now that we have processed all the directories, write the partition map.
     */
    List<Bucket> partitions = partitionBucketer.createBuckets();
    assert partitions.size() <= maxTasks;

    writer = SequenceFile.createWriter(fs, job, partitionMap, Text.class, IntWritable.class);
    IntWritable partNum = new IntWritable();
    int totalReducers = 0;
    for (Bucket partition : partitions) {
        String partitionName = partition.name();

        int p = Integer.parseInt(partitionName.substring(partitionName.lastIndexOf('-') + 1));
        partNum.set(p);

        if (partition.contents().size() > 0)
            totalReducers++;

        for (String bucketId : partition.contents()) {
            key.set(bucketId);
            writer.append(key, partNum);
        }
    }
    writer.close();

    print(Verbosity.INFO, "\n\nNumber of allocated reducers = " + totalReducers);
    job.setInt("mapreduce.job.reduces", totalReducers);

    DataOutputStream countersStream = fs.create(this.counters);
    jobCounters.write(countersStream);
    countersStream.close();
}