Example usage for org.apache.hadoop.fs FileSystem globStatus

List of usage examples for org.apache.hadoop.fs FileSystem globStatus

Introduction

On this page you can find example usage of org.apache.hadoop.fs FileSystem globStatus.

Prototype

public FileStatus[] globStatus(Path pathPattern) throws IOException 

Document

Return all the files that match filePattern and are not checksum files.
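
Before the longer examples below, here is a minimal, hedged sketch of the typical call pattern: expand a glob into the matching, non-checksum files and iterate over the returned FileStatus array. The pattern "/data/logs/*.txt" is only an illustrative placeholder, not a path taken from any example on this page.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class GlobStatusSketch {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);

        // Expand the glob into the matching, non-checksum files.
        // "/data/logs/*.txt" is a placeholder pattern for this sketch.
        FileStatus[] matches = fs.globStatus(new Path("/data/logs/*.txt"));

        // globStatus returns null when the pattern contains no glob characters
        // and the path does not exist, so guard before iterating.
        if (matches != null) {
            for (FileStatus match : matches) {
                System.out.println(match.getPath());
            }
        }
    }
}

As several examples below show, callers often convert the returned FileStatus array into a Path array (for example with FileUtil.stat2Paths) before handing it to further processing.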

Usage

From source file:de.rwth.i9.palm.analytics.algorithm.lda.CustomVectorDumper.java

License:Apache License

@Override
public int run(String[] args) throws Exception {
    /*
     * Option seqOpt =
     * obuilder.withLongName("seqFile").withRequired(false).withArgument(
     * abuilder.withName("seqFile").withMinimum(1).withMaximum(1).create()).
     * withDescription(
     * "The Sequence File containing the Vectors").withShortName
     * ("s").create(); Option dirOpt =
     * obuilder.withLongName("seqDirectory").
     * withRequired(false).withArgument(
     * abuilder.withName("seqDirectory").withMinimum
     * (1).withMaximum(1).create()) .withDescription(
     * "The directory containing Sequence File of Vectors")
     * .withShortName("d").create();
     */
    addInputOption();
    addOutputOption();
    addOption("useKey", "u", "If the Key is a vector than dump that instead");
    addOption("printKey", "p", "Print out the key as well, delimited by tab (or the value if useKey is true");
    addOption("dictionary", "d", "The dictionary file.", false);
    addOption("dictionaryType", "dt", "The dictionary file type (text|seqfile)", false);
    addOption("csv", "c",
            "Output the Vector as CSV.  Otherwise it substitutes in the terms for vector cell entries");
    addOption("namesAsComments", "n", "If using CSV output, optionally add a comment line for each NamedVector "
            + "(if the vector is one) printing out the name");
    addOption("nameOnly", "N", "Use the name as the value for each NamedVector (skip other vectors)");
    addOption("sortVectors", "sort",
            "Sort output key/value pairs of the vector entries in abs magnitude " + "descending order");
    addOption("quiet", "q", "Print only file contents");
    addOption("sizeOnly", "sz", "Dump only the size of the vector");
    addOption("numItems", "ni", "Output at most <n> vecors", false);
    addOption("vectorSize", "vs",
            "Truncate vectors to <vs> length when dumping (most useful when in" + " conjunction with -sort",
            false);
    addOption(buildOption("filter", "fi",
            "Only dump out those vectors whose name matches the filter."
                    + "  Multiple items may be specified by repeating the argument.",
            true, 1, Integer.MAX_VALUE, false, null));

    if (parseArguments(args, false, true) == null) {
        return -1;
    }

    Path[] pathArr;
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);
    Path input = getInputPath();
    FileStatus fileStatus = fs.getFileStatus(input);
    if (fileStatus.isDir()) {
        pathArr = FileUtil.stat2Paths(fs.listStatus(input, new OutputFilesFilter()));
    } else {
        FileStatus[] inputPaths = fs.globStatus(input);
        pathArr = new Path[inputPaths.length];
        int i = 0;
        for (FileStatus fstatus : inputPaths) {
            pathArr[i++] = fstatus.getPath();
        }
    }

    String dictionaryType = getOption("dictionaryType", "text");

    boolean sortVectors = hasOption("sortVectors");
    boolean quiet = hasOption("quiet");
    if (!quiet) {
        log.info("Sort? {}", sortVectors);
    }

    String[] dictionary = null;
    if (hasOption("dictionary")) {
        String dictFile = getOption("dictionary");
        if ("text".equals(dictionaryType)) {
            dictionary = VectorHelper.loadTermDictionary(new File(dictFile));
        } else if ("sequencefile".equals(dictionaryType)) {
            dictionary = VectorHelper.loadTermDictionary(conf, dictFile);
        } else {
            // TODO: support Lucene's FST as a dictionary type
            throw new IOException("Invalid dictionary type: " + dictionaryType);
        }
    }

    Set<String> filters;
    if (hasOption("filter")) {
        filters = Sets.newHashSet(getOptions("filter"));
    } else {
        filters = null;
    }

    boolean useCSV = hasOption("csv");

    boolean sizeOnly = hasOption("sizeOnly");
    boolean nameOnly = hasOption("nameOnly");
    boolean namesAsComments = hasOption("namesAsComments");
    boolean transposeKeyValue = hasOption("vectorAsKey");
    Writer writer;
    boolean shouldClose;
    File output = getOutputFile();
    if (output != null) {
        shouldClose = true;
        log.info("Output file: {}", output);
        Files.createParentDirs(output);
        writer = Files.newWriter(output, Charsets.UTF_8);
    } else {
        shouldClose = false;
        writer = new OutputStreamWriter(System.out, Charsets.UTF_8);
    }
    try {
        boolean printKey = hasOption("printKey");
        if (useCSV && dictionary != null) {
            writer.write("#");
            for (int j = 0; j < dictionary.length; j++) {
                writer.write(dictionary[j]);
                if (j < dictionary.length - 1) {
                    writer.write(',');
                }
            }
            writer.write('\n');
        }
        Long numItems = null;
        if (hasOption("numItems")) {
            numItems = Long.parseLong(getOption("numItems"));
            if (quiet) {
                writer.append("#Max Items to dump: ").append(String.valueOf(numItems)).append('\n');
            }
        }
        int maxIndexesPerVector = hasOption("vectorSize") ? Integer.parseInt(getOption("vectorSize"))
                : Integer.MAX_VALUE;
        long itemCount = 0;
        int fileCount = 0;
        for (Path path : pathArr) {
            if (numItems != null && numItems <= itemCount) {
                break;
            }
            if (quiet) {
                log.info("Processing file '{}' ({}/{})", path, ++fileCount, pathArr.length);
            }
            SequenceFileIterable<Writable, Writable> iterable = new SequenceFileIterable<Writable, Writable>(
                    path, true, conf);
            Iterator<Pair<Writable, Writable>> iterator = iterable.iterator();
            long i = 0;
            while (iterator.hasNext() && (numItems == null || itemCount < numItems)) {
                Pair<Writable, Writable> record = iterator.next();
                Writable keyWritable = record.getFirst();
                Writable valueWritable = record.getSecond();
                if (printKey) {
                    Writable notTheVectorWritable = transposeKeyValue ? valueWritable : keyWritable;
                    writer.write(notTheVectorWritable.toString());
                    writer.write('\t');
                }
                Vector vector;
                try {
                    vector = ((VectorWritable) (transposeKeyValue ? keyWritable : valueWritable)).get();
                } catch (ClassCastException e) {
                    if ((transposeKeyValue ? keyWritable
                            : valueWritable) instanceof WeightedPropertyVectorWritable) {
                        vector = ((WeightedPropertyVectorWritable) (transposeKeyValue ? keyWritable
                                : valueWritable)).getVector();
                    } else {
                        throw e;
                    }
                }
                if (filters != null && vector instanceof NamedVector
                        && !filters.contains(((NamedVector) vector).getName())) {
                    // we are filtering out this item, skip
                    continue;
                }
                if (sizeOnly) {
                    if (vector instanceof NamedVector) {
                        writer.write(((NamedVector) vector).getName());
                        writer.write(":");
                    } else {
                        writer.write(String.valueOf(i++));
                        writer.write(":");
                    }
                    writer.write(String.valueOf(vector.size()));
                    writer.write('\n');
                } else if (nameOnly) {
                    if (vector instanceof NamedVector) {
                        writer.write(((NamedVector) vector).getName());
                        writer.write('\n');
                    }
                } else {
                    String fmtStr;
                    if (useCSV) {
                        fmtStr = VectorHelper.vectorToCSVString(vector, namesAsComments);
                    } else {
                        fmtStr = VectorHelper.vectorToJson(vector, dictionary, maxIndexesPerVector,
                                sortVectors);
                    }
                    writer.write(fmtStr);
                    writer.write('\n');
                }
                itemCount++;
            }
        }
        writer.flush();
    } finally {
        if (shouldClose) {
            Closeables.close(writer, false);
        }
    }

    return 0;
}

From source file:dz.lab.hdfs.SimpleGlobbing.java

public static void main(String[] args) throws IOException {
    Scanner in = new Scanner(System.in);
    System.out.print("Type in a glob (e.g. '/tmp/glob'): ");

    // read glob from standard input
    Path glob = new Path(in.next());
    FileSystem fs = FileSystem.get(new Configuration());
    FileStatus[] files = fs.globStatus(glob);

    // similar usage to listStatus method
    for (FileStatus file : files) {
        System.out.println(file.getPath().getName());
    }
}

From source file:edu.umn.cs.spatialHadoop.core.SpatialSite.java

License:Open Source License

/**
 * Returns the global index (partitions) of a file that is indexed using
 * the index command. If the file is not indexed, it returns null.
 * The return value is of type {@link GlobalIndex} where the generic
 * parameter is specified as {@link Partition}.
 * @param fs the file system that contains the file or directory
 * @param dir the path of the file or directory to inspect
 * @return the global index, or null if the file is not indexed
 */
public static GlobalIndex<Partition> getGlobalIndex(FileSystem fs, Path dir) {
    try {
        FileStatus[] allFiles;
        if (OperationsParams.isWildcard(dir)) {
            allFiles = fs.globStatus(dir);
        } else {
            allFiles = fs.listStatus(dir);
        }

        FileStatus masterFile = null;
        int nasaFiles = 0;
        for (FileStatus fileStatus : allFiles) {
            if (fileStatus.getPath().getName().startsWith("_master")) {
                if (masterFile != null)
                    throw new RuntimeException("Found more than one master file in " + dir);
                masterFile = fileStatus;
            } else if (fileStatus.getPath().getName().toLowerCase()
                    .matches(".*h\\d\\dv\\d\\d.*\\.(hdf|jpg|xml)")) {
                // Handle on-the-fly global indexes imposed from file naming of NASA data
                nasaFiles++;
            }
        }
        if (masterFile != null) {
            ShapeIterRecordReader reader = new ShapeIterRecordReader(fs.open(masterFile.getPath()), 0,
                    masterFile.getLen());
            Rectangle dummy = reader.createKey();
            reader.setShape(new Partition());
            ShapeIterator values = reader.createValue();
            ArrayList<Partition> partitions = new ArrayList<Partition>();
            while (reader.next(dummy, values)) {
                for (Shape value : values) {
                    partitions.add((Partition) value.clone());
                }
            }
            GlobalIndex<Partition> globalIndex = new GlobalIndex<Partition>();
            globalIndex.bulkLoad(partitions.toArray(new Partition[partitions.size()]));
            String extension = masterFile.getPath().getName();
            extension = extension.substring(extension.lastIndexOf('.') + 1);
            globalIndex.setCompact(GridRecordWriter.PackedIndexes.contains(extension));
            globalIndex.setReplicated(GridRecordWriter.ReplicatedIndexes.contains(extension));
            return globalIndex;
        } else if (nasaFiles > allFiles.length / 2) {
            // A folder that contains HDF files
            // Create a global index on the fly for these files based on their names
            Partition[] partitions = new Partition[allFiles.length];
            for (int i = 0; i < allFiles.length; i++) {
                final Pattern cellRegex = Pattern.compile(".*(h\\d\\dv\\d\\d).*");
                String filename = allFiles[i].getPath().getName();
                Matcher matcher = cellRegex.matcher(filename);
                Partition partition = new Partition();
                partition.filename = filename;
                if (matcher.matches()) {
                    String cellname = matcher.group(1);
                    int h = Integer.parseInt(cellname.substring(1, 3));
                    int v = Integer.parseInt(cellname.substring(4, 6));
                    partition.cellId = v * 36 + h;
                    // Calculate coordinates on MODIS Sinusoidal grid
                    partition.x1 = h * 10 - 180;
                    partition.y2 = (18 - v) * 10 - 90;
                    partition.x2 = partition.x1 + 10;
                    partition.y1 = partition.y2 - 10;
                    // Convert to Latitude Longitude
                    double lon1 = partition.x1 / Math.cos(partition.y1 * Math.PI / 180);
                    double lon2 = partition.x1 / Math.cos(partition.y2 * Math.PI / 180);
                    partition.x1 = Math.min(lon1, lon2);
                    lon1 = partition.x2 / Math.cos(partition.y1 * Math.PI / 180);
                    lon2 = partition.x2 / Math.cos(partition.y2 * Math.PI / 180);
                    partition.x2 = Math.max(lon1, lon2);
                } else {
                    partition.set(-180, -90, 180, 90);
                    partition.cellId = allFiles.length + i;
                }
                partitions[i] = partition;
            }
            GlobalIndex<Partition> gindex = new GlobalIndex<Partition>();
            gindex.bulkLoad(partitions);
            return gindex;
        } else {
            return null;
        }
    } catch (IOException e) {
        LOG.info("Error retrieving global index of '" + dir + "'");
        LOG.info(e);
        return null;
    }
}

From source file:edu.umn.cs.spatialHadoop.mapred.SpatialInputFormat.java

License:Open Source License

protected void listStatus(final FileSystem fs, Path dir, final List<FileStatus> result, BlockFilter filter)
        throws IOException {
    GlobalIndex<Partition> gindex = SpatialSite.getGlobalIndex(fs, dir);
    if (gindex == null) {
        FileStatus[] listStatus;
        if (OperationsParams.isWildcard(dir)) {
            // Wild card
            listStatus = fs.globStatus(dir);
        } else {
            listStatus = fs.listStatus(dir, SpatialSite.NonHiddenFileFilter);
        }
        // Add all files under this directory
        for (FileStatus status : listStatus) {
            if (status.isDir()) {
                listStatus(fs, status.getPath(), result, filter);
            } else if (status.getPath().getName().toLowerCase().endsWith(".list")) {
                LineRecordReader in = new LineRecordReader(fs.open(status.getPath()), 0, status.getLen(),
                        Integer.MAX_VALUE);
                LongWritable key = in.createKey();
                Text value = in.createValue();
                while (in.next(key, value)) {
                    result.add(fs.getFileStatus(new Path(status.getPath().getParent(), value.toString())));
                }
                in.close();
            } else {
                result.add(status);
            }
        }
    } else {
        final Path indexDir = OperationsParams.isWildcard(dir) ? dir.getParent() : dir;
        // Use the global index to limit files
        filter.selectCells(gindex, new ResultCollector<Partition>() {
            @Override
            public void collect(Partition partition) {
                try {
                    Path cell_path = new Path(indexDir, partition.filename);
                    if (!fs.exists(cell_path))
                        LOG.warn("Matched file not found: " + cell_path);
                    result.add(fs.getFileStatus(cell_path));
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        });
    }
}

From source file:edu.umn.cs.spatialHadoop.mapreduce.SpatialInputFormat3.java

License:Open Source License

protected void listStatus(final FileSystem fs, Path dir, final List<FileStatus> result, BlockFilter filter)
        throws IOException {
    GlobalIndex<Partition> gindex = SpatialSite.getGlobalIndex(fs, dir);
    if (gindex == null || filter == null) {
        // No global index which means we cannot use the filter function
        FileStatus[] listStatus;
        if (OperationsParams.isWildcard(dir)) {
            // Wild card
            listStatus = fs.globStatus(dir);
        } else {
            listStatus = fs.listStatus(dir, SpatialSite.NonHiddenFileFilter);
        }
        // Add all files under this directory
        for (FileStatus status : listStatus) {
            if (status.isDir()) {
                // Recursively go in subdir
                listStatus(fs, status.getPath(), result, filter);
            } else {
                // A file, just add it
                result.add(status);
            }
        }
    } else {
        final Path indexDir = OperationsParams.isWildcard(dir) ? dir.getParent() : dir;
        // Use the global index to limit files
        filter.selectCells(gindex, new ResultCollector<Partition>() {
            @Override
            public void collect(Partition partition) {
                try {
                    Path cell_path = new Path(indexDir, partition.filename);
                    if (!fs.exists(cell_path))
                        LOG.warn("Matched file not found: " + cell_path);
                    result.add(fs.getFileStatus(cell_path));
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        });
    }
}

From source file:edu.umn.cs.spatialHadoop.nasa.MakeHDFVideo.java

License:Open Source License

/**
 * Crop all images in the given directory.
 * @param dir the directory that contains the images to crop
 * @throws IOException 
 */
public static void cropImages(Path dir, Rectangle original, Rectangle extended) throws IOException {
    FileSystem fs = dir.getFileSystem(new Configuration());
    FileStatus[] allImages = OperationsParams.isWildcard(dir) ? fs.globStatus(dir) : fs.listStatus(dir);
    if (!extended.contains(original))
        throw new RuntimeException("Original rectangle must be totally contained in the extended rectangle. "
                + original + " is not contained in " + extended);

    for (FileStatus imageFile : allImages) {
        FSDataInputStream instream = fs.open(imageFile.getPath());
        BufferedImage img = ImageIO.read(instream);
        instream.close();

        int crop_x1 = (int) Math.floor((original.x1 - extended.x1) * img.getWidth() / extended.getWidth());
        int crop_y1 = (int) Math.floor((original.y1 - extended.y1) * img.getHeight() / extended.getHeight());
        int crop_x2 = (int) Math.ceil((original.x2 - extended.x1) * img.getWidth() / extended.getWidth());
        int crop_y2 = (int) Math.ceil((original.y2 - extended.y1) * img.getHeight() / extended.getHeight());
        // Ensure even height for compatibility with some codecs
        if ((crop_y2 - crop_y1) % 2 == 1)
            crop_y2++;

        BufferedImage cropped = new BufferedImage(crop_x2 - crop_x1, crop_y2 - crop_y1,
                BufferedImage.TYPE_INT_ARGB);
        Graphics2D g = cropped.createGraphics();
        g.setBackground(new Color(0, true));
        g.clearRect(0, 0, cropped.getWidth(), cropped.getHeight());
        g.drawImage(img, 0, 0, cropped.getWidth(), cropped.getHeight(), crop_x1, crop_y1, crop_x2, crop_y2,
                null);
        g.dispose();

        FSDataOutputStream outstream = fs.create(imageFile.getPath(), true);
        ImageIO.write(cropped, "png", outstream);
        outstream.close();
    }
}

From source file:edu.umn.cs.spatialHadoop.nasa.RecoverHoles.java

License:Open Source License

/**
 * Recover all images in the given dir. The passed directory contains a set of
 * images each corresponding to a specific date. The algorithm works in
 * iterations. In the first iteration, each image is used to recover holes in
 * the image that directly follows it. For example, the valid points in the
 * image of day 1 are used to recover missing points in the image of day 2. In
 * the second iteration, each image is used to recover missing points in the
 * preceding day. All odd numbered iterations work like the first iteration
 * while all even numbered iterations work like the second one. The algorithm
 * works like this until either all images are recovered or we run n iterations
 * where n is the total number of all images (time points).
 *
 * @param dir
 * @throws IOException 
 */
public static void recoverNearest(Path dir) throws IOException {
    FileSystem fs = dir.getFileSystem(new Configuration());
    FileStatus[] allImages = OperationsParams.isWildcard(dir) ? fs.globStatus(dir) : fs.listStatus(dir);
    Arrays.sort(allImages, new Comparator<FileStatus>() {
        @Override
        public int compare(FileStatus o1, FileStatus o2) {
            // Sort alphabetically based on file name
            return o1.getPath().getName().compareTo(o2.getPath().getName());
        }
    });
    // A sorted list of the index of all images that still have holes
    Vector<Integer> hollowImages = new Vector<Integer>();
    for (int i_image = 0; i_image < allImages.length; i_image++)
        hollowImages.add(i_image);
    for (int iter = 0; iter < allImages.length; iter++) {
        int i1, i2, increment;
        if (iter % 2 == 0) {
            i1 = hollowImages.size() - 2;
            i2 = -1;
            increment = -1;
        } else {
            i1 = 1;
            i2 = hollowImages.size();
            increment = 1;
        }
        for (int i_img = i1; i_img != i2; i_img += increment) {
            FSDataInputStream instream = fs.open(allImages[hollowImages.get(i_img)].getPath());
            BufferedImage img = ImageIO.read(instream);
            instream.close();
            instream = fs.open(allImages[hollowImages.get(i_img) - increment].getPath());
            BufferedImage img_bg = ImageIO.read(instream);
            instream.close();

            Graphics2D graphics = img_bg.createGraphics();
            graphics.drawImage(img, 0, 0, null);
            graphics.dispose();
            FSDataOutputStream outstream = fs.create(allImages[hollowImages.get(i_img)].getPath(), true);
            ImageIO.write(img_bg, "png", outstream);
            outstream.close();
        }
    }
}

From source file:edu.umn.cs.spatialHadoop.nasa.RecoverHoles.java

License:Open Source License

/**
 * Recover all images in the given directory by performing an interpolation
 * on each missing point from the two nearest points on each side on its
 * horizontal line (the nearest left and right points).
 *
 * To determine which points should be interpolated (e.g., under cloud) and
 * which points should remain blank (e.g., in sea), we first overlay all
 * images on top of each other. If a point is missing from all images, it
 * indicates with very high probability that it should remain blank. 
 * @param dir
 * @throws IOException
 */
public static void recoverInterpolationDir(Path dir) throws IOException {
    FileSystem fs = dir.getFileSystem(new Configuration());
    FileStatus[] allImages = OperationsParams.isWildcard(dir) ? fs.globStatus(dir) : fs.listStatus(dir);
    Arrays.sort(allImages, new Comparator<FileStatus>() {
        @Override
        public int compare(FileStatus o1, FileStatus o2) {
            // Sort alphabetically based on file name
            return o1.getPath().getName().compareTo(o2.getPath().getName());
        }
    });

    // Create a mask of valid points by overlaying all images on each other
    BufferedImage mask = null;
    Graphics2D g = null;
    for (FileStatus imageFile : allImages) {
        FSDataInputStream instream = fs.open(imageFile.getPath());
        BufferedImage img = ImageIO.read(instream);
        instream.close();

        if (g == null) {
            mask = img;
            g = mask.createGraphics();
        } else {
            g.drawImage(img, 0, 0, null);
        }
    }
    g.dispose();

    // Recover missing points on each image
    for (FileStatus imageFile : allImages) {
        recoverImageInterpolation(fs, mask, imageFile);
    }
}

From source file:edu.umn.cs.spatialHadoop.nasa.RecoverHoles.java

License:Open Source License

public static void addDate(Path dir) throws IOException {
    FileSystem fs = dir.getFileSystem(new Configuration());
    FileStatus[] allImages = OperationsParams.isWildcard(dir) ? fs.globStatus(dir) : fs.listStatus(dir);

    final Font font = new Font("Arial", Font.BOLD, 48);
    final SimpleDateFormat inputDateFormat = new SimpleDateFormat("yyyy.MM.dd");
    final SimpleDateFormat outputDateFormat = new SimpleDateFormat("dd MMM");

    for (FileStatus imageFile : allImages) {
        try {
            FSDataInputStream instream = fs.open(imageFile.getPath());
            BufferedImage img = ImageIO.read(instream);
            instream.close();

            Graphics2D g = img.createGraphics();
            g.setFont(font);
            String filename = imageFile.getPath().getName();
            String dateStr = filename.substring(0, filename.length() - 4);
            Date date = inputDateFormat.parse(dateStr);
            String text = outputDateFormat.format(date);
            g.setColor(Color.BLACK);
            g.drawString(text, 5, img.getHeight() - 5);
            g.dispose();

            FSDataOutputStream outstream = fs.create(imageFile.getPath(), true);
            ImageIO.write(img, "png", outstream);
            outstream.close();
        } catch (ParseException e) {
            e.printStackTrace();
        }
    }
}

From source file:fi.tkk.ics.hadoop.bam.cli.plugins.chipster.Summarize.java

License:Open Source License

@Override
protected int run(CmdLineParser parser) {

    final List<String> args = parser.getRemainingArgs();
    switch (args.size()) {
    case 0:
        return missingArg("WORKDIR");
    case 1:
        return missingArg("LEVELS");
    case 2:
        return missingArg("INPATH");
    default:
        break;
    }
    if (!cacheAndSetProperties(parser))
        return 3;

    levels = args.get(1).split(",");
    for (String l : levels) {
        try {
            int lvl = Integer.parseInt(l);
            if (lvl > 0)
                continue;
            System.err.printf("summarize :: summary level '%d' is not positive!\n", lvl);
        } catch (NumberFormatException e) {
            System.err.printf("summarize :: summary level '%s' is not an integer!\n", l);
        }
        return 3;
    }

    wrkDir = new Path(args.get(0));
    final Path bam = new Path(args.get(2));

    final boolean sort = parser.getBoolean(sortOpt);

    final Configuration conf = getConf();

    conf.setBoolean(AnySAMInputFormat.TRUST_EXTS_PROPERTY, !parser.getBoolean(noTrustExtsOpt));

    // Used by Utils.getMergeableWorkFile() to name the output files.
    wrkFile = bam.getName();
    conf.set(Utils.WORK_FILENAME_PROPERTY, wrkFile);

    conf.setStrings(SummarizeReducer.SUMMARY_LEVELS_PROP, levels);

    try {
        try {
            // There's a lot of different Paths here, and it can get a bit
            // confusing. Here's how it works:
            //
            // - outPath is the output dir for the final merged output, given
            //   with the -o parameter.
            //
            // - wrkDir is the user-given path where the outputs of the
            //   reducers go.
            //
            // - mergedTmpDir (defined further below) is $wrkDir/sort.tmp: if
            //   we are sorting, the summaries output in the first Hadoop job
            //   are merged in there.
            //
            // - mainSortOutputDir is $wrkDir/sorted.tmp: getSortOutputDir()
            //   gives a per-level/strand directory under it, which is used by
            //   doSorting() and mergeOne(). This is necessary because we
            //   cannot have multiple Hadoop jobs outputting into the same
            //   directory at the same time, as explained in the comment in
            //   sortMerged().

            // Required for path ".", for example.
            wrkDir = wrkDir.getFileSystem(conf).makeQualified(wrkDir);

            mainSortOutputDir = sort ? new Path(wrkDir, "sorted.tmp") : null;

            if (!runSummary(bam))
                return 4;
        } catch (IOException e) {
            System.err.printf("summarize :: Summarizing failed: %s\n", e);
            return 4;
        }

        Path mergedTmpDir = null;
        try {
            if (sort) {
                mergedTmpDir = new Path(wrkDir, "sort.tmp");
                mergeOutputs(mergedTmpDir);

            } else if (outPath != null)
                mergeOutputs(outPath);

        } catch (IOException e) {
            System.err.printf("summarize :: Merging failed: %s\n", e);
            return 5;
        }

        if (sort) {
            if (!doSorting(mergedTmpDir))
                return 6;

            // Reset this since SummarySort uses it.
            conf.set(Utils.WORK_FILENAME_PROPERTY, wrkFile);

            tryDelete(mergedTmpDir);

            if (outPath != null)
                try {
                    sorted = true;
                    mergeOutputs(outPath);
                } catch (IOException e) {
                    System.err.printf("summarize :: Merging sorted output failed: %s\n", e);
                    return 7;
                }
            else {
                // Move the unmerged results out of the mainSortOutputDir
                // subdirectories to wrkDir.

                System.out.println("summarize :: Moving outputs from temporary directories...");
                t.start();

                try {
                    final FileSystem fs = wrkDir.getFileSystem(conf);
                    for (String lvl : levels) {
                        final FileStatus[] parts;

                        try {
                            parts = fs.globStatus(new Path(new Path(mainSortOutputDir, lvl + "[fr]"),
                                    "*-[0-9][0-9][0-9][0-9][0-9][0-9]"));
                        } catch (IOException e) {
                            System.err.printf("summarize :: Couldn't move level %s results: %s", lvl, e);
                            continue;
                        }

                        for (FileStatus part : parts) {
                            final Path path = part.getPath();
                            try {
                                fs.rename(path, new Path(wrkDir, path.getName()));
                            } catch (IOException e) {
                                System.err.printf("summarize :: Couldn't move '%s': %s", path, e);
                            }
                        }
                    }
                } catch (IOException e) {
                    System.err.printf("summarize :: Moving results failed: %s", e);
                }
                System.out.printf("summarize :: Moved in %d.%03d s.\n", t.stopS(), t.fms());
            }
            tryDelete(mainSortOutputDir);
        }
    } catch (ClassNotFoundException e) {
        throw new RuntimeException(e);
    } catch (InterruptedException e) {
        throw new RuntimeException(e);
    }

    return 0;
}