Example usage for org.apache.hadoop.fs FileSystem globStatus

List of usage examples for org.apache.hadoop.fs FileSystem globStatus

Introduction

On this page you can find example usage of org.apache.hadoop.fs FileSystem globStatus.

Prototype

public FileStatus[] globStatus(Path pathPattern) throws IOException 

Document

Return all the files that match filePattern and are not checksum files.
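
Before the longer examples below, here is a minimal, hedged sketch of the typical call pattern: expand a glob into the matching, non-checksum files and iterate over the returned FileStatus array. The pattern "/data/logs/*.txt" is only an illustrative placeholder, not a path taken from any example on this page.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class GlobStatusSketch {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);

        // Expand the glob into the matching, non-checksum files.
        // "/data/logs/*.txt" is a placeholder pattern for this sketch.
        FileStatus[] matches = fs.globStatus(new Path("/data/logs/*.txt"));

        // globStatus returns null when the pattern contains no glob characters
        // and the path does not exist, so guard before iterating.
        if (matches != null) {
            for (FileStatus match : matches) {
                System.out.println(match.getPath());
            }
        }
    }
}

As several examples below show, callers often convert the returned FileStatus array into a Path array (for example with FileUtil.stat2Paths) before handing it to further processing.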

Usage

From source file:de.rwth.i9.palm.analytics.algorithm.lda.CustomVectorDumper.java

License:Apache License

@Override
public int run(String[] args) throws Exception {
    /*
     * Option seqOpt =
     * obuilder.withLongName("seqFile").withRequired(false).withArgument(
     * abuilder.withName("seqFile").withMinimum(1).withMaximum(1).create()).
     * withDescription(
     * "The Sequence File containing the Vectors").withShortName
     * ("s").create(); Option dirOpt =
     * obuilder.withLongName("seqDirectory").
     * withRequired(false).withArgument(
     * abuilder.withName("seqDirectory").withMinimum
     * (1).withMaximum(1).create()) .withDescription(
     * "The directory containing Sequence File of Vectors")
     * .withShortName("d").create();
     */
    addInputOption();
    addOutputOption();
    addOption("useKey", "u", "If the Key is a vector than dump that instead");
    addOption("printKey", "p", "Print out the key as well, delimited by tab (or the value if useKey is true");
    addOption("dictionary", "d", "The dictionary file.", false);
    addOption("dictionaryType", "dt", "The dictionary file type (text|seqfile)", false);
    addOption("csv", "c",
            "Output the Vector as CSV.  Otherwise it substitutes in the terms for vector cell entries");
    addOption("namesAsComments", "n", "If using CSV output, optionally add a comment line for each NamedVector "
            + "(if the vector is one) printing out the name");
    addOption("nameOnly", "N", "Use the name as the value for each NamedVector (skip other vectors)");
    addOption("sortVectors", "sort",
            "Sort output key/value pairs of the vector entries in abs magnitude " + "descending order");
    addOption("quiet", "q", "Print only file contents");
    addOption("sizeOnly", "sz", "Dump only the size of the vector");
    addOption("numItems", "ni", "Output at most <n> vecors", false);
    addOption("vectorSize", "vs",
            "Truncate vectors to <vs> length when dumping (most useful when in" + " conjunction with -sort",
            false);
    addOption(buildOption("filter", "fi",
            "Only dump out those vectors whose name matches the filter."
                    + "  Multiple items may be specified by repeating the argument.",
            true, 1, Integer.MAX_VALUE, false, null));

    if (parseArguments(args, false, true) == null) {
        return -1;
    }

    Path[] pathArr;
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);
    Path input = getInputPath();
    FileStatus fileStatus = fs.getFileStatus(input);
    if (fileStatus.isDir()) {
        pathArr = FileUtil.stat2Paths(fs.listStatus(input, new OutputFilesFilter()));
    } else {
        FileStatus[] inputPaths = fs.globStatus(input);
        pathArr = new Path[inputPaths.length];
        int i = 0;
        for (FileStatus fstatus : inputPaths) {
            pathArr[i++] = fstatus.getPath();
        }
    }

    String dictionaryType = getOption("dictionaryType", "text");

    boolean sortVectors = hasOption("sortVectors");
    boolean quiet = hasOption("quiet");
    if (!quiet) {
        log.info("Sort? {}", sortVectors);
    }

    String[] dictionary = null;
    if (hasOption("dictionary")) {
        String dictFile = getOption("dictionary");
        if ("text".equals(dictionaryType)) {
            dictionary = VectorHelper.loadTermDictionary(new File(dictFile));
        } else if ("sequencefile".equals(dictionaryType)) {
            dictionary = VectorHelper.loadTermDictionary(conf, dictFile);
        } else {
            // TODO: support Lucene's FST as a dictionary type
            throw new IOException("Invalid dictionary type: " + dictionaryType);
        }
    }

    Set<String> filters;
    if (hasOption("filter")) {
        filters = Sets.newHashSet(getOptions("filter"));
    } else {
        filters = null;
    }

    boolean useCSV = hasOption("csv");

    boolean sizeOnly = hasOption("sizeOnly");
    boolean nameOnly = hasOption("nameOnly");
    boolean namesAsComments = hasOption("namesAsComments");
    boolean transposeKeyValue = hasOption("vectorAsKey");
    Writer writer;
    boolean shouldClose;
    File output = getOutputFile();
    if (output != null) {
        shouldClose = true;
        log.info("Output file: {}", output);
        Files.createParentDirs(output);
        writer = Files.newWriter(output, Charsets.UTF_8);
    } else {
        shouldClose = false;
        writer = new OutputStreamWriter(System.out, Charsets.UTF_8);
    }
    try {
        boolean printKey = hasOption("printKey");
        if (useCSV && dictionary != null) {
            writer.write("#");
            for (int j = 0; j < dictionary.length; j++) {
                writer.write(dictionary[j]);
                if (j < dictionary.length - 1) {
                    writer.write(',');
                }
            }
            writer.write('\n');
        }
        Long numItems = null;
        if (hasOption("numItems")) {
            numItems = Long.parseLong(getOption("numItems"));
            if (quiet) {
                writer.append("#Max Items to dump: ").append(String.valueOf(numItems)).append('\n');
            }
        }
        int maxIndexesPerVector = hasOption("vectorSize") ? Integer.parseInt(getOption("vectorSize"))
                : Integer.MAX_VALUE;
        long itemCount = 0;
        int fileCount = 0;
        for (Path path : pathArr) {
            if (numItems != null && numItems <= itemCount) {
                break;
            }
            if (quiet) {
                log.info("Processing file '{}' ({}/{})", path, ++fileCount, pathArr.length);
            }
            SequenceFileIterable<Writable, Writable> iterable = new SequenceFileIterable<Writable, Writable>(
                    path, true, conf);
            Iterator<Pair<Writable, Writable>> iterator = iterable.iterator();
            long i = 0;
            while (iterator.hasNext() && (numItems == null || itemCount < numItems)) {
                Pair<Writable, Writable> record = iterator.next();
                Writable keyWritable = record.getFirst();
                Writable valueWritable = record.getSecond();
                if (printKey) {
                    Writable notTheVectorWritable = transposeKeyValue ? valueWritable : keyWritable;
                    writer.write(notTheVectorWritable.toString());
                    writer.write('\t');
                }
                Vector vector;
                try {
                    vector = ((VectorWritable) (transposeKeyValue ? keyWritable : valueWritable)).get();
                } catch (ClassCastException e) {
                    if ((transposeKeyValue ? keyWritable
                            : valueWritable) instanceof WeightedPropertyVectorWritable) {
                        vector = ((WeightedPropertyVectorWritable) (transposeKeyValue ? keyWritable
                                : valueWritable)).getVector();
                    } else {
                        throw e;
                    }
                }
                if (filters != null && vector instanceof NamedVector
                        && !filters.contains(((NamedVector) vector).getName())) {
                    // we are filtering out this item, skip
                    continue;
                }
                if (sizeOnly) {
                    if (vector instanceof NamedVector) {
                        writer.write(((NamedVector) vector).getName());
                        writer.write(":");
                    } else {
                        writer.write(String.valueOf(i++));
                        writer.write(":");
                    }
                    writer.write(String.valueOf(vector.size()));
                    writer.write('\n');
                } else if (nameOnly) {
                    if (vector instanceof NamedVector) {
                        writer.write(((NamedVector) vector).getName());
                        writer.write('\n');
                    }
                } else {
                    String fmtStr;
                    if (useCSV) {
                        fmtStr = VectorHelper.vectorToCSVString(vector, namesAsComments);
                    } else {
                        fmtStr = VectorHelper.vectorToJson(vector, dictionary, maxIndexesPerVector,
                                sortVectors);
                    }
                    writer.write(fmtStr);
                    writer.write('\n');
                }
                itemCount++;
            }
        }
        writer.flush();
    } finally {
        if (shouldClose) {
            Closeables.close(writer, false);
        }
    }

    return 0;
}

From source file:dz.lab.hdfs.SimpleGlobbing.java

public static void main(String[] args) throws IOException {
    Scanner in = new Scanner(System.in);
    System.out.print("Type in a glob (e.g. '/tmp/glob'): ");

    // read glob from standard input
    Path glob = new Path(in.next());
    FileSystem fs = FileSystem.get(new Configuration());
    FileStatus[] files = fs.globStatus(glob);

    // similar usage to listStatus method
    for (FileStatus file : files) {
        System.out.println(file.getPath().getName());
    }
}

From source file:edu.umn.cs.spatialHadoop.core.SpatialSite.java

License:Open Source License

/**
 * Returns the global index (partitions) of a file that is indexed using
 * the index command. If the file is not indexed, it returns null.
 * The return value is of type {@link GlobalIndex} where the generic
 * parameter is specified as {@link Partition}.
 * @param fs the file system that contains the file or directory
 * @param dir the path of the file or directory to inspect
 * @return the global index, or null if the file is not indexed
 */
public static GlobalIndex<Partition> getGlobalIndex(FileSystem fs, Path dir) {
    try {
        FileStatus[] allFiles;
        if (OperationsParams.isWildcard(dir)) {
            allFiles = fs.globStatus(dir);
        } else {
            allFiles = fs.listStatus(dir);
        }

        FileStatus masterFile = null;
        int nasaFiles = 0;
        for (FileStatus fileStatus : allFiles) {
            if (fileStatus.getPath().getName().startsWith("_master")) {
                if (masterFile != null)
                    throw new RuntimeException("Found more than one master file in " + dir);
                masterFile = fileStatus;
            } else if (fileStatus.getPath().getName().toLowerCase()
                    .matches(".*h\\d\\dv\\d\\d.*\\.(hdf|jpg|xml)")) {
                // Handle on-the-fly global indexes imposed from file naming of NASA data
                nasaFiles++;
            }
        }
        if (masterFile != null) {
            ShapeIterRecordReader reader = new ShapeIterRecordReader(fs.open(masterFile.getPath()), 0,
                    masterFile.getLen());
            Rectangle dummy = reader.createKey();
            reader.setShape(new Partition());
            ShapeIterator values = reader.createValue();
            ArrayList<Partition> partitions = new ArrayList<Partition>();
            while (reader.next(dummy, values)) {
                for (Shape value : values) {
                    partitions.add((Partition) value.clone());
                }
            }
            GlobalIndex<Partition> globalIndex = new GlobalIndex<Partition>();
            globalIndex.bulkLoad(partitions.toArray(new Partition[partitions.size()]));
            String extension = masterFile.getPath().getName();
            extension = extension.substring(extension.lastIndexOf('.') + 1);
            globalIndex.setCompact(GridRecordWriter.PackedIndexes.contains(extension));
            globalIndex.setReplicated(GridRecordWriter.ReplicatedIndexes.contains(extension));
            return globalIndex;
        } else if (nasaFiles > allFiles.length / 2) {
            // A folder that contains HDF files
            // Create a global index on the fly for these files based on their names
            Partition[] partitions = new Partition[allFiles.length];
            for (int i = 0; i < allFiles.length; i++) {
                final Pattern cellRegex = Pattern.compile(".*(h\\d\\dv\\d\\d).*");
                String filename = allFiles[i].getPath().getName();
                Matcher matcher = cellRegex.matcher(filename);
                Partition partition = new Partition();
                partition.filename = filename;
                if (matcher.matches()) {
                    String cellname = matcher.group(1);
                    int h = Integer.parseInt(cellname.substring(1, 3));
                    int v = Integer.parseInt(cellname.substring(4, 6));
                    partition.cellId = v * 36 + h;
                    // Calculate coordinates on MODIS Sinusoidal grid
                    partition.x1 = h * 10 - 180;
                    partition.y2 = (18 - v) * 10 - 90;
                    partition.x2 = partition.x1 + 10;
                    partition.y1 = partition.y2 - 10;
                    // Convert to Latitude Longitude
                    double lon1 = partition.x1 / Math.cos(partition.y1 * Math.PI / 180);
                    double lon2 = partition.x1 / Math.cos(partition.y2 * Math.PI / 180);
                    partition.x1 = Math.min(lon1, lon2);
                    lon1 = partition.x2 / Math.cos(partition.y1 * Math.PI / 180);
                    lon2 = partition.x2 / Math.cos(partition.y2 * Math.PI / 180);
                    partition.x2 = Math.max(lon1, lon2);
                } else {
                    partition.set(-180, -90, 180, 90);
                    partition.cellId = allFiles.length + i;
                }
                partitions[i] = partition;
            }
            GlobalIndex<Partition> gindex = new GlobalIndex<Partition>();
            gindex.bulkLoad(partitions);
            return gindex;
        } else {
            return null;
        }
    } catch (IOException e) {
        LOG.info("Error retrieving global index of '" + dir + "'");
        LOG.info(e);
        return null;
    }
}

From source file:edu.umn.cs.spatialHadoop.mapred.SpatialInputFormat.java

License:Open Source License

protected void listStatus(final FileSystem fs, Path dir, final List<FileStatus> result, BlockFilter filter)
        throws IOException {
    GlobalIndex<Partition> gindex = SpatialSite.getGlobalIndex(fs, dir);
    if (gindex == null) {
        FileStatus[] listStatus;
        if (OperationsParams.isWildcard(dir)) {
            // Wild card
            listStatus = fs.globStatus(dir);
        } else {
            listStatus = fs.listStatus(dir, SpatialSite.NonHiddenFileFilter);
        }
        // Add all files under this directory
        for (FileStatus status : listStatus) {
            if (status.isDir()) {
                listStatus(fs, status.getPath(), result, filter);
            } else if (status.getPath().getName().toLowerCase().endsWith(".list")) {
                LineRecordReader in = new LineRecordReader(fs.open(status.getPath()), 0, status.getLen(),
                        Integer.MAX_VALUE);
                LongWritable key = in.createKey();
                Text value = in.createValue();
                while (in.next(key, value)) {
                    result.add(fs.getFileStatus(new Path(status.getPath().getParent(), value.toString())));
                }
                in.close();
            } else {
                result.add(status);
            }
        }
    } else {
        final Path indexDir = OperationsParams.isWildcard(dir) ? dir.getParent() : dir;
        // Use the global index to limit files
        filter.selectCells(gindex, new ResultCollector<Partition>() {
            @Override
            public void collect(Partition partition) {
                try {
                    Path cell_path = new Path(indexDir, partition.filename);
                    if (!fs.exists(cell_path))
                        LOG.warn("Matched file not found: " + cell_path);
                    result.add(fs.getFileStatus(cell_path));
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        });
    }
}

From source file:edu.umn.cs.spatialHadoop.mapreduce.SpatialInputFormat3.java

License:Open Source License

protected void listStatus(final FileSystem fs, Path dir, final List<FileStatus> result, BlockFilter filter)
        throws IOException {
    GlobalIndex<Partition> gindex = SpatialSite.getGlobalIndex(fs, dir);
    if (gindex == null || filter == null) {
        // No global index which means we cannot use the filter function
        FileStatus[] listStatus;
        if (OperationsParams.isWildcard(dir)) {
            // Wild card
            listStatus = fs.globStatus(dir);
        } else {
            listStatus = fs.listStatus(dir, SpatialSite.NonHiddenFileFilter);
        }
        // Add all files under this directory
        for (FileStatus status : listStatus) {
            if (status.isDir()) {
                // Recursively go in subdir
                listStatus(fs, status.getPath(), result, filter);
            } else {
                // A file, just add it
                result.add(status);
            }
        }
    } else {
        final Path indexDir = OperationsParams.isWildcard(dir) ? dir.getParent() : dir;
        // Use the global index to limit files
        filter.selectCells(gindex, new ResultCollector<Partition>() {
            @Override
            public void collect(Partition partition) {
                try {
                    Path cell_path = new Path(indexDir, partition.filename);
                    if (!fs.exists(cell_path))
                        LOG.warn("Matched file not found: " + cell_path);
                    result.add(fs.getFileStatus(cell_path));
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        });
    }
}

From source file:edu.umn.cs.spatialHadoop.nasa.MakeHDFVideo.java

License:Open Source License

/**
 * Crop all images in the given directory.
 * @param dir the directory that contains the images to crop
 * @throws IOException 
 */
public static void cropImages(Path dir, Rectangle original, Rectangle extended) throws IOException {
    FileSystem fs = dir.getFileSystem(new Configuration());
    FileStatus[] allImages = OperationsParams.isWildcard(dir) ? fs.globStatus(dir) : fs.listStatus(dir);
    if (!extended.contains(original))
        throw new RuntimeException("Original rectangle must be totally contained in the extended rectangle. "
                + original + " is not contained in " + extended);

    for (FileStatus imageFile : allImages) {
        FSDataInputStream instream = fs.open(imageFile.getPath());
        BufferedImage img = ImageIO.read(instream);
        instream.close();

        int crop_x1 = (int) Math.floor((original.x1 - extended.x1) * img.getWidth() / extended.getWidth());
        int crop_y1 = (int) Math.floor((original.y1 - extended.y1) * img.getHeight() / extended.getHeight());
        int crop_x2 = (int) Math.ceil((original.x2 - extended.x1) * img.getWidth() / extended.getWidth());
        int crop_y2 = (int) Math.ceil((original.y2 - extended.y1) * img.getHeight() / extended.getHeight());
        // Ensure even height for compatibility with some codecs
        if ((crop_y2 - crop_y1) % 2 == 1)
            crop_y2++;

        BufferedImage cropped = new BufferedImage(crop_x2 - crop_x1, crop_y2 - crop_y1,
                BufferedImage.TYPE_INT_ARGB);
        Graphics2D g = cropped.createGraphics();
        g.setBackground(new Color(0, true));
        g.clearRect(0, 0, cropped.getWidth(), cropped.getHeight());
        g.drawImage(img, 0, 0, cropped.getWidth(), cropped.getHeight(), crop_x1, crop_y1, crop_x2, crop_y2,
                null);
        g.dispose();

        FSDataOutputStream outstream = fs.create(imageFile.getPath(), true);
        ImageIO.write(cropped, "png", outstream);
        outstream.close();
    }
}

From source file:edu.umn.cs.spatialHadoop.nasa.RecoverHoles.java

License:Open Source License

/**
 * Recover all images in the given dir. The passed directory contains a set of
 * images each corresponding to a specific date. The algorithm works in
 * iterations. In the first iteration, each image is used to recover holes in
 * the image that directly follows it. For example, the valid points in the
 * image of day 1 are used to recover missing points in the image of day 2. In
 * the second iteration, each image is used to recover missing points in the
 * preceding day. All odd numbered iterations work like the first iteration
 * while all even numbered iterations work like the second one. The algorithm
 * works like this until either all images are recovered or we run n iterations
 * where n is the total number of all images (time points).
 *
 * @param dir
 * @throws IOException 
 */
public static void recoverNearest(Path dir) throws IOException {
    FileSystem fs = dir.getFileSystem(new Configuration());
    FileStatus[] allImages = OperationsParams.isWildcard(dir) ? fs.globStatus(dir) : fs.listStatus(dir);
    Arrays.sort(allImages, new Comparator<FileStatus>() {
        @Override
        public int compare(FileStatus o1, FileStatus o2) {
            // Sort alphabetically based on file name
            return o1.getPath().getName().compareTo(o2.getPath().getName());
        }
    });
    // A sorted list of the index of all images that still have holes
    Vector<Integer> hollowImages = new Vector<Integer>();
    for (int i_image = 0; i_image < allImages.length; i_image++)
        hollowImages.add(i_image);
    for (int iter = 0; iter < allImages.length; iter++) {
        int i1, i2, increment;
        if (iter % 2 == 0) {
            i1 = hollowImages.size() - 2;
            i2 = -1;
            increment = -1;
        } else {
            i1 = 1;
            i2 = hollowImages.size();
            increment = 1;
        }
        for (int i_img = i1; i_img != i2; i_img += increment) {
            FSDataInputStream instream = fs.open(allImages[hollowImages.get(i_img)].getPath());
            BufferedImage img = ImageIO.read(instream);
            instream.close();
            instream = fs.open(allImages[hollowImages.get(i_img) - increment].getPath());
            BufferedImage img_bg = ImageIO.read(instream);
            instream.close();

            Graphics2D graphics = img_bg.createGraphics();
            graphics.drawImage(img, 0, 0, null);
            graphics.dispose();
            FSDataOutputStream outstream = fs.create(allImages[hollowImages.get(i_img)].getPath(), true);
            ImageIO.write(img_bg, "png", outstream);
            outstream.close();
        }
    }
}

From source file:edu.umn.cs.spatialHadoop.nasa.RecoverHoles.java

License:Open Source License

/**
 * Recover all images in the given directory by performing an interpolation
 * on each missing point from the two nearest points on each side on its
 * horizontal line (the nearest left and right points).
 *
 * To determine which points should be interpolated (e.g., under cloud) and
 * which points should remain blank (e.g., in sea), we first overlay all
 * images on top of each other. If a point is missing from all images, it
 * indicates with very high probability that it should remain blank. 
 * @param dir
 * @throws IOException
 */
public static void recoverInterpolationDir(Path dir) throws IOException {
    FileSystem fs = dir.getFileSystem(new Configuration());
    FileStatus[] allImages = OperationsParams.isWildcard(dir) ? fs.globStatus(dir) : fs.listStatus(dir);
    Arrays.sort(allImages, new Comparator<FileStatus>() {
        @Override
        public int compare(FileStatus o1, FileStatus o2) {
            // Sort alphabetically based on file name
            return o1.getPath().getName().compareTo(o2.getPath().getName());
        }
    });

    // Create a mask of valid points by overlaying all images on each other
    BufferedImage mask = null;
    Graphics2D g = null;
    for (FileStatus imageFile : allImages) {
        FSDataInputStream instream = fs.open(imageFile.getPath());
        BufferedImage img = ImageIO.read(instream);
        instream.close();

        if (g == null) {
            mask = img;
            g = mask.createGraphics();
        } else {
            g.drawImage(img, 0, 0, null);
        }
    }
    g.dispose();

    // Recover missing points on each image
    for (FileStatus imageFile : allImages) {
        recoverImageInterpolation(fs, mask, imageFile);
    }
}

From source file:edu.umn.cs.spatialHadoop.nasa.RecoverHoles.java

License:Open Source License

public static void addDate(Path dir) throws IOException {
    FileSystem fs = dir.getFileSystem(new Configuration());
    FileStatus[] allImages = OperationsParams.isWildcard(dir) ? fs.globStatus(dir) : fs.listStatus(dir);

    final Font font = new Font("Arial", Font.BOLD, 48);
    final SimpleDateFormat inputDateFormat = new SimpleDateFormat("yyyy.MM.dd");
    final SimpleDateFormat outputDateFormat = new SimpleDateFormat("dd MMM");

    for (FileStatus imageFile : allImages) {
        try {
            FSDataInputStream instream = fs.open(imageFile.getPath());
            BufferedImage img = ImageIO.read(instream);
            instream.close();

            Graphics2D g = img.createGraphics();
            g.setFont(font);
            String filename = imageFile.getPath().getName();
            String dateStr = filename.substring(0, filename.length() - 4);
            Date date = inputDateFormat.parse(dateStr);
            String text = outputDateFormat.format(date);
            g.setColor(Color.BLACK);
            g.drawString(text, 5, img.getHeight() - 5);
            g.dispose();

            FSDataOutputStream outstream = fs.create(imageFile.getPath(), true);
            ImageIO.write(img, "png", outstream);
            outstream.close();
        } catch (ParseException e) {
            e.printStackTrace();
        }
    }
}

From source file:fi.tkk.ics.hadoop.bam.cli.plugins.chipster.Summarize.java

License:Open Source License

@Override
protected int run(CmdLineParser parser) {

    final List<String> args = parser.getRemainingArgs();
    switch (args.size()) {
    case 0:
        return missingArg("WORKDIR");
    case 1:
        return missingArg("LEVELS");
    case 2:
        return missingArg("INPATH");
    default:
        break;
    }
    if (!cacheAndSetProperties(parser))
        return 3;

    levels = args.get(1).split(",");
    for (String l : levels) {
        try {
            int lvl = Integer.parseInt(l);
            if (lvl > 0)
                continue;
            System.err.printf("summarize :: summary level '%d' is not positive!\n", lvl);
        } catch (NumberFormatException e) {
            System.err.printf("summarize :: summary level '%s' is not an integer!\n", l);
        }
        return 3;
    }

    wrkDir = new Path(args.get(0));
    final Path bam = new Path(args.get(2));

    final boolean sort = parser.getBoolean(sortOpt);

    final Configuration conf = getConf();

    conf.setBoolean(AnySAMInputFormat.TRUST_EXTS_PROPERTY, !parser.getBoolean(noTrustExtsOpt));

    // Used by Utils.getMergeableWorkFile() to name the output files.
    wrkFile = bam.getName();
    conf.set(Utils.WORK_FILENAME_PROPERTY, wrkFile);

    conf.setStrings(SummarizeReducer.SUMMARY_LEVELS_PROP, levels);

    try {
        try {
            // There's a lot of different Paths here, and it can get a bit
            // confusing. Here's how it works:
            //
            // - outPath is the output dir for the final merged output, given
            //   with the -o parameter.
            //
            // - wrkDir is the user-given path where the outputs of the
            //   reducers go.
            //
            // - mergedTmpDir (defined further below) is $wrkDir/sort.tmp: if
            //   we are sorting, the summaries output in the first Hadoop job
            //   are merged in there.
            //
            // - mainSortOutputDir is $wrkDir/sorted.tmp: getSortOutputDir()
            //   gives a per-level/strand directory under it, which is used by
            //   doSorting() and mergeOne(). This is necessary because we
            //   cannot have multiple Hadoop jobs outputting into the same
            //   directory at the same time, as explained in the comment in
            //   sortMerged().

            // Required for path ".", for example.
            wrkDir = wrkDir.getFileSystem(conf).makeQualified(wrkDir);

            mainSortOutputDir = sort ? new Path(wrkDir, "sorted.tmp") : null;

            if (!runSummary(bam))
                return 4;
        } catch (IOException e) {
            System.err.printf("summarize :: Summarizing failed: %s\n", e);
            return 4;
        }

        Path mergedTmpDir = null;
        try {
            if (sort) {
                mergedTmpDir = new Path(wrkDir, "sort.tmp");
                mergeOutputs(mergedTmpDir);

            } else if (outPath != null)
                mergeOutputs(outPath);

        } catch (IOException e) {
            System.err.printf("summarize :: Merging failed: %s\n", e);
            return 5;
        }

        if (sort) {
            if (!doSorting(mergedTmpDir))
                return 6;

            // Reset this since SummarySort uses it.
            conf.set(Utils.WORK_FILENAME_PROPERTY, wrkFile);

            tryDelete(mergedTmpDir);

            if (outPath != null)
                try {
                    sorted = true;
                    mergeOutputs(outPath);
                } catch (IOException e) {
                    System.err.printf("summarize :: Merging sorted output failed: %s\n", e);
                    return 7;
                }
            else {
                // Move the unmerged results out of the mainSortOutputDir
                // subdirectories to wrkDir.

                System.out.println("summarize :: Moving outputs from temporary directories...");
                t.start();

                try {
                    final FileSystem fs = wrkDir.getFileSystem(conf);
                    for (String lvl : levels) {
                        final FileStatus[] parts;

                        try {
                            parts = fs.globStatus(new Path(new Path(mainSortOutputDir, lvl + "[fr]"),
                                    "*-[0-9][0-9][0-9][0-9][0-9][0-9]"));
                        } catch (IOException e) {
                            System.err.printf("summarize :: Couldn't move level %s results: %s", lvl, e);
                            continue;
                        }

                        for (FileStatus part : parts) {
                            final Path path = part.getPath();
                            try {
                                fs.rename(path, new Path(wrkDir, path.getName()));
                            } catch (IOException e) {
                                System.err.printf("summarize :: Couldn't move '%s': %s", path, e);
                            }
                        }
                    }
                } catch (IOException e) {
                    System.err.printf("summarize :: Moving results failed: %s", e);
                }
                System.out.printf("summarize :: Moved in %d.%03d s.\n", t.stopS(), t.fms());
            }
            tryDelete(mainSortOutputDir);
        }
    } catch (ClassNotFoundException e) {
        throw new RuntimeException(e);
    } catch (InterruptedException e) {
        throw new RuntimeException(e);
    }

    return 0;
}