Example usage for org.apache.hadoop.fs FileSystem globStatus

List of usage examples for org.apache.hadoop.fs FileSystem globStatus

Introduction

On this page you can find example usage for org.apache.hadoop.fs FileSystem globStatus.

Prototype

public FileStatus[] globStatus(Path pathPattern) throws IOException 

Document

Return all the files that match filePattern and are not checksum files.
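
Before the per-project examples below, here is a minimal, self-contained sketch of calling globStatus. The class name, glob pattern, and directory layout are illustrative assumptions only; as in the examples that follow, the result is checked for both null and an empty array before use.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class GlobStatusExample {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);

        // Hypothetical glob: all part files under a date-partitioned output directory.
        Path pattern = new Path("/data/output/2016/*/part-*");

        // globStatus can return null or an empty array when nothing matches,
        // so guard against both before iterating.
        FileStatus[] statuses = fs.globStatus(pattern);
        if (statuses == null || statuses.length == 0) {
            System.out.println("No files matched " + pattern);
            return;
        }
        for (FileStatus status : statuses) {
            System.out.println(status.getPath() + " (" + status.getLen() + " bytes)");
        }
    }
}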

Usage

From source file:com.twitter.elephanttwin.retrieval.IndexedPigLoader.java

License:Apache License

private String[] getUnionedPartitionKeys(String location, Job job) throws IOException {
    /*
     * Report what columns have been indexed before. The current implementation
     * only reports the columns indexed on all input files the PigLoader needs to
     * work on. This is done by inspecting the FileIndexDescriptor of each input
     * file.
     */

    if (location == null || location.equals(""))
        return null;

    Configuration conf = job.getConfiguration();
    FileSystem fs = FileSystem.get(conf);

    if (!fs.exists(new Path(indexDir))) {
        LOG.info("index dir:" + indexDir + " does not exist, no indexes will be used");
        return null;
    }
    LOG.info("checking directory:" + new Path(indexDir + new Path(location).toUri().getPath()));
    FileStatus[] fileStatuses = fs.globStatus(new Path(indexDir + new Path(location).toUri().getPath()));

    if (fileStatuses == null || fileStatuses.length == 0) {
        LOG.info("index dir:" + indexDir + location + " does not have indexes, no indexes will be used");
        return null;
    }

    // return all indexed column names from all base file under location which have been previously indexed.
    HashSet<String> indexedColumns = new HashSet<String>();
    List<FileStatus> indexMetaFiles = new ArrayList<FileStatus>();
    for (FileStatus status : fileStatuses) {
        HdfsUtils.addInputPathRecursively(indexMetaFiles, fs, status.getPath(), HdfsUtils.hiddenDirectoryFilter,
                indexMetaPathFilter);
    }
    LOG.info("found " + indexMetaFiles.size() + " index descriptor files");

    for (FileStatus indexMetafile : indexMetaFiles) {
        FSDataInputStream in = fs.open(indexMetafile.getPath());
        ThriftWritable<FileIndexDescriptor> writable = ThriftWritable.newInstance(FileIndexDescriptor.class);
        writable.readFields(in);
        FileIndexDescriptor indexDescriptor = writable.get();

        List<IndexedField> indexedFields = indexDescriptor.getIndexedFields();
        in.close();
        for (IndexedField field : indexedFields) {
            String colName = field.getFieldName();
            indexedColumns.add(colName);
        }
    }

    if (indexedColumns.size() == 0) {
        return null;
    }

    return indexedColumns.toArray(new String[indexedColumns.size()]);
}

From source file:com.twitter.elephanttwin.util.HdfsUtils.java

License:Apache License

/**
 * Concatenate the content of all HDFS files matching {@code hdfsGlob} into a
 * file {@code localFilename}.
 *
 * @param hdfsNameNode The name of the Hadoop name node.
 * @param hdfsGlob Files matching this pattern will be fetched.
 * @param localFilename Name of local file to store concatenated content.
 * @return The newly created file.
 * @throws IOException when the file cannot be created/written.
 */
public static File getHdfsFiles(String hdfsNameNode, String hdfsGlob, String localFilename) throws IOException {
    Preconditions.checkNotNull(localFilename);
    Preconditions.checkNotNull(hdfsGlob);
    Preconditions.checkNotNull(hdfsNameNode);
    // init the FS connection and the local file.
    Configuration config = new Configuration();
    config.set(CommonConfigurationKeys.FS_DEFAULT_NAME_KEY, hdfsNameNode);
    FileSystem dfs = FileSystem.get(config);
    File localFile = new File(localFilename);
    FileOutputStream localStream = new FileOutputStream(localFile);

    // get files that need downloading.
    FileStatus[] statuses = dfs.globStatus(new Path(hdfsGlob));
    LOG.info("Pattern " + hdfsGlob + " matched " + statuses.length + " HDFS files, " + "fetching to "
            + localFile.getCanonicalPath() + "...");

    // append each file.
    int copiedChars = 0;
    FSDataInputStream remoteStream = null;
    for (FileStatus status : statuses) {
        Path src = status.getPath();
        try {
            remoteStream = dfs.open(src);
            copiedChars += IOUtils.copy(remoteStream, localStream);
        } catch (IOException e) {
            LOG.severe("Failed to open/copy " + src);
        } finally {
            IOUtils.closeQuietly(remoteStream);
        }
    }
    LOG.info("Fetch " + copiedChars + " bytes to local FS");
    return localFile;
}

From source file:com.twitter.elephanttwin.util.HdfsUtils.java

License:Apache License

/**
 * Returns {@link FileStatus} instances for all part files beneath the given parent URI.
 *
 * @param fs file system with which to retrieve part file status.
 * @param parent the parent URI within which part files should be globbed.
 * @return status of part files within parent URI.
 * @throws IOException
 */
public static List<FileStatus> partFileStatus(FileSystem fs, URI parent) throws IOException {
    return Lists.newArrayList(fs.globStatus(new Path(new Path(parent), PART_FILE_GLOB)));
}

From source file:com.uber.hoodie.cli.commands.FileSystemViewCommand.java

License:Apache License

/**
 * Build File System View
 * @param globRegex Path Regex
 * @param maxInstant  Max Instants to be used for displaying file-instants
 * @param readOptimizedOnly Include only read optimized view
 * @param includeMaxInstant Include Max instant
 * @param includeInflight   Include inflight instants
 * @param excludeCompaction Exclude Compaction instants
 * @return the constructed file system view
 * @throws IOException
 */
private HoodieTableFileSystemView buildFileSystemView(String globRegex, String maxInstant,
        boolean readOptimizedOnly, boolean includeMaxInstant, boolean includeInflight,
        boolean excludeCompaction) throws IOException {
    HoodieTableMetaClient metaClient = new HoodieTableMetaClient(HoodieCLI.tableMetadata.getHadoopConf(),
            HoodieCLI.tableMetadata.getBasePath(), true);
    FileSystem fs = HoodieCLI.fs;
    String globPath = String.format("%s/%s/*", HoodieCLI.tableMetadata.getBasePath(), globRegex);
    FileStatus[] statuses = fs.globStatus(new Path(globPath));
    Stream<HoodieInstant> instantsStream = null;

    HoodieTimeline timeline = null;
    if (readOptimizedOnly) {
        timeline = metaClient.getActiveTimeline().getCommitTimeline();
    } else if (excludeCompaction) {
        timeline = metaClient.getActiveTimeline().getCommitsTimeline();
    } else {
        timeline = metaClient.getActiveTimeline().getCommitsAndCompactionTimeline();
    }

    if (!includeInflight) {
        timeline = timeline.filterCompletedInstants();
    }

    instantsStream = timeline.getInstants();

    if (!maxInstant.isEmpty()) {
        final BiPredicate<String, String> predicate;
        if (includeMaxInstant) {
            predicate = HoodieTimeline.GREATER_OR_EQUAL;
        } else {
            predicate = HoodieTimeline.GREATER;
        }
        instantsStream = instantsStream.filter(is -> predicate.test(maxInstant, is.getTimestamp()));
    }

    HoodieTimeline filteredTimeline = new HoodieDefaultTimeline(instantsStream,
            (Function<HoodieInstant, Optional<byte[]>> & Serializable) metaClient
                    .getActiveTimeline()::getInstantDetails);
    return new HoodieTableFileSystemView(metaClient, filteredTimeline, statuses);
}

From source file:com.uber.hoodie.cli.commands.HoodieLogFileCommand.java

License:Apache License

@CliCommand(value = "show logfile metadata", help = "Read commit metadata from log files")
public String showLogFileCommits(
        @CliOption(key = "logFilePathPattern", mandatory = true, help = "Fully qualified path for the log file") final String logFilePathPattern,
        @CliOption(key = {
                "limit" }, help = "Limit commits", unspecifiedDefaultValue = "-1") final Integer limit,
        @CliOption(key = {
                "sortBy" }, help = "Sorting Field", unspecifiedDefaultValue = "") final String sortByField,
        @CliOption(key = {
                "desc" }, help = "Ordering", unspecifiedDefaultValue = "false") final boolean descending,
        @CliOption(key = {
                "headeronly" }, help = "Print Header Only", unspecifiedDefaultValue = "false") final boolean headerOnly)
        throws IOException {

    FileSystem fs = HoodieCLI.tableMetadata.getFs();
    List<String> logFilePaths = Arrays.stream(fs.globStatus(new Path(logFilePathPattern)))
            .map(status -> status.getPath().toString()).collect(Collectors.toList());
    Map<String, List<Tuple3<HoodieLogBlockType, Tuple2<Map<HeaderMetadataType, String>, Map<HeaderMetadataType, String>>, Integer>>> commitCountAndMetadata = Maps
            .newHashMap();
    int totalEntries = 0;
    int numCorruptBlocks = 0;
    int dummyInstantTimeCount = 0;

    for (String logFilePath : logFilePaths) {
        FileStatus[] fsStatus = fs.listStatus(new Path(logFilePath));
        Schema writerSchema = new AvroSchemaConverter().convert(
                SchemaUtil.readSchemaFromLogFile(HoodieCLI.tableMetadata.getFs(), new Path(logFilePath)));
        HoodieLogFormat.Reader reader = HoodieLogFormat.newReader(fs, new HoodieLogFile(fsStatus[0].getPath()),
                writerSchema);

        // read the avro blocks
        while (reader.hasNext()) {
            HoodieLogBlock n = reader.next();
            String instantTime;
            int recordCount = 0;
            if (n instanceof HoodieCorruptBlock) {
                try {
                    instantTime = n.getLogBlockHeader().get(HeaderMetadataType.INSTANT_TIME);
                    if (instantTime == null) {
                        throw new Exception("Invalid instant time " + instantTime);
                    }
                } catch (Exception e) {
                    numCorruptBlocks++;
                    instantTime = "corrupt_block_" + numCorruptBlocks;
                    // could not read metadata for corrupt block
                }
            } else {
                instantTime = n.getLogBlockHeader().get(HeaderMetadataType.INSTANT_TIME);
                if (instantTime == null) {
                    // This can happen when reading archived commit files since they were written without any instant time
                    dummyInstantTimeCount++;
                    instantTime = "dummy_instant_time_" + dummyInstantTimeCount;
                }
                if (n instanceof HoodieAvroDataBlock) {
                    recordCount = ((HoodieAvroDataBlock) n).getRecords().size();
                }
            }
            if (commitCountAndMetadata.containsKey(instantTime)) {
                commitCountAndMetadata.get(instantTime).add(new Tuple3<>(n.getBlockType(),
                        new Tuple2<>(n.getLogBlockHeader(), n.getLogBlockFooter()), recordCount));
                totalEntries++;
            } else {
                List<Tuple3<HoodieLogBlockType, Tuple2<Map<HeaderMetadataType, String>, Map<HeaderMetadataType, String>>, Integer>> list = new ArrayList<>();
                list.add(new Tuple3<>(n.getBlockType(),
                        new Tuple2<>(n.getLogBlockHeader(), n.getLogBlockFooter()), recordCount));
                commitCountAndMetadata.put(instantTime, list);
                totalEntries++;
            }
        }
        reader.close();
    }
    List<Comparable[]> rows = new ArrayList<>();
    int i = 0;
    ObjectMapper objectMapper = new ObjectMapper();
    for (Map.Entry<String, List<Tuple3<HoodieLogBlockType, Tuple2<Map<HeaderMetadataType, String>, Map<HeaderMetadataType, String>>, Integer>>> entry : commitCountAndMetadata
            .entrySet()) {
        String instantTime = entry.getKey().toString();
        for (Tuple3<HoodieLogBlockType, Tuple2<Map<HeaderMetadataType, String>, Map<HeaderMetadataType, String>>, Integer> tuple3 : entry
                .getValue()) {
            Comparable[] output = new Comparable[5];
            output[0] = instantTime;
            output[1] = tuple3._3();
            output[2] = tuple3._1().toString();
            output[3] = objectMapper.writeValueAsString(tuple3._2()._1());
            output[4] = objectMapper.writeValueAsString(tuple3._2()._2());
            rows.add(output);
            i++;
        }
    }

    TableHeader header = new TableHeader().addTableHeaderField("InstantTime").addTableHeaderField("RecordCount")
            .addTableHeaderField("BlockType").addTableHeaderField("HeaderMetadata")
            .addTableHeaderField("FooterMetadata");

    return HoodiePrintHelper.print(header, new HashMap<>(), sortByField, descending, limit, headerOnly, rows);
}

From source file:com.uber.hoodie.cli.commands.HoodieLogFileCommand.java

License:Apache License

@CliCommand(value = "show logfile records", help = "Read records from log files")
public String showLogFileRecords(@CliOption(key = {
        "limit" }, mandatory = false, help = "Limit commits", unspecifiedDefaultValue = "10") final Integer limit,
        @CliOption(key = "logFilePathPattern", mandatory = true, help = "Fully qualified paths for the log files") final String logFilePathPattern,
        @CliOption(key = "mergeRecords", mandatory = false, help = "If the records in the log files should be merged", unspecifiedDefaultValue = "false") final Boolean shouldMerge)
        throws IOException {

    System.out.println("===============> Showing only " + limit + " records <===============");

    FileSystem fs = HoodieCLI.tableMetadata.getFs();
    List<String> logFilePaths = Arrays.stream(fs.globStatus(new Path(logFilePathPattern)))
            .map(status -> status.getPath().toString()).collect(Collectors.toList());

    // TODO : readerSchema can change across blocks/log files, fix this inside Scanner
    AvroSchemaConverter converter = new AvroSchemaConverter();
    // get schema from last log file
    Schema readerSchema = converter
            .convert(SchemaUtil.readSchemaFromLogFile(fs, new Path(logFilePaths.get(logFilePaths.size() - 1))));

    List<IndexedRecord> allRecords = new ArrayList<>();

    if (shouldMerge) {
        System.out.println("===========================> MERGING RECORDS <===================");
        HoodieMergedLogRecordScanner scanner = new HoodieMergedLogRecordScanner(fs,
                HoodieCLI.tableMetadata.getBasePath(), logFilePaths, readerSchema,
                HoodieCLI.tableMetadata.getActiveTimeline().getCommitTimeline().lastInstant().get()
                        .getTimestamp(),
                Long.valueOf(HoodieMemoryConfig.DEFAULT_MAX_MEMORY_FOR_SPILLABLE_MAP_IN_BYTES),
                Boolean.valueOf(HoodieCompactionConfig.DEFAULT_COMPACTION_LAZY_BLOCK_READ_ENABLED),
                Boolean.valueOf(HoodieCompactionConfig.DEFAULT_COMPACTION_REVERSE_LOG_READ_ENABLED),
                Integer.valueOf(HoodieMemoryConfig.DEFAULT_MAX_DFS_STREAM_BUFFER_SIZE),
                HoodieMemoryConfig.DEFAULT_SPILLABLE_MAP_BASE_PATH);
        for (HoodieRecord<? extends HoodieRecordPayload> hoodieRecord : scanner) {
            Optional<IndexedRecord> record = hoodieRecord.getData().getInsertValue(readerSchema);
            if (allRecords.size() >= limit) {
                break;
            }
            allRecords.add(record.get());
        }
    } else {
        for (String logFile : logFilePaths) {
            Schema writerSchema = new AvroSchemaConverter().convert(
                    SchemaUtil.readSchemaFromLogFile(HoodieCLI.tableMetadata.getFs(), new Path(logFile)));
            HoodieLogFormat.Reader reader = HoodieLogFormat.newReader(fs, new HoodieLogFile(new Path(logFile)),
                    writerSchema);
            // read the avro blocks
            while (reader.hasNext()) {
                HoodieLogBlock n = reader.next();
                if (n instanceof HoodieAvroDataBlock) {
                    HoodieAvroDataBlock blk = (HoodieAvroDataBlock) n;
                    List<IndexedRecord> records = blk.getRecords();
                    allRecords.addAll(records);
                    if (allRecords.size() >= limit) {
                        break;
                    }
                }
            }
            reader.close();
            if (allRecords.size() >= limit) {
                break;
            }
        }
    }
    String[][] rows = new String[allRecords.size() + 1][];
    int i = 0;
    for (IndexedRecord record : allRecords) {
        String[] data = new String[1];
        data[0] = record.toString();
        rows[i] = data;
        i++;
    }
    return HoodiePrintHelper.print(new String[] { "Records" }, rows);
}

From source file:com.uber.hoodie.cli.commands.StatsCommand.java

License:Apache License

@CliCommand(value = "stats filesizes", help = "File Sizes. Display summary stats on sizes of files")
public String fileSizeStats(@CliOption(key = {
        "partitionPath" }, help = "regex to select files, eg: 2016/08/02", unspecifiedDefaultValue = "*/*/*") final String globRegex,
        @CliOption(key = {
                "limit" }, help = "Limit commits", unspecifiedDefaultValue = "-1") final Integer limit,
        @CliOption(key = {
                "sortBy" }, help = "Sorting Field", unspecifiedDefaultValue = "") final String sortByField,
        @CliOption(key = {
                "desc" }, help = "Ordering", unspecifiedDefaultValue = "false") final boolean descending,
        @CliOption(key = {
                "headeronly" }, help = "Print Header Only", unspecifiedDefaultValue = "false") final boolean headerOnly)
        throws IOException {

    FileSystem fs = HoodieCLI.fs;
    String globPath = String.format("%s/%s/*", HoodieCLI.tableMetadata.getBasePath(), globRegex);
    FileStatus[] statuses = fs.globStatus(new Path(globPath));

    // max, min, #small files < 10MB, 50th, avg, 95th
    Histogram globalHistogram = new Histogram(new UniformReservoir(MAX_FILES));
    HashMap<String, Histogram> commitHistoMap = new HashMap<String, Histogram>();
    for (FileStatus fileStatus : statuses) {
        String commitTime = FSUtils.getCommitTime(fileStatus.getPath().getName());
        long sz = fileStatus.getLen();
        if (!commitHistoMap.containsKey(commitTime)) {
            commitHistoMap.put(commitTime, new Histogram(new UniformReservoir(MAX_FILES)));
        }
        commitHistoMap.get(commitTime).update(sz);
        globalHistogram.update(sz);
    }

    List<Comparable[]> rows = new ArrayList<>();
    int ind = 0;
    for (String commitTime : commitHistoMap.keySet()) {
        Snapshot s = commitHistoMap.get(commitTime).getSnapshot();
        rows.add(printFileSizeHistogram(commitTime, s));
    }
    Snapshot s = globalHistogram.getSnapshot();
    rows.add(printFileSizeHistogram("ALL", s));

    Function<Object, String> converterFunction = entry -> NumericUtils
            .humanReadableByteCount((Double.valueOf(entry.toString())));
    Map<String, Function<Object, String>> fieldNameToConverterMap = new HashMap<>();
    fieldNameToConverterMap.put("Min", converterFunction);
    fieldNameToConverterMap.put("10th", converterFunction);
    fieldNameToConverterMap.put("50th", converterFunction);
    fieldNameToConverterMap.put("avg", converterFunction);
    fieldNameToConverterMap.put("95th", converterFunction);
    fieldNameToConverterMap.put("Max", converterFunction);
    fieldNameToConverterMap.put("StdDev", converterFunction);

    TableHeader header = new TableHeader().addTableHeaderField("CommitTime").addTableHeaderField("Min")
            .addTableHeaderField("10th").addTableHeaderField("50th").addTableHeaderField("avg")
            .addTableHeaderField("95th").addTableHeaderField("Max").addTableHeaderField("NumFiles")
            .addTableHeaderField("StdDev");
    return HoodiePrintHelper.print(header, fieldNameToConverterMap, sortByField, descending, limit, headerOnly,
            rows);
}

From source file:com.uber.hoodie.common.HoodieClientTestUtils.java

License:Apache License

/**
 * Reads the paths under a hoodie dataset out as a DataFrame.
 */
public static Dataset<Row> read(JavaSparkContext jsc, String basePath, SQLContext sqlContext, FileSystem fs,
        String... paths) {
    List<String> filteredPaths = new ArrayList<>();
    try {
        HoodieTableMetaClient metaClient = new HoodieTableMetaClient(fs.getConf(), basePath, true);
        for (String path : paths) {
            TableFileSystemView.ReadOptimizedView fileSystemView = new HoodieTableFileSystemView(metaClient,
                    metaClient.getCommitsTimeline().filterCompletedInstants(), fs.globStatus(new Path(path)));
            List<HoodieDataFile> latestFiles = fileSystemView.getLatestDataFiles().collect(Collectors.toList());
            for (HoodieDataFile file : latestFiles) {
                filteredPaths.add(file.getPath());
            }
        }
        return sqlContext.read().parquet(filteredPaths.toArray(new String[filteredPaths.size()]));
    } catch (Exception e) {
        throw new HoodieException("Error reading hoodie dataset as a dataframe", e);
    }
}

From source file:com.yahoo.glimmer.util.MapReducePartInputStreamEnumeration.java

License:Open Source License

public MapReducePartInputStreamEnumeration(FileSystem fileSystem, Path srcPath) throws IOException {
    this.fileSystem = fileSystem;

    CompressionCodecFactory factory = new CompressionCodecFactory(fileSystem.getConf());
    codecIfAny = factory.getCodec(srcPath);

    FileStatus srcFileStatus = fileSystem.getFileStatus(srcPath);
    if (srcFileStatus.isDirectory()) {
        // returns FileStatus objects sorted by filename.
        String partFilenamePattern = "part-?-?????";
        if (codecIfAny != null) {
            partFilenamePattern += codecIfAny.getDefaultExtension();
        }
        Path partPathGlob = new Path(srcPath, partFilenamePattern);
        partFileStatuses = fileSystem.globStatus(partPathGlob);
    } else {
        partFileStatuses = new FileStatus[] { srcFileStatus };
    }

}

From source file:com.yahoo.glimmer.util.MergeSortTool.java

License:Open Source License

@Override
public int run(String[] args) throws Exception {

    SimpleJSAP jsap = new SimpleJSAP(MergeSortTool.class.getName(),
            "Merges alpha numerically sorted text files on HDFS",
            new Parameter[] {
                    new FlaggedOption(INPUT_ARG, JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, 'i',
                            INPUT_ARG, "input filenames glob eg. .../part-r-?????/sortedlines.text"),
                    new FlaggedOption(OUTPUT_ARG, JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, 'o',
                            OUTPUT_ARG, "output filename"),
                    new FlaggedOption(COUNT_ARG, JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.NOT_REQUIRED, 'c',
                            COUNT_ARG,
                            "optionally create a file containing a count of the number of lines merged in text"), });

    JSAPResult jsapResult = jsap.parse(args);
    if (!jsapResult.success()) {
        System.err.print(jsap.getUsage());
        System.exit(1);
    }

    // FileSystem fs = FileSystem.get(getConf());
    // CompressionCodecFactory factory = new
    // CompressionCodecFactory(getConf());
    // mergeSort(fs, sourcePaths, outputPath, factory);

    // Maybe quicker to use a MR job with one reducer.. Currently
    // decompression, merge and compression are all done in this thread..

    Path inputGlobPath = new Path(jsapResult.getString(INPUT_ARG));

    Configuration config = getConf();
    FileSystem fs = FileSystem.get(config);

    FileStatus[] sources = fs.globStatus(inputGlobPath);

    if (sources.length == 0) {
        System.err.println("No files matching input glob:" + inputGlobPath.toString());
        return 1;
    }

    List<Path> sourcePaths = new ArrayList<Path>(sources.length);
    for (FileStatus source : sources) {
        if (source.isDirectory()) {
            System.err.println(source.getPath().toString() + " is a directory.");
            return 1;
        }
        sourcePaths.add(source.getPath());
    }

    Path outputPath = new Path(jsapResult.getString(OUTPUT_ARG));

    CompressionCodecFactory factory = new CompressionCodecFactory(config);

    FSDataOutputStream countsOutputStream = null;
    if (jsapResult.contains(COUNT_ARG)) {
        Path countsPath = null;
        countsPath = new Path(jsapResult.getString(COUNT_ARG));
        countsOutputStream = fs.create(countsPath);
    }

    int lineCount = MergeSortTool.mergeSort(fs, sourcePaths, outputPath, factory);
    System.out.println("Merged " + lineCount + " lines into " + outputPath.toString());
    if (countsOutputStream != null) {
        countsOutputStream.writeBytes("" + lineCount + '\n');
        countsOutputStream.flush();
        countsOutputStream.close();
    }

    return 0;
}