Example usage for org.apache.hadoop.fs ContentSummary getLength

Introduction

On this page you can find usage examples for org.apache.hadoop.fs.ContentSummary.getLength().

Prototype

public long getLength() 
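
For orientation, here is a minimal sketch of the call; the configuration and the path /data/input are placeholders. getContentSummary() works for both files and directories, and for a directory getLength() returns the sum of the lengths of all files underneath it, not the replicated space on disk.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.ContentSummary;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class ContentSummaryLengthExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);

        // For a directory, getLength() is the total length of every file below it.
        ContentSummary summary = fs.getContentSummary(new Path("/data/input"));
        System.out.println("Total length in bytes: " + summary.getLength());
    }
}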

Usage

From source file:BwaInterpreter.java

License:Open Source License

private void setTotalInputLength() {
    try {
        FileSystem fs = FileSystem.get(this.conf);

        // Get the sizes of the input files
        ContentSummary cSummaryFile1 = fs.getContentSummary(new Path(options.getInputPath()));

        long lengthFile1 = cSummaryFile1.getLength();
        long lengthFile2 = 0;

        if (!options.getInputPath2().isEmpty()) {
            ContentSummary cSummaryFile2 = fs.getContentSummary(new Path(options.getInputPath2()));
            lengthFile2 = cSummaryFile2.getLength();
        }

        // Total size. Depends on paired or single reads
        this.totalInputLength = lengthFile1 + lengthFile2;
        fs.close();
    } catch (IOException e) {
        LOG.error(e.toString());
        e.printStackTrace();
    }
}

From source file:BwaInterpreter.java

License:Open Source License

/**
 * Used to perform the sort operation in HDFS
 * @brief This function provides a method to perform the sort phase in HDFS
 * @author José M. Abuín
 * @param fileName1 The first file that contains input FASTQ reads. Stored in HDFS
 * @param fileName2 The second file that contains input FASTQ reads. Stored in HDFS
 * @return A JavaRDD that contains the paired reads sorted
 */
public JavaRDD<Tuple2<String, String>> SortInHDFS2(String fileName1, String fileName2) {

    Configuration conf = this.conf;

    LOG.info("JMAbuin:: Starting writing reads to HDFS");

    try {
        FileSystem fs = FileSystem.get(conf);

        Path outputFilePath = new Path(this.inputTmpFileName);

        //To write the paired reads
        FSDataOutputStream outputFinalStream = fs.create(outputFilePath, true);

        //To read paired reads from both files
        BufferedReader brFastqFile1 = new BufferedReader(new InputStreamReader(fs.open(new Path(fileName1))));
        BufferedReader brFastqFile2 = new BufferedReader(new InputStreamReader(fs.open(new Path(fileName2))));

        String lineFastq1;
        String lineFastq2;

        lineFastq1 = brFastqFile1.readLine();
        lineFastq2 = brFastqFile2.readLine();

        //Loop over the two files; both must contain the same number of lines
        while (lineFastq1 != null) {
            //The lines are written interspersed
            outputFinalStream.write((lineFastq1 + "\n" + lineFastq2 + "\n").getBytes());

            //Read the next lines
            lineFastq1 = brFastqFile1.readLine();
            lineFastq2 = brFastqFile2.readLine();
        }

        //Close the input and output files
        brFastqFile1.close();
        brFastqFile2.close();
        outputFinalStream.close();

        //Now read the previously created file and build the RDD
        ContentSummary cSummary = fs.getContentSummary(outputFilePath);

        long length = cSummary.getLength();

        this.totalInputLength = length;

        fs.close();

        //If the user requested partitioning
        if (this.options.getPartitionNumber() != 0) {

            //These options set the split size so that the correct number of partitions is obtained
            this.conf.set("mapreduce.input.fileinputformat.split.maxsize",
                    String.valueOf((length) / this.options.getPartitionNumber()));
            this.conf.set("mapreduce.input.fileinputformat.split.minsize",
                    String.valueOf((length) / this.options.getPartitionNumber()));

            LOG.info("JMAbuin partitioning from HDFS:: "
                    + String.valueOf((length) / this.options.getPartitionNumber()));

            //Using the FastqInputFormatDouble class, values are read from the HDFS file and stored in an RDD
            return this.ctx.newAPIHadoopFile(this.inputTmpFileName, FastqInputFormatDouble.class, Long.class,
                    String.class, this.conf).mapPartitions(new BigFastq2RDDPartitionsDouble(), true);

        } else {
            //Using the FastqInputFormatDouble class, values are read from the HDFS file and stored in an RDD
            return this.ctx.newAPIHadoopFile(this.inputTmpFileName, FastqInputFormatDouble.class, Long.class,
                    String.class, this.conf).map(new BigFastq2RDDDouble());
        }

    } catch (IOException e) {
        // TODO Auto-generated catch block
        e.printStackTrace();
        LOG.error(e.toString());

        return null;
    }
}
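
The split-size settings used above are the mechanism by which the desired partition count is achieved: FileInputFormat sizes its input splits from mapreduce.input.fileinputformat.split.maxsize and split.minsize, so dividing the total input length by the requested number of partitions yields roughly one split, and hence one partition, per share of the data. A condensed sketch of that calculation, assuming a FileSystem fs, an input path inputPath and a positive partitions value:

// Derive a split size that produces roughly `partitions` input splits.
long totalBytes = fs.getContentSummary(new Path(inputPath)).getLength();
long splitSize = totalBytes / partitions;

conf.set("mapreduce.input.fileinputformat.split.maxsize", String.valueOf(splitSize));
conf.set("mapreduce.input.fileinputformat.split.minsize", String.valueOf(splitSize));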

From source file:BigBWA.java

License:Open Source License

@Override
public int run(String[] args) throws Exception {
    Configuration conf = this.getConf();

    for (String argumento : args) {
        LOG.info("Arg: " + argumento);
    }

    String inputPath = "";
    String outputPath = "";

    boolean useReducer = false;

    BwaOptions options = new BwaOptions(args);

    //Set the task timeout and the environment needed to call the BWA library
    conf.set("mapreduce.task.timeout", "0");
    conf.set("mapreduce.map.env", "LD_LIBRARY_PATH=./bwa.zip/");

    //==================Algorithm selection==================
    //One of the algorithms is always in use, because a default is always specified.
    if (options.isMemAlgorithm()) {
        //Case of the mem algorithm
        conf.set("mem", "true");
        conf.set("aln", "false");
        conf.set("bwasw", "false");
    }

    else if (options.isAlnAlgorithm()) {
        // Case of aln algorithm
        conf.set("mem", "false");
        conf.set("aln", "true");
        conf.set("bwasw", "false");
    }

    else if (options.isBwaswAlgorithm()) {
        // Case of bwasw algorithm
        conf.set("mem", "false");
        conf.set("aln", "false");
        conf.set("bwasw", "true");
    }

    //==================Index selection==================
    if (!options.getIndexPath().isEmpty()) {
        conf.set("indexRoute", options.getIndexPath());
    } else {
        System.err.println("No index has been found. Aborting.");
        System.exit(1);
    }

    //==================Type of reads selection==================
    //A read type is always set, because the default is paired
    if (options.isPairedReads()) {
        conf.set("paired", "true");
        conf.set("single", "false");
    } else if (options.isSingleReads()) {
        conf.set("paired", "false");
        conf.set("single", "true");
    }

    //==================Use of reducer==================
    if (options.isUseReducer()) {
        useReducer = true;
        conf.set("useReducer", "true");
    } else {
        conf.set("useReducer", "false");
    }

    //==================Number of threads per map==================
    if (!"0".equals(options.getNumThreads())) {
        conf.set("bwathreads", options.getNumThreads());
    }

    //==================RG Header===================
    if (!options.getReadgroupHeader().isEmpty()) {
        conf.set("rgheader", options.getReadgroupHeader());
    }

    //==================Input and output paths==================
    inputPath = options.getInputPath();
    outputPath = options.getOutputPath();

    conf.set("outputGenomics", outputPath);

    //==================Partition number==================
    if (options.getPartitionNumber() != 0) {
        try {
            FileSystem fs = FileSystem.get(conf);

            Path inputFilePath = new Path(inputPath);

            ContentSummary cSummary = fs.getContentSummary(inputFilePath);

            long length = cSummary.getLength();

            fs.close();

            conf.set("mapreduce.input.fileinputformat.split.maxsize",
                    String.valueOf((length) / options.getPartitionNumber()));
            conf.set("mapreduce.input.fileinputformat.split.minsize",
                    String.valueOf((length) / options.getPartitionNumber()));
        } catch (IOException e) {
            // TODO Auto-generated catch block
            e.printStackTrace();
            LOG.error(e.toString());

            System.exit(1);
        }

    }

    //Job job = new Job(conf,"BigBWA_"+outputPath);
    Job job = Job.getInstance(conf, "BigBWA_" + outputPath);

    job.setJarByClass(BigBWA.class);
    job.setMapperClass(BigBWAMap.class);
    //job.setCombinerClass(BigBWACombiner.class);

    if (useReducer) {
        job.setReducerClass(BigBWAReducer.class);

        job.setMapOutputKeyClass(IntWritable.class);
        job.setMapOutputValueClass(Text.class);

        job.setNumReduceTasks(1);
    } else {
        job.setNumReduceTasks(0);
    }

    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(Text.class);

    FileInputFormat.addInputPath(job, new Path(inputPath));
    FileOutputFormat.setOutputPath(job, new Path(outputPath));

    return (job.waitForCompletion(true) ? 0 : 1);
}

From source file:com.bigstep.datalake.JsonUtil.java

License:Apache License

/** Convert a ContentSummary to a Json string. */
public static String toJsonString(final ContentSummary contentsummary) {
    if (contentsummary == null) {
        return null;
    }

    final Map<String, Object> m = new TreeMap<String, Object>();
    m.put("length", contentsummary.getLength());
    m.put("fileCount", contentsummary.getFileCount());
    m.put("directoryCount", contentsummary.getDirectoryCount());
    m.put("quota", contentsummary.getQuota());
    m.put("spaceConsumed", contentsummary.getSpaceConsumed());
    m.put("spaceQuota", contentsummary.getSpaceQuota());
    return toJsonString(ContentSummary.class, m);
}
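
A ContentSummary can also be built directly with its builder when trying this conversion without touching HDFS. The sketch below is illustrative only: the numbers are made up, and the exact JSON layout depends on this class's toJsonString(Class, Map) helper, so the commented output is an assumption rather than the guaranteed result.

// Hypothetical values; spaceConsumed assumes a replication factor of 3.
ContentSummary summary = new ContentSummary.Builder()
        .length(24930)
        .fileCount(1)
        .directoryCount(2)
        .quota(-1)
        .spaceConsumed(74790)
        .spaceQuota(-1)
        .build();

String json = JsonUtil.toJsonString(summary);
// Expected to look roughly like:
// {"ContentSummary":{"directoryCount":2,"fileCount":1,"length":24930,"quota":-1,"spaceConsumed":74790,"spaceQuota":-1}}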

From source file:com.github.seqware.queryengine.tutorial.Poster.java

License:Open Source License

private long convertToGB(ContentSummary contentSummary) {
    // Note: getLength() reports the equivalent of the first column of "hadoop fs -du -s",
    // i.e. the logical file size before replication, not the raw space consumed on disk.
    long spaceConsumedinGB = contentSummary.getLength() / 1024 / 1024 / 1024;
    return spaceConsumedinGB;
}
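
If the quantity of interest is actually disk usage rather than logical size, ContentSummary also exposes getSpaceConsumed(), which does account for replication. A minimal sketch, assuming fs is an open FileSystem and /user/data is a placeholder path:

ContentSummary summary = fs.getContentSummary(new Path("/user/data"));

long logicalBytes = summary.getLength();          // sum of file lengths, before replication
long physicalBytes = summary.getSpaceConsumed();  // bytes actually occupied on disk, including replicas

long logicalGB = logicalBytes / 1024 / 1024 / 1024;
long physicalGB = physicalBytes / 1024 / 1024 / 1024;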

From source file:com.jkoolcloud.tnt4j.streams.inputs.HdfsFileLineStream.java

License:Apache License

private static int[] getFilesTotals(FileSystem fs, Path[] activityFiles) {
    int tbc = 0;
    int tlc = 0;
    if (ArrayUtils.isNotEmpty(activityFiles)) {
        for (Path f : activityFiles) {
            try {
                ContentSummary cSummary = fs.getContentSummary(f);
                tbc += cSummary.getLength();
                tlc += Utils.countLines(fs.open(f));
            } catch (IOException exc) {
                // files that cannot be read are skipped and excluded from the totals
            }
        }
    }

    return new int[] { tbc, tlc };
}
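
Note that the totals above are accumulated into int variables, so byte counts beyond Integer.MAX_VALUE (about 2 GiB) would overflow. A sketch of an overflow-safe variant, reusing the same fs, activityFiles and Utils.countLines from the surrounding code:

private static long[] getFilesTotalsSafe(FileSystem fs, Path[] activityFiles) {
    long totalBytes = 0;
    long totalLines = 0;
    if (ArrayUtils.isNotEmpty(activityFiles)) {
        for (Path f : activityFiles) {
            try {
                totalBytes += fs.getContentSummary(f).getLength();
                totalLines += Utils.countLines(fs.open(f));
            } catch (IOException exc) {
                // skip files that cannot be read
            }
        }
    }
    return new long[] { totalBytes, totalLines };
}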

From source file:org.apache.falcon.regression.core.util.AssertUtil.java

License:Apache License

/**
 * Checks that the content at two locations has the same size.
 *
 * @param firstPath  path to the first location
 * @param secondPath path to the second location
 * @param fs         hadoop file system for the locations
 * @throws IOException
 */
public static void checkContentSize(String firstPath, String secondPath, FileSystem fs) throws IOException {
    final ContentSummary firstSummary = fs.getContentSummary(new Path(firstPath));
    final ContentSummary secondSummary = fs.getContentSummary(new Path(secondPath));
    LOGGER.info(firstPath + " : firstSummary = " + firstSummary.toString(false));
    LOGGER.info(secondPath + " : secondSummary = " + secondSummary.toString(false));
    Assert.assertEquals(firstSummary.getLength(), secondSummary.getLength(),
            "Contents at the two locations don't have same size.");
}

From source file:org.apache.falcon.regression.ExternalFSTest.java

License:Apache License

@Test(dataProvider = "getData")
public void replicateToExternalFS(final FileSystem externalFS, final String separator, final boolean withData)
        throws Exception {
    final String endpoint = externalFS.getUri().toString();
    Bundle.submitCluster(bundles[0], externalBundle);
    String startTime = TimeUtil.getTimeWrtSystemTime(0);
    String endTime = TimeUtil.addMinsToTime(startTime, 5);
    LOGGER.info("Time range between : " + startTime + " and " + endTime);
    String datePattern = StringUtils
            .join(new String[] { "${YEAR}", "${MONTH}", "${DAY}", "${HOUR}", "${MINUTE}" }, separator);

    //configure feed
    FeedMerlin feed = new FeedMerlin(bundles[0].getDataSets().get(0));
    String targetDataLocation = endpoint + testWasbTargetDir + datePattern;
    feed.setFilePath(sourcePath + '/' + datePattern);
    //erase all clusters from feed definition
    feed.clearFeedClusters();
    //set local cluster as source
    feed.addFeedCluster(new FeedMerlin.FeedClusterBuilder(Util.readEntityName(bundles[0].getClusters().get(0)))
            .withRetention("days(1000000)", ActionType.DELETE).withValidity(startTime, endTime)
            .withClusterType(ClusterType.SOURCE).build());
    //set externalFS cluster as target
    feed.addFeedCluster(
            new FeedMerlin.FeedClusterBuilder(Util.readEntityName(externalBundle.getClusters().get(0)))
                    .withRetention("days(1000000)", ActionType.DELETE).withValidity(startTime, endTime)
                    .withClusterType(ClusterType.TARGET).withDataLocation(targetDataLocation).build());

    //submit and schedule feed
    LOGGER.info("Feed : " + Util.prettyPrintXml(feed.toString()));
    AssertUtil.assertSucceeded(prism.getFeedHelper().submitAndSchedule(feed.toString()));
    datePattern = StringUtils.join(new String[] { "yyyy", "MM", "dd", "HH", "mm" }, separator);
    //upload necessary data
    DateTime date = new DateTime(startTime, DateTimeZone.UTC);
    DateTimeFormatter fmt = DateTimeFormat.forPattern(datePattern);
    String timePattern = fmt.print(date);
    HadoopUtil.recreateDir(clusterFS, sourcePath + '/' + timePattern);
    if (withData) {
        HadoopUtil.copyDataToFolder(clusterFS, sourcePath + '/' + timePattern, OSUtil.SINGLE_FILE);
    }

    Path srcPath = new Path(sourcePath + '/' + timePattern);
    Path dstPath = new Path(endpoint + testWasbTargetDir + '/' + timePattern);

    //check if coordinator exists
    TimeUtil.sleepSeconds(10);
    InstanceUtil.waitTillInstancesAreCreated(clusterOC, feed.toString(), 0);
    Assert.assertEquals(OozieUtil.checkIfFeedCoordExist(clusterOC, feed.getName(), "REPLICATION"), 1);

    //replication should start; wait until it finishes
    InstanceUtil.waitTillInstanceReachState(clusterOC, Util.readEntityName(feed.toString()), 1,
            CoordinatorAction.Status.SUCCEEDED, EntityType.FEED);

    //check if data has been replicated correctly
    List<Path> cluster1ReplicatedData = HadoopUtil.getAllFilesRecursivelyHDFS(clusterFS, srcPath);
    List<Path> cluster2ReplicatedData = HadoopUtil.getAllFilesRecursivelyHDFS(externalFS, dstPath);
    AssertUtil.checkForListSizes(cluster1ReplicatedData, cluster2ReplicatedData);
    final ContentSummary srcSummary = clusterFS.getContentSummary(srcPath);
    final ContentSummary dstSummary = externalFS.getContentSummary(dstPath);
    Assert.assertEquals(dstSummary.getLength(), srcSummary.getLength());
}

From source file:org.apache.falcon.regression.hcat.HCatProcessTest.java

License:Apache License

@Test(dataProvider = "generateSeparators")
public void twoHCatInputOneHCatOutput(String separator) throws Exception {
    /* upload data and create partition */
    final String datePattern = StringUtils.join(new String[] { "yyyy", "MM", "dd", "HH" }, separator);
    List<String> dataDates = getDatesList(startDate, endDate, datePattern, 60);

    final List<String> dataset = HadoopUtil.flattenAndPutDataInFolder(clusterFS, localHCatData, inputHDFSDir,
            dataDates);
    final List<String> dataset2 = HadoopUtil.flattenAndPutDataInFolder(clusterFS, localHCatData, inputHDFSDir2,
            dataDates);

    ArrayList<HCatFieldSchema> cols = new ArrayList<>();
    cols.add(HCatUtil.getStringSchema(col1Name, col1Name + " comment"));
    cols.add(HCatUtil.getStringSchema(col2Name, col2Name + " comment"));
    ArrayList<HCatFieldSchema> partitionCols = new ArrayList<>();

    partitionCols.add(HCatUtil.getStringSchema(partitionColumn, partitionColumn + " partition"));
    clusterHC.createTable(HCatCreateTableDesc.create(dbName, inputTableName, cols).partCols(partitionCols)
            .ifNotExists(true).isTableExternal(true).location(inputHDFSDir).build());

    clusterHC.createTable(HCatCreateTableDesc.create(dbName, inputTableName2, cols).partCols(partitionCols)
            .ifNotExists(true).isTableExternal(true).location(inputHDFSDir2).build());

    clusterHC.createTable(HCatCreateTableDesc.create(dbName, outputTableName, cols).partCols(partitionCols)
            .ifNotExists(true).isTableExternal(true).location(outputHDFSDir).build());

    addPartitionsToTable(dataDates, dataset, "dt", dbName, inputTableName);
    addPartitionsToTable(dataDates, dataset2, "dt", dbName, inputTableName2);

    final String tableUriPartitionFragment = StringUtils
            .join(new String[] { "#dt=${YEAR}", "${MONTH}", "${DAY}", "${HOUR}" }, separator);
    String inputTableUri = "catalog:" + dbName + ":" + inputTableName + tableUriPartitionFragment;
    String inputTableUri2 = "catalog:" + dbName + ":" + inputTableName2 + tableUriPartitionFragment;
    bundles[0].setInputFeedTableUri(inputTableUri);
    bundles[0].setInputFeedPeriodicity(1, Frequency.TimeUnit.hours);
    bundles[0].setInputFeedValidity(startDate, endDate);
    final String inputFeed1 = bundles[0].getInputFeedFromBundle();
    final String inputFeed2Name = Util.readEntityName(inputFeed1) + "-second";

    FeedMerlin feedObj = new FeedMerlin(inputFeed1);
    feedObj.setName(inputFeed2Name);
    feedObj.getTable().setUri(inputTableUri2);

    bundles[0].addInputFeedToBundle("inputData2", feedObj);

    String outputTableUri = "catalog:" + dbName + ":" + outputTableName + tableUriPartitionFragment;
    bundles[0].setOutputFeedTableUri(outputTableUri);
    bundles[0].setOutputFeedPeriodicity(1, Frequency.TimeUnit.hours);
    bundles[0].setOutputFeedValidity(startDate, endDate);

    bundles[0].setProcessValidity(startDate, endDate);
    bundles[0].setProcessPeriodicity(1, Frequency.TimeUnit.hours);
    bundles[0].setProcessInputStartEnd("now(0,0)", "now(0,0)");
    bundles[0].setProcessWorkflow(hiveScriptTwoHCatInputOneHCatOutput, EngineType.HIVE);
    bundles[0].submitFeedsScheduleProcess();

    InstanceUtil.waitTillInstanceReachState(clusterOC, bundles[0].getProcessName(), 1,
            CoordinatorAction.Status.SUCCEEDED, EntityType.PROCESS);

    final ContentSummary inputContentSummary = clusterFS
            .getContentSummary(new Path(inputHDFSDir + "/" + dataDates.get(0)));
    final ContentSummary inputContentSummary2 = clusterFS
            .getContentSummary(new Path(inputHDFSDir2 + "/" + dataDates.get(0)));
    final ContentSummary outputContentSummary = clusterFS
            .getContentSummary(new Path(outputHDFSDir + "/dt=" + dataDates.get(0)));
    LOGGER.info("inputContentSummary = " + inputContentSummary.toString(false));
    LOGGER.info("inputContentSummary2 = " + inputContentSummary2.toString(false));
    LOGGER.info("outputContentSummary = " + outputContentSummary.toString(false));
    Assert.assertEquals(inputContentSummary.getLength() + inputContentSummary2.getLength(),
            outputContentSummary.getLength(), "Unexpected size of the output.");
}

From source file:org.apache.falcon.regression.hcat.HCatProcessTest.java

License:Apache License

@Test(dataProvider = "generateSeparators")
public void twoHCatInputTwoHCatOutput(String separator) throws Exception {
    /* upload data and create partition */
    final String datePattern = StringUtils.join(new String[] { "yyyy", "MM", "dd", "HH" }, separator);
    List<String> dataDates = getDatesList(startDate, endDate, datePattern, 60);

    final List<String> dataset = HadoopUtil.flattenAndPutDataInFolder(clusterFS, localHCatData, inputHDFSDir,
            dataDates);
    final List<String> dataset2 = HadoopUtil.flattenAndPutDataInFolder(clusterFS, localHCatData, inputHDFSDir2,
            dataDates);

    ArrayList<HCatFieldSchema> cols = new ArrayList<>();
    cols.add(HCatUtil.getStringSchema(col1Name, col1Name + " comment"));
    cols.add(HCatUtil.getStringSchema(col2Name, col2Name + " comment"));
    ArrayList<HCatFieldSchema> partitionCols = new ArrayList<>();

    partitionCols.add(HCatUtil.getStringSchema(partitionColumn, partitionColumn + " partition"));
    clusterHC.createTable(HCatCreateTableDesc.create(dbName, inputTableName, cols).partCols(partitionCols)
            .ifNotExists(true).isTableExternal(true).location(inputHDFSDir).build());

    clusterHC.createTable(HCatCreateTableDesc.create(dbName, inputTableName2, cols).partCols(partitionCols)
            .ifNotExists(true).isTableExternal(true).location(inputHDFSDir2).build());

    clusterHC.createTable(HCatCreateTableDesc.create(dbName, outputTableName, cols).partCols(partitionCols)
            .ifNotExists(true).isTableExternal(true).location(outputHDFSDir).build());

    clusterHC.createTable(HCatCreateTableDesc.create(dbName, outputTableName2, cols).partCols(partitionCols)
            .ifNotExists(true).isTableExternal(true).location(outputHDFSDir2).build());

    addPartitionsToTable(dataDates, dataset, "dt", dbName, inputTableName);
    addPartitionsToTable(dataDates, dataset2, "dt", dbName, inputTableName2);

    final String tableUriPartitionFragment = StringUtils
            .join(new String[] { "#dt=${YEAR}", "${MONTH}", "${DAY}", "${HOUR}" }, separator);
    String inputTableUri = "catalog:" + dbName + ":" + inputTableName + tableUriPartitionFragment;
    String inputTableUri2 = "catalog:" + dbName + ":" + inputTableName2 + tableUriPartitionFragment;
    bundles[0].setInputFeedTableUri(inputTableUri);
    bundles[0].setInputFeedPeriodicity(1, Frequency.TimeUnit.hours);
    bundles[0].setInputFeedValidity(startDate, endDate);
    final String inputFeed1 = bundles[0].getInputFeedFromBundle();
    final String inputFeed2Name = Util.readEntityName(inputFeed1) + "-second";
    FeedMerlin feedObj = new FeedMerlin(inputFeed1);
    feedObj.setName(inputFeed2Name);
    feedObj.getTable().setUri(inputTableUri2);
    bundles[0].addInputFeedToBundle("inputData2", feedObj);

    String outputTableUri = "catalog:" + dbName + ":" + outputTableName + tableUriPartitionFragment;
    String outputTableUri2 = "catalog:" + dbName + ":" + outputTableName2 + tableUriPartitionFragment;
    bundles[0].setOutputFeedTableUri(outputTableUri);
    bundles[0].setOutputFeedPeriodicity(1, Frequency.TimeUnit.hours);
    bundles[0].setOutputFeedValidity(startDate, endDate);
    final String outputFeed1 = bundles[0].getOutputFeedFromBundle();
    final String outputFeed2Name = Util.readEntityName(outputFeed1) + "-second";
    FeedMerlin feedObj2 = new FeedMerlin(outputFeed1);
    feedObj2.setName(outputFeed2Name);
    feedObj2.getTable().setUri(outputTableUri2);
    bundles[0].addOutputFeedToBundle("outputData2", feedObj2);
    bundles[0].setProcessValidity(startDate, endDate);
    bundles[0].setProcessPeriodicity(1, Frequency.TimeUnit.hours);
    bundles[0].setProcessInputStartEnd("now(0,0)", "now(0,0)");
    bundles[0].setProcessWorkflow(hiveScriptTwoHCatInputTwoHCatOutput, EngineType.HIVE);
    bundles[0].submitFeedsScheduleProcess();

    InstanceUtil.waitTillInstanceReachState(clusterOC, bundles[0].getProcessName(), 1,
            CoordinatorAction.Status.SUCCEEDED, EntityType.PROCESS);

    final ContentSummary inputContentSummary = clusterFS
            .getContentSummary(new Path(inputHDFSDir + "/" + dataDates.get(0)));
    final ContentSummary inputContentSummary2 = clusterFS
            .getContentSummary(new Path(inputHDFSDir2 + "/" + dataDates.get(0)));
    final ContentSummary outputContentSummary = clusterFS
            .getContentSummary(new Path(outputHDFSDir + "/dt=" + dataDates.get(0)));
    final ContentSummary outputContentSummary2 = clusterFS
            .getContentSummary(new Path(outputHDFSDir2 + "/dt=" + dataDates.get(0)));
    LOGGER.info("inputContentSummary = " + inputContentSummary.toString(false));
    LOGGER.info("inputContentSummary2 = " + inputContentSummary2.toString(false));
    LOGGER.info("outputContentSummary = " + outputContentSummary.toString(false));
    LOGGER.info("outputContentSummary2 = " + outputContentSummary2.toString(false));
    Assert.assertEquals(inputContentSummary.getLength() + inputContentSummary2.getLength(),
            outputContentSummary.getLength(), "Unexpected size of the output.");
    Assert.assertEquals(inputContentSummary.getLength() + inputContentSummary2.getLength(),
            outputContentSummary2.getLength(), "Unexpected size of the output.");
}