List of usage examples for org.apache.hadoop.fs.ContentSummary#getLength()
public long getLength()
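Before the longer examples below, a minimal sketch of the typical call pattern: obtain a ContentSummary for a path (a file or a directory) and read its total length in bytes. The path and class names here are placeholders for illustration, not taken from any of the projects listed.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.ContentSummary;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class ContentSummaryLengthExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);

        // Works for a single file or a directory; for a directory the
        // length is the sum of the lengths of all files beneath it.
        Path path = new Path(args[0]); // e.g. /user/hadoop/input
        ContentSummary summary = fs.getContentSummary(path);

        System.out.println("total length (bytes): " + summary.getLength());
        System.out.println("files: " + summary.getFileCount()
                + ", directories: " + summary.getDirectoryCount());

        fs.close();
    }
}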
From source file:BwaInterpreter.java
License:Open Source License
private void setTotalInputLength() {
    try {
        FileSystem fs = FileSystem.get(this.conf);

        // To get the input files sizes
        ContentSummary cSummaryFile1 = fs.getContentSummary(new Path(options.getInputPath()));
        long lengthFile1 = cSummaryFile1.getLength();
        long lengthFile2 = 0;

        if (!options.getInputPath2().isEmpty()) {
            ContentSummary cSummaryFile2 = fs.getContentSummary(new Path(options.getInputPath2()));
            lengthFile2 = cSummaryFile2.getLength();
        }

        // Total size. Depends on paired or single reads
        this.totalInputLength = lengthFile1 + lengthFile2;
        fs.close();
    } catch (IOException e) {
        LOG.error(e.toString());
        e.printStackTrace();
    }
}
From source file:BwaInterpreter.java
License:Open Source License
/**
 * Used to perform the sort operation in HDFS
 * @brief This function provides a method to perform the sort phase in HDFS
 * @author José M. Abuín
 * @param fileName1 The first file that contains input FASTQ reads. Stored in HDFS
 * @param fileName2 The second file that contains input FASTQ reads. Stored in HDFS
 * @return A JavaRDD that contains the paired reads, sorted
 */
public JavaRDD<Tuple2<String, String>> SortInHDFS2(String fileName1, String fileName2) {

    Configuration conf = this.conf;

    LOG.info("JMAbuin:: Starting writing reads to HDFS");

    try {
        FileSystem fs = FileSystem.get(conf);

        Path outputFilePath = new Path(this.inputTmpFileName);

        // To write the paired reads
        FSDataOutputStream outputFinalStream = fs.create(outputFilePath, true);

        // To read paired reads from both files
        BufferedReader brFastqFile1 = new BufferedReader(new InputStreamReader(fs.open(new Path(fileName1))));
        BufferedReader brFastqFile2 = new BufferedReader(new InputStreamReader(fs.open(new Path(fileName2))));

        String lineFastq1;
        String lineFastq2;

        lineFastq1 = brFastqFile1.readLine();
        lineFastq2 = brFastqFile2.readLine();

        // Loop to read the two files. Both must have the same number of lines
        while (lineFastq1 != null) {
            // The lines are written interleaved
            outputFinalStream.write((lineFastq1 + "\n" + lineFastq2 + "\n").getBytes());

            // Read the next lines
            lineFastq1 = brFastqFile1.readLine();
            lineFastq2 = brFastqFile2.readLine();
        }

        // Close the input and output files
        brFastqFile1.close();
        brFastqFile2.close();
        outputFinalStream.close();

        // Now it is time to read the previously created file and create the RDD
        ContentSummary cSummary = fs.getContentSummary(outputFilePath);

        long length = cSummary.getLength();

        this.totalInputLength = length;

        fs.close();

        // In case the user wants partitioning
        if (this.options.getPartitionNumber() != 0) {
            // These options are set to indicate the split size and get the correct number of partitions
            this.conf.set("mapreduce.input.fileinputformat.split.maxsize",
                    String.valueOf((length) / this.options.getPartitionNumber()));
            this.conf.set("mapreduce.input.fileinputformat.split.minsize",
                    String.valueOf((length) / this.options.getPartitionNumber()));

            LOG.info("JMAbuin partitioning from HDFS:: "
                    + String.valueOf((length) / this.options.getPartitionNumber()));

            // Using the FastqInputFormatDouble class we get values from the HDFS file.
            // After that, these values are stored in an RDD
            return this.ctx.newAPIHadoopFile(this.inputTmpFileName, FastqInputFormatDouble.class,
                    Long.class, String.class, this.conf).mapPartitions(new BigFastq2RDDPartitionsDouble(), true);
        } else {
            // Using the FastqInputFormatDouble class we get values from the HDFS file.
            // After that, these values are stored in an RDD
            return this.ctx.newAPIHadoopFile(this.inputTmpFileName, FastqInputFormatDouble.class,
                    Long.class, String.class, this.conf).map(new BigFastq2RDDDouble());
        }

    } catch (IOException e) {
        e.printStackTrace();
        LOG.error(e.toString());
        return null;
    }
}
From source file:BigBWA.java
License:Open Source License
@Override
public int run(String[] args) throws Exception {
    Configuration conf = this.getConf();

    for (String argumento : args) {
        LOG.info("Arg: " + argumento);
    }

    String inputPath = "";
    String outputPath = "";
    boolean useReducer = false;

    BwaOptions options = new BwaOptions(args);

    // We set the timeout and establish the bwa library to call BWA methods
    conf.set("mapreduce.task.timeout", "0");
    conf.set("mapreduce.map.env", "LD_LIBRARY_PATH=./bwa.zip/");

    //==================Algorithm election==================
    // One of the algorithms is going to be in use, because the default is always specified.
    if (options.isMemAlgorithm()) {
        // Case of the mem algorithm
        conf.set("mem", "true");
        conf.set("aln", "false");
        conf.set("bwasw", "false");
    } else if (options.isAlnAlgorithm()) {
        // Case of aln algorithm
        conf.set("mem", "false");
        conf.set("aln", "true");
        conf.set("bwasw", "false");
    } else if (options.isBwaswAlgorithm()) {
        // Case of bwasw algorithm
        conf.set("mem", "false");
        conf.set("aln", "false");
        conf.set("bwasw", "true");
    }

    //==================Index election==================
    if (!options.getIndexPath().isEmpty()) {
        conf.set("indexRoute", options.getIndexPath());
    } else {
        System.err.println("No index has been found. Aborting.");
        System.exit(1);
    }

    //==================Type of reads election==================
    // There is always going to be a type of reads, because default is paired
    if (options.isPairedReads()) {
        conf.set("paired", "true");
        conf.set("single", "false");
    } else if (options.isSingleReads()) {
        conf.set("paired", "false");
        conf.set("single", "true");
    }

    //==================Use of reducer==================
    if (options.isUseReducer()) {
        useReducer = true;
        conf.set("useReducer", "true");
    } else {
        conf.set("useReducer", "false");
    }

    //==================Number of threads per map==================
    if (!options.getNumThreads().equals("0")) {
        conf.set("bwathreads", options.getNumThreads());
    }

    //==================RG Header===================
    if (!options.getReadgroupHeader().isEmpty()) {
        conf.set("rgheader", options.getReadgroupHeader());
    }

    //==================Input and output paths==================
    inputPath = options.getInputPath();
    outputPath = options.getOutputPath();

    conf.set("outputGenomics", outputPath);

    //==================Partition number==================
    if (options.getPartitionNumber() != 0) {
        try {
            FileSystem fs = FileSystem.get(conf);

            Path inputFilePath = new Path(inputPath);

            ContentSummary cSummary = fs.getContentSummary(inputFilePath);

            long length = cSummary.getLength();

            fs.close();

            conf.set("mapreduce.input.fileinputformat.split.maxsize",
                    String.valueOf((length) / options.getPartitionNumber()));
            conf.set("mapreduce.input.fileinputformat.split.minsize",
                    String.valueOf((length) / options.getPartitionNumber()));
        } catch (IOException e) {
            e.printStackTrace();
            LOG.error(e.toString());
            System.exit(1);
        }
    }

    //Job job = new Job(conf,"BigBWA_"+outputPath);
    Job job = Job.getInstance(conf, "BigBWA_" + outputPath);

    job.setJarByClass(BigBWA.class);
    job.setMapperClass(BigBWAMap.class);
    //job.setCombinerClass(BigBWACombiner.class);

    if (useReducer) {
        job.setReducerClass(BigBWAReducer.class);

        job.setMapOutputKeyClass(IntWritable.class);
        job.setMapOutputValueClass(Text.class);

        job.setNumReduceTasks(1);
    } else {
        job.setNumReduceTasks(0);
    }

    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(Text.class);

    FileInputFormat.addInputPath(job, new Path(inputPath));
    FileOutputFormat.setOutputPath(job, new Path(outputPath));

    return (job.waitForCompletion(true) ? 0 : 1);
}
From source file:com.bigstep.datalake.JsonUtil.java
License:Apache License
/** Convert a ContentSummary to a Json string. */
public static String toJsonString(final ContentSummary contentsummary) {
    if (contentsummary == null) {
        return null;
    }

    final Map<String, Object> m = new TreeMap<String, Object>();
    m.put("length", contentsummary.getLength());
    m.put("fileCount", contentsummary.getFileCount());
    m.put("directoryCount", contentsummary.getDirectoryCount());
    m.put("quota", contentsummary.getQuota());
    m.put("spaceConsumed", contentsummary.getSpaceConsumed());
    m.put("spaceQuota", contentsummary.getSpaceQuota());
    return toJsonString(ContentSummary.class, m);
}
From source file:com.github.seqware.queryengine.tutorial.Poster.java
License:Open Source License
private long convertToGB(ContentSummary contentSummary) {
    // odd, it seems like length reports the equivalent of "hadoop fs -du -s"
    long spaceConsumedinGB = contentSummary.getLength() / 1024 / 1024 / 1024;
    return spaceConsumedinGB;
}
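The comment in the example above touches on a common point of confusion: getLength() reports the logical size of the content (what "hadoop fs -du -s" shows as its first column), while getSpaceConsumed() reports the raw HDFS space used, which includes replication. A small sketch contrasting the two; the path is a placeholder, not from the project above.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.ContentSummary;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class DuExample {
    public static void main(String[] args) throws Exception {
        FileSystem fs = FileSystem.get(new Configuration());
        ContentSummary summary = fs.getContentSummary(new Path("/user/hadoop/data"));

        // Logical size of the content, ignoring replication.
        long logicalBytes = summary.getLength();
        // Physical space consumed, roughly logical size times the replication factor.
        long rawBytes = summary.getSpaceConsumed();

        System.out.println("logical GB: " + logicalBytes / 1024 / 1024 / 1024);
        System.out.println("raw GB:     " + rawBytes / 1024 / 1024 / 1024);
        fs.close();
    }
}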
From source file:com.jkoolcloud.tnt4j.streams.inputs.HdfsFileLineStream.java
License:Apache License
private static int[] getFilesTotals(FileSystem fs, Path[] activityFiles) {
    int tbc = 0;
    int tlc = 0;
    if (ArrayUtils.isNotEmpty(activityFiles)) {
        for (Path f : activityFiles) {
            try {
                ContentSummary cSummary = fs.getContentSummary(f);
                tbc += cSummary.getLength();
                tlc += Utils.countLines(fs.open(f));
            } catch (IOException exc) {
            }
        }
    }

    return new int[] { tbc, tlc };
}
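One detail worth noting when adapting this pattern: getLength() returns a long, so totals accumulated into an int will overflow once the combined size exceeds roughly 2 GB. A minimal variant that keeps the byte total as a long; the class and method names are placeholders, not from the project above.

import java.io.IOException;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

final class HdfsSizeUtil {
    // Sums the byte lengths of several HDFS paths without truncating to int.
    static long totalBytes(FileSystem fs, Path[] files) throws IOException {
        long total = 0L; // keep the running total as a long, matching getLength()
        if (files != null) {
            for (Path f : files) {
                total += fs.getContentSummary(f).getLength();
            }
        }
        return total;
    }
}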
From source file:org.apache.falcon.regression.core.util.AssertUtil.java
License:Apache License
/**
 * Checks that the content at two locations has the same size.
 *
 * @param firstPath path to the first location
 * @param secondPath path to the second location
 * @param fs hadoop file system for the locations
 * @throws IOException
 */
public static void checkContentSize(String firstPath, String secondPath, FileSystem fs) throws IOException {
    final ContentSummary firstSummary = fs.getContentSummary(new Path(firstPath));
    final ContentSummary secondSummary = fs.getContentSummary(new Path(secondPath));
    LOGGER.info(firstPath + " : firstSummary = " + firstSummary.toString(false));
    LOGGER.info(secondPath + " : secondSummary = " + secondSummary.toString(false));
    Assert.assertEquals(firstSummary.getLength(), secondSummary.getLength(),
        "Contents at the two locations don't have same size.");
}
From source file:org.apache.falcon.regression.ExternalFSTest.java
License:Apache License
@Test(dataProvider = "getData")
public void replicateToExternalFS(final FileSystem externalFS, final String separator, final boolean withData)
    throws Exception {
    final String endpoint = externalFS.getUri().toString();
    Bundle.submitCluster(bundles[0], externalBundle);
    String startTime = TimeUtil.getTimeWrtSystemTime(0);
    String endTime = TimeUtil.addMinsToTime(startTime, 5);
    LOGGER.info("Time range between : " + startTime + " and " + endTime);
    String datePattern = StringUtils
        .join(new String[] { "${YEAR}", "${MONTH}", "${DAY}", "${HOUR}", "${MINUTE}" }, separator);

    //configure feed
    FeedMerlin feed = new FeedMerlin(bundles[0].getDataSets().get(0));
    String targetDataLocation = endpoint + testWasbTargetDir + datePattern;
    feed.setFilePath(sourcePath + '/' + datePattern);
    //erase all clusters from feed definition
    feed.clearFeedClusters();
    //set local cluster as source
    feed.addFeedCluster(new FeedMerlin.FeedClusterBuilder(Util.readEntityName(bundles[0].getClusters().get(0)))
        .withRetention("days(1000000)", ActionType.DELETE).withValidity(startTime, endTime)
        .withClusterType(ClusterType.SOURCE).build());
    //set externalFS cluster as target
    feed.addFeedCluster(
        new FeedMerlin.FeedClusterBuilder(Util.readEntityName(externalBundle.getClusters().get(0)))
            .withRetention("days(1000000)", ActionType.DELETE).withValidity(startTime, endTime)
            .withClusterType(ClusterType.TARGET).withDataLocation(targetDataLocation).build());

    //submit and schedule feed
    LOGGER.info("Feed : " + Util.prettyPrintXml(feed.toString()));
    AssertUtil.assertSucceeded(prism.getFeedHelper().submitAndSchedule(feed.toString()));
    datePattern = StringUtils.join(new String[] { "yyyy", "MM", "dd", "HH", "mm" }, separator);

    //upload necessary data
    DateTime date = new DateTime(startTime, DateTimeZone.UTC);
    DateTimeFormatter fmt = DateTimeFormat.forPattern(datePattern);
    String timePattern = fmt.print(date);
    HadoopUtil.recreateDir(clusterFS, sourcePath + '/' + timePattern);
    if (withData) {
        HadoopUtil.copyDataToFolder(clusterFS, sourcePath + '/' + timePattern, OSUtil.SINGLE_FILE);
    }

    Path srcPath = new Path(sourcePath + '/' + timePattern);
    Path dstPath = new Path(endpoint + testWasbTargetDir + '/' + timePattern);

    //check if coordinator exists
    TimeUtil.sleepSeconds(10);
    InstanceUtil.waitTillInstancesAreCreated(clusterOC, feed.toString(), 0);
    Assert.assertEquals(OozieUtil.checkIfFeedCoordExist(clusterOC, feed.getName(), "REPLICATION"), 1);

    //replication should start, wait while it ends
    InstanceUtil.waitTillInstanceReachState(clusterOC, Util.readEntityName(feed.toString()), 1,
        CoordinatorAction.Status.SUCCEEDED, EntityType.FEED);

    //check if data has been replicated correctly
    List<Path> cluster1ReplicatedData = HadoopUtil.getAllFilesRecursivelyHDFS(clusterFS, srcPath);
    List<Path> cluster2ReplicatedData = HadoopUtil.getAllFilesRecursivelyHDFS(externalFS, dstPath);
    AssertUtil.checkForListSizes(cluster1ReplicatedData, cluster2ReplicatedData);
    final ContentSummary srcSummary = clusterFS.getContentSummary(srcPath);
    final ContentSummary dstSummary = externalFS.getContentSummary(dstPath);
    Assert.assertEquals(dstSummary.getLength(), srcSummary.getLength());
}
From source file:org.apache.falcon.regression.hcat.HCatProcessTest.java
License:Apache License
@Test(dataProvider = "generateSeparators")
public void twoHCatInputOneHCatOutput(String separator) throws Exception {
    /* upload data and create partition */
    final String datePattern = StringUtils.join(new String[] { "yyyy", "MM", "dd", "HH" }, separator);
    List<String> dataDates = getDatesList(startDate, endDate, datePattern, 60);

    final List<String> dataset = HadoopUtil.flattenAndPutDataInFolder(clusterFS, localHCatData, inputHDFSDir,
        dataDates);
    final List<String> dataset2 = HadoopUtil.flattenAndPutDataInFolder(clusterFS, localHCatData, inputHDFSDir2,
        dataDates);

    ArrayList<HCatFieldSchema> cols = new ArrayList<>();
    cols.add(HCatUtil.getStringSchema(col1Name, col1Name + " comment"));
    cols.add(HCatUtil.getStringSchema(col2Name, col2Name + " comment"));
    ArrayList<HCatFieldSchema> partitionCols = new ArrayList<>();
    partitionCols.add(HCatUtil.getStringSchema(partitionColumn, partitionColumn + " partition"));

    clusterHC.createTable(HCatCreateTableDesc.create(dbName, inputTableName, cols).partCols(partitionCols)
        .ifNotExists(true).isTableExternal(true).location(inputHDFSDir).build());
    clusterHC.createTable(HCatCreateTableDesc.create(dbName, inputTableName2, cols).partCols(partitionCols)
        .ifNotExists(true).isTableExternal(true).location(inputHDFSDir2).build());
    clusterHC.createTable(HCatCreateTableDesc.create(dbName, outputTableName, cols).partCols(partitionCols)
        .ifNotExists(true).isTableExternal(true).location(outputHDFSDir).build());

    addPartitionsToTable(dataDates, dataset, "dt", dbName, inputTableName);
    addPartitionsToTable(dataDates, dataset2, "dt", dbName, inputTableName2);

    final String tableUriPartitionFragment = StringUtils
        .join(new String[] { "#dt=${YEAR}", "${MONTH}", "${DAY}", "${HOUR}" }, separator);
    String inputTableUri = "catalog:" + dbName + ":" + inputTableName + tableUriPartitionFragment;
    String inputTableUri2 = "catalog:" + dbName + ":" + inputTableName2 + tableUriPartitionFragment;
    bundles[0].setInputFeedTableUri(inputTableUri);
    bundles[0].setInputFeedPeriodicity(1, Frequency.TimeUnit.hours);
    bundles[0].setInputFeedValidity(startDate, endDate);
    final String inputFeed1 = bundles[0].getInputFeedFromBundle();
    final String inputFeed2Name = Util.readEntityName(inputFeed1) + "-second";

    FeedMerlin feedObj = new FeedMerlin(inputFeed1);
    feedObj.setName(inputFeed2Name);
    feedObj.getTable().setUri(inputTableUri2);
    bundles[0].addInputFeedToBundle("inputData2", feedObj);

    String outputTableUri = "catalog:" + dbName + ":" + outputTableName + tableUriPartitionFragment;
    bundles[0].setOutputFeedTableUri(outputTableUri);
    bundles[0].setOutputFeedPeriodicity(1, Frequency.TimeUnit.hours);
    bundles[0].setOutputFeedValidity(startDate, endDate);
    bundles[0].setProcessValidity(startDate, endDate);
    bundles[0].setProcessPeriodicity(1, Frequency.TimeUnit.hours);
    bundles[0].setProcessInputStartEnd("now(0,0)", "now(0,0)");
    bundles[0].setProcessWorkflow(hiveScriptTwoHCatInputOneHCatOutput, EngineType.HIVE);
    bundles[0].submitFeedsScheduleProcess();

    InstanceUtil.waitTillInstanceReachState(clusterOC, bundles[0].getProcessName(), 1,
        CoordinatorAction.Status.SUCCEEDED, EntityType.PROCESS);

    final ContentSummary inputContentSummary = clusterFS
        .getContentSummary(new Path(inputHDFSDir + "/" + dataDates.get(0)));
    final ContentSummary inputContentSummary2 = clusterFS
        .getContentSummary(new Path(inputHDFSDir2 + "/" + dataDates.get(0)));
    final ContentSummary outputContentSummary = clusterFS
        .getContentSummary(new Path(outputHDFSDir + "/dt=" + dataDates.get(0)));
    LOGGER.info("inputContentSummary = " + inputContentSummary.toString(false));
    LOGGER.info("inputContentSummary2 = " + inputContentSummary2.toString(false));
    LOGGER.info("outputContentSummary = " + outputContentSummary.toString(false));
    Assert.assertEquals(inputContentSummary.getLength() + inputContentSummary2.getLength(),
        outputContentSummary.getLength(), "Unexpected size of the output.");
}
From source file:org.apache.falcon.regression.hcat.HCatProcessTest.java
License:Apache License
@Test(dataProvider = "generateSeparators")
public void twoHCatInputTwoHCatOutput(String separator) throws Exception {
    /* upload data and create partition */
    final String datePattern = StringUtils.join(new String[] { "yyyy", "MM", "dd", "HH" }, separator);
    List<String> dataDates = getDatesList(startDate, endDate, datePattern, 60);

    final List<String> dataset = HadoopUtil.flattenAndPutDataInFolder(clusterFS, localHCatData, inputHDFSDir,
        dataDates);
    final List<String> dataset2 = HadoopUtil.flattenAndPutDataInFolder(clusterFS, localHCatData, inputHDFSDir2,
        dataDates);

    ArrayList<HCatFieldSchema> cols = new ArrayList<>();
    cols.add(HCatUtil.getStringSchema(col1Name, col1Name + " comment"));
    cols.add(HCatUtil.getStringSchema(col2Name, col2Name + " comment"));
    ArrayList<HCatFieldSchema> partitionCols = new ArrayList<>();
    partitionCols.add(HCatUtil.getStringSchema(partitionColumn, partitionColumn + " partition"));

    clusterHC.createTable(HCatCreateTableDesc.create(dbName, inputTableName, cols).partCols(partitionCols)
        .ifNotExists(true).isTableExternal(true).location(inputHDFSDir).build());
    clusterHC.createTable(HCatCreateTableDesc.create(dbName, inputTableName2, cols).partCols(partitionCols)
        .ifNotExists(true).isTableExternal(true).location(inputHDFSDir2).build());
    clusterHC.createTable(HCatCreateTableDesc.create(dbName, outputTableName, cols).partCols(partitionCols)
        .ifNotExists(true).isTableExternal(true).location(outputHDFSDir).build());
    clusterHC.createTable(HCatCreateTableDesc.create(dbName, outputTableName2, cols).partCols(partitionCols)
        .ifNotExists(true).isTableExternal(true).location(outputHDFSDir2).build());

    addPartitionsToTable(dataDates, dataset, "dt", dbName, inputTableName);
    addPartitionsToTable(dataDates, dataset2, "dt", dbName, inputTableName2);

    final String tableUriPartitionFragment = StringUtils
        .join(new String[] { "#dt=${YEAR}", "${MONTH}", "${DAY}", "${HOUR}" }, separator);
    String inputTableUri = "catalog:" + dbName + ":" + inputTableName + tableUriPartitionFragment;
    String inputTableUri2 = "catalog:" + dbName + ":" + inputTableName2 + tableUriPartitionFragment;
    bundles[0].setInputFeedTableUri(inputTableUri);
    bundles[0].setInputFeedPeriodicity(1, Frequency.TimeUnit.hours);
    bundles[0].setInputFeedValidity(startDate, endDate);
    final String inputFeed1 = bundles[0].getInputFeedFromBundle();
    final String inputFeed2Name = Util.readEntityName(inputFeed1) + "-second";
    FeedMerlin feedObj = new FeedMerlin(inputFeed1);
    feedObj.setName(inputFeed2Name);
    feedObj.getTable().setUri(inputTableUri2);
    bundles[0].addInputFeedToBundle("inputData2", feedObj);

    String outputTableUri = "catalog:" + dbName + ":" + outputTableName + tableUriPartitionFragment;
    String outputTableUri2 = "catalog:" + dbName + ":" + outputTableName2 + tableUriPartitionFragment;
    bundles[0].setOutputFeedTableUri(outputTableUri);
    bundles[0].setOutputFeedPeriodicity(1, Frequency.TimeUnit.hours);
    bundles[0].setOutputFeedValidity(startDate, endDate);
    final String outputFeed1 = bundles[0].getOutputFeedFromBundle();
    final String outputFeed2Name = Util.readEntityName(outputFeed1) + "-second";
    FeedMerlin feedObj2 = new FeedMerlin(outputFeed1);
    feedObj2.setName(outputFeed2Name);
    feedObj2.getTable().setUri(outputTableUri2);
    bundles[0].addOutputFeedToBundle("outputData2", feedObj2);

    bundles[0].setProcessValidity(startDate, endDate);
    bundles[0].setProcessPeriodicity(1, Frequency.TimeUnit.hours);
    bundles[0].setProcessInputStartEnd("now(0,0)", "now(0,0)");
    bundles[0].setProcessWorkflow(hiveScriptTwoHCatInputTwoHCatOutput, EngineType.HIVE);
    bundles[0].submitFeedsScheduleProcess();

    InstanceUtil.waitTillInstanceReachState(clusterOC, bundles[0].getProcessName(), 1,
        CoordinatorAction.Status.SUCCEEDED, EntityType.PROCESS);

    final ContentSummary inputContentSummary = clusterFS
        .getContentSummary(new Path(inputHDFSDir + "/" + dataDates.get(0)));
    final ContentSummary inputContentSummary2 = clusterFS
        .getContentSummary(new Path(inputHDFSDir2 + "/" + dataDates.get(0)));
    final ContentSummary outputContentSummary = clusterFS
        .getContentSummary(new Path(outputHDFSDir + "/dt=" + dataDates.get(0)));
    final ContentSummary outputContentSummary2 = clusterFS
        .getContentSummary(new Path(outputHDFSDir2 + "/dt=" + dataDates.get(0)));
    LOGGER.info("inputContentSummary = " + inputContentSummary.toString(false));
    LOGGER.info("inputContentSummary2 = " + inputContentSummary2.toString(false));
    LOGGER.info("outputContentSummary = " + outputContentSummary.toString(false));
    LOGGER.info("outputContentSummary2 = " + outputContentSummary2.toString(false));
    Assert.assertEquals(inputContentSummary.getLength() + inputContentSummary2.getLength(),
        outputContentSummary.getLength(), "Unexpected size of the output.");
    Assert.assertEquals(inputContentSummary.getLength() + inputContentSummary2.getLength(),
        outputContentSummary2.getLength(), "Unexpected size of the output.");
}