Example usage for org.apache.hadoop.fs ContentSummary toString

List of usage examples for org.apache.hadoop.fs ContentSummary toString

Introduction

On this page you can find example usage for org.apache.hadoop.fs ContentSummary toString.

Prototype

public String toString(boolean qOption) 

Document

Return the string representation of the object in the output format. If qOption is false, the output shows the directory count, file count, and content size; if qOption is true, the quota and remaining quota are included as well.
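
As a quick illustration, the minimal sketch below calls both variants against a single HDFS path. The path /tmp/example and the default Configuration are assumptions made for this sketch, not part of the examples that follow.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.ContentSummary;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class ContentSummaryToStringDemo {
    public static void main(String[] args) throws Exception {
        // Assumes the Hadoop configuration on the classpath points at the target file system.
        FileSystem fs = FileSystem.get(new Configuration());
        // Hypothetical path used only for illustration.
        ContentSummary summary = fs.getContentSummary(new Path("/tmp/example"));
        // Without quota columns: directory count, file count, content size.
        System.out.println(summary.toString(false));
        // With quota and remaining quota columns included.
        System.out.println(summary.toString(true));
    }
}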

Usage

From source file:org.apache.falcon.regression.core.util.AssertUtil.java

License:Apache License

/**
 * Checks size of the content at two locations.
 *
 * @param firstPath  path to the first location
 * @param secondPath path to the second location
 * @param fs         hadoop file system for the locations
 * @throws IOException
 */
public static void checkContentSize(String firstPath, String secondPath, FileSystem fs) throws IOException {
    final ContentSummary firstSummary = fs.getContentSummary(new Path(firstPath));
    final ContentSummary secondSummary = fs.getContentSummary(new Path(secondPath));
    LOGGER.info(firstPath + " : firstSummary = " + firstSummary.toString(false));
    LOGGER.info(secondPath + " : secondSummary = " + secondSummary.toString(false));
    Assert.assertEquals(firstSummary.getLength(), secondSummary.getLength(),
            "Contents at the two locations don't have same size.");
}
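
A hedged sketch of invoking this helper from a test follows; the file system lookup and both paths are hypothetical and used only for illustration.

@Test
public void replicatedDataKeepsItsSize() throws IOException {
    // Hypothetical paths; a real test would take these from its setup.
    FileSystem clusterFS = FileSystem.get(new Configuration());
    AssertUtil.checkContentSize("/falcon/source/2015/01/01", "/falcon/target/2015/01/01", clusterFS);
}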

From source file:org.apache.falcon.regression.hcat.HCatProcessTest.java

License:Apache License

@Test(dataProvider = "generateSeparators")
public void twoHCatInputOneHCatOutput(String separator) throws Exception {
    /* upload data and create partition */
    final String datePattern = StringUtils.join(new String[] { "yyyy", "MM", "dd", "HH" }, separator);
    List<String> dataDates = getDatesList(startDate, endDate, datePattern, 60);

    final List<String> dataset = HadoopUtil.flattenAndPutDataInFolder(clusterFS, localHCatData, inputHDFSDir,
            dataDates);
    final List<String> dataset2 = HadoopUtil.flattenAndPutDataInFolder(clusterFS, localHCatData, inputHDFSDir2,
            dataDates);

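    // Define the table schema and create the two external input tables and the single external output table.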
    ArrayList<HCatFieldSchema> cols = new ArrayList<>();
    cols.add(HCatUtil.getStringSchema(col1Name, col1Name + " comment"));
    cols.add(HCatUtil.getStringSchema(col2Name, col2Name + " comment"));
    ArrayList<HCatFieldSchema> partitionCols = new ArrayList<>();

    partitionCols.add(HCatUtil.getStringSchema(partitionColumn, partitionColumn + " partition"));
    clusterHC.createTable(HCatCreateTableDesc.create(dbName, inputTableName, cols).partCols(partitionCols)
            .ifNotExists(true).isTableExternal(true).location(inputHDFSDir).build());

    clusterHC.createTable(HCatCreateTableDesc.create(dbName, inputTableName2, cols).partCols(partitionCols)
            .ifNotExists(true).isTableExternal(true).location(inputHDFSDir2).build());

    clusterHC.createTable(HCatCreateTableDesc.create(dbName, outputTableName, cols).partCols(partitionCols)
            .ifNotExists(true).isTableExternal(true).location(outputHDFSDir).build());

    addPartitionsToTable(dataDates, dataset, "dt", dbName, inputTableName);
    addPartitionsToTable(dataDates, dataset2, "dt", dbName, inputTableName2);

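    // Point the Falcon feeds at the HCat tables (two inputs, one output), configure the process, then submit and schedule everything.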
    final String tableUriPartitionFragment = StringUtils
            .join(new String[] { "#dt=${YEAR}", "${MONTH}", "${DAY}", "${HOUR}" }, separator);
    String inputTableUri = "catalog:" + dbName + ":" + inputTableName + tableUriPartitionFragment;
    String inputTableUri2 = "catalog:" + dbName + ":" + inputTableName2 + tableUriPartitionFragment;
    bundles[0].setInputFeedTableUri(inputTableUri);
    bundles[0].setInputFeedPeriodicity(1, Frequency.TimeUnit.hours);
    bundles[0].setInputFeedValidity(startDate, endDate);
    final String inputFeed1 = bundles[0].getInputFeedFromBundle();
    final String inputFeed2Name = Util.readEntityName(inputFeed1) + "-second";

    FeedMerlin feedObj = new FeedMerlin(inputFeed1);
    feedObj.setName(inputFeed2Name);
    feedObj.getTable().setUri(inputTableUri2);

    bundles[0].addInputFeedToBundle("inputData2", feedObj);

    String outputTableUri = "catalog:" + dbName + ":" + outputTableName + tableUriPartitionFragment;
    bundles[0].setOutputFeedTableUri(outputTableUri);
    bundles[0].setOutputFeedPeriodicity(1, Frequency.TimeUnit.hours);
    bundles[0].setOutputFeedValidity(startDate, endDate);

    bundles[0].setProcessValidity(startDate, endDate);
    bundles[0].setProcessPeriodicity(1, Frequency.TimeUnit.hours);
    bundles[0].setProcessInputStartEnd("now(0,0)", "now(0,0)");
    bundles[0].setProcessWorkflow(hiveScriptTwoHCatInputOneHCatOutput, EngineType.HIVE);
    bundles[0].submitFeedsScheduleProcess();

    InstanceUtil.waitTillInstanceReachState(clusterOC, bundles[0].getProcessName(), 1,
            CoordinatorAction.Status.SUCCEEDED, EntityType.PROCESS);

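    // The single output partition is expected to be exactly as large as the two input partitions combined.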
    final ContentSummary inputContentSummary = clusterFS
            .getContentSummary(new Path(inputHDFSDir + "/" + dataDates.get(0)));
    final ContentSummary inputContentSummary2 = clusterFS
            .getContentSummary(new Path(inputHDFSDir2 + "/" + dataDates.get(0)));
    final ContentSummary outputContentSummary = clusterFS
            .getContentSummary(new Path(outputHDFSDir + "/dt=" + dataDates.get(0)));
    LOGGER.info("inputContentSummary = " + inputContentSummary.toString(false));
    LOGGER.info("inputContentSummary2 = " + inputContentSummary2.toString(false));
    LOGGER.info("outputContentSummary = " + outputContentSummary.toString(false));
    Assert.assertEquals(inputContentSummary.getLength() + inputContentSummary2.getLength(),
            outputContentSummary.getLength(), "Unexpected size of the output.");
}

From source file:org.apache.falcon.regression.hcat.HCatProcessTest.java

License:Apache License

@Test(dataProvider = "generateSeparators")
public void twoHCatInputTwoHCatOutput(String separator) throws Exception {
    /* upload data and create partition */
    final String datePattern = StringUtils.join(new String[] { "yyyy", "MM", "dd", "HH" }, separator);
    List<String> dataDates = getDatesList(startDate, endDate, datePattern, 60);

    final List<String> dataset = HadoopUtil.flattenAndPutDataInFolder(clusterFS, localHCatData, inputHDFSDir,
            dataDates);
    final List<String> dataset2 = HadoopUtil.flattenAndPutDataInFolder(clusterFS, localHCatData, inputHDFSDir2,
            dataDates);

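    // Define the table schema and create two external input tables and two external output tables.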
    ArrayList<HCatFieldSchema> cols = new ArrayList<>();
    cols.add(HCatUtil.getStringSchema(col1Name, col1Name + " comment"));
    cols.add(HCatUtil.getStringSchema(col2Name, col2Name + " comment"));
    ArrayList<HCatFieldSchema> partitionCols = new ArrayList<>();

    partitionCols.add(HCatUtil.getStringSchema(partitionColumn, partitionColumn + " partition"));
    clusterHC.createTable(HCatCreateTableDesc.create(dbName, inputTableName, cols).partCols(partitionCols)
            .ifNotExists(true).isTableExternal(true).location(inputHDFSDir).build());

    clusterHC.createTable(HCatCreateTableDesc.create(dbName, inputTableName2, cols).partCols(partitionCols)
            .ifNotExists(true).isTableExternal(true).location(inputHDFSDir2).build());

    clusterHC.createTable(HCatCreateTableDesc.create(dbName, outputTableName, cols).partCols(partitionCols)
            .ifNotExists(true).isTableExternal(true).location(outputHDFSDir).build());

    clusterHC.createTable(HCatCreateTableDesc.create(dbName, outputTableName2, cols).partCols(partitionCols)
            .ifNotExists(true).isTableExternal(true).location(outputHDFSDir2).build());

    addPartitionsToTable(dataDates, dataset, "dt", dbName, inputTableName);
    addPartitionsToTable(dataDates, dataset2, "dt", dbName, inputTableName2);

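    // Point the feeds at the HCat tables (two inputs, two outputs), configure the process, then submit and schedule everything.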
    final String tableUriPartitionFragment = StringUtils
            .join(new String[] { "#dt=${YEAR}", "${MONTH}", "${DAY}", "${HOUR}" }, separator);
    String inputTableUri = "catalog:" + dbName + ":" + inputTableName + tableUriPartitionFragment;
    String inputTableUri2 = "catalog:" + dbName + ":" + inputTableName2 + tableUriPartitionFragment;
    bundles[0].setInputFeedTableUri(inputTableUri);
    bundles[0].setInputFeedPeriodicity(1, Frequency.TimeUnit.hours);
    bundles[0].setInputFeedValidity(startDate, endDate);
    final String inputFeed1 = bundles[0].getInputFeedFromBundle();
    final String inputFeed2Name = Util.readEntityName(inputFeed1) + "-second";
    FeedMerlin feedObj = new FeedMerlin(inputFeed1);
    feedObj.setName(inputFeed2Name);
    feedObj.getTable().setUri(inputTableUri2);
    bundles[0].addInputFeedToBundle("inputData2", feedObj);

    String outputTableUri = "catalog:" + dbName + ":" + outputTableName + tableUriPartitionFragment;
    String outputTableUri2 = "catalog:" + dbName + ":" + outputTableName2 + tableUriPartitionFragment;
    bundles[0].setOutputFeedTableUri(outputTableUri);
    bundles[0].setOutputFeedPeriodicity(1, Frequency.TimeUnit.hours);
    bundles[0].setOutputFeedValidity(startDate, endDate);
    final String outputFeed1 = bundles[0].getOutputFeedFromBundle();
    final String outputFeed2Name = Util.readEntityName(outputFeed1) + "-second";
    FeedMerlin feedObj2 = new FeedMerlin(outputFeed1);
    feedObj2.setName(outputFeed2Name);
    feedObj2.getTable().setUri(outputTableUri2);
    bundles[0].addOutputFeedToBundle("outputData2", feedObj2);
    bundles[0].setProcessValidity(startDate, endDate);
    bundles[0].setProcessPeriodicity(1, Frequency.TimeUnit.hours);
    bundles[0].setProcessInputStartEnd("now(0,0)", "now(0,0)");
    bundles[0].setProcessWorkflow(hiveScriptTwoHCatInputTwoHCatOutput, EngineType.HIVE);
    bundles[0].submitFeedsScheduleProcess();

    InstanceUtil.waitTillInstanceReachState(clusterOC, bundles[0].getProcessName(), 1,
            CoordinatorAction.Status.SUCCEEDED, EntityType.PROCESS);

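    // Each of the two output partitions is expected to be exactly as large as the two input partitions combined.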
    final ContentSummary inputContentSummary = clusterFS
            .getContentSummary(new Path(inputHDFSDir + "/" + dataDates.get(0)));
    final ContentSummary inputContentSummary2 = clusterFS
            .getContentSummary(new Path(inputHDFSDir2 + "/" + dataDates.get(0)));
    final ContentSummary outputContentSummary = clusterFS
            .getContentSummary(new Path(outputHDFSDir + "/dt=" + dataDates.get(0)));
    final ContentSummary outputContentSummary2 = clusterFS
            .getContentSummary(new Path(outputHDFSDir2 + "/dt=" + dataDates.get(0)));
    LOGGER.info("inputContentSummary = " + inputContentSummary.toString(false));
    LOGGER.info("inputContentSummary2 = " + inputContentSummary2.toString(false));
    LOGGER.info("outputContentSummary = " + outputContentSummary.toString(false));
    LOGGER.info("outputContentSummary2 = " + outputContentSummary2.toString(false));
    Assert.assertEquals(inputContentSummary.getLength() + inputContentSummary2.getLength(),
            outputContentSummary.getLength(), "Unexpected size of the output.");
    Assert.assertEquals(inputContentSummary.getLength() + inputContentSummary2.getLength(),
            outputContentSummary2.getLength(), "Unexpected size of the output.");
}

From source file:org.springframework.data.hadoop.fs.FsShell.java

License:Apache License

public Map<Path, ContentSummary> count(final boolean quota, String... uris) {

    final Map<Path, ContentSummary> results = new PrettyPrintMap<Path, ContentSummary>(uris.length,
            new MapPrinter<Path, ContentSummary>() {
                @Override
                public String toString(Path p, ContentSummary c) throws IOException {
                    return c.toString(quota) + p;
                }
            });

    for (String src : uris) {
        try {
            Path srcPath = new Path(src);
            final FileSystem fs = getFS(srcPath);
            FileStatus[] statuses = fs.globStatus(srcPath);
            Assert.notEmpty(statuses, "Can not find listing for " + src);
            for (FileStatus s : statuses) {
                Path p = s.getPath();
                results.put(p, fs.getContentSummary(p));
            }
        } catch (IOException ex) {
            throw new HadoopException("Cannot find listing " + ex.getMessage(), ex);
        }
    }

    return Collections.unmodifiableMap(results);
}
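
A minimal usage sketch for this count helper is shown below, assuming FsShell's single-argument Configuration constructor; the glob path is hypothetical.

// Hypothetical usage; each entry is printed in the same format as c.toString(quota) above.
FsShell shell = new FsShell(new Configuration());
Map<Path, ContentSummary> counts = shell.count(true, "/user/data/*");
for (Map.Entry<Path, ContentSummary> entry : counts.entrySet()) {
    System.out.println(entry.getValue().toString(true) + entry.getKey());
}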