List of usage examples for org.apache.hadoop.fs ContentSummary toString
@Override public String toString(boolean qOption)
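The boolean argument controls whether quota information is included in the formatted summary; with false, only the directory count, file count and content size are printed. A minimal sketch of calling the method directly (the path /tmp/data and the default Configuration are assumptions for illustration):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.ContentSummary;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class ContentSummaryToStringDemo {
    public static void main(String[] args) throws Exception {
        // Assumes the default (or HDFS) file system is reachable and /tmp/data exists.
        FileSystem fs = FileSystem.get(new Configuration());
        ContentSummary summary = fs.getContentSummary(new Path("/tmp/data"));

        // Without quota columns: directory count, file count, content size.
        System.out.println(summary.toString(false));

        // With quota columns prepended to the same fields.
        System.out.println(summary.toString(true));
    }
}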
From source file:org.apache.falcon.regression.core.util.AssertUtil.java
License:Apache License
/**
 * Checks that the content at two locations has the same size.
 *
 * @param firstPath path to the first location
 * @param secondPath path to the second location
 * @param fs hadoop file system for the locations
 * @throws IOException
 */
public static void checkContentSize(String firstPath, String secondPath, FileSystem fs) throws IOException {
    final ContentSummary firstSummary = fs.getContentSummary(new Path(firstPath));
    final ContentSummary secondSummary = fs.getContentSummary(new Path(secondPath));
    LOGGER.info(firstPath + " : firstSummary = " + firstSummary.toString(false));
    LOGGER.info(secondPath + " : secondSummary = " + secondSummary.toString(false));
    Assert.assertEquals(firstSummary.getLength(), secondSummary.getLength(),
            "Contents at the two locations don't have same size.");
}
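A hypothetical call site for the helper above; the two paths are placeholders for any locations expected to hold the same number of bytes (for example, after a replication or copy step):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.falcon.regression.core.util.AssertUtil;

public class CheckContentSizeDemo {
    public static void main(String[] args) throws Exception {
        // Placeholder paths: any two HDFS locations expected to contain the same amount of data.
        FileSystem clusterFS = FileSystem.get(new Configuration());
        AssertUtil.checkContentSize("/falcon/test/source", "/falcon/test/replica", clusterFS);
    }
}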
From source file:org.apache.falcon.regression.hcat.HCatProcessTest.java
License:Apache License
@Test(dataProvider = "generateSeparators") public void twoHCatInputOneHCatOutput(String separator) throws Exception { /* upload data and create partition */ final String datePattern = StringUtils.join(new String[] { "yyyy", "MM", "dd", "HH" }, separator); List<String> dataDates = getDatesList(startDate, endDate, datePattern, 60); final List<String> dataset = HadoopUtil.flattenAndPutDataInFolder(clusterFS, localHCatData, inputHDFSDir, dataDates);/* w w w .j a v a 2s.co m*/ final List<String> dataset2 = HadoopUtil.flattenAndPutDataInFolder(clusterFS, localHCatData, inputHDFSDir2, dataDates); ArrayList<HCatFieldSchema> cols = new ArrayList<>(); cols.add(HCatUtil.getStringSchema(col1Name, col1Name + " comment")); cols.add(HCatUtil.getStringSchema(col2Name, col2Name + " comment")); ArrayList<HCatFieldSchema> partitionCols = new ArrayList<>(); partitionCols.add(HCatUtil.getStringSchema(partitionColumn, partitionColumn + " partition")); clusterHC.createTable(HCatCreateTableDesc.create(dbName, inputTableName, cols).partCols(partitionCols) .ifNotExists(true).isTableExternal(true).location(inputHDFSDir).build()); clusterHC.createTable(HCatCreateTableDesc.create(dbName, inputTableName2, cols).partCols(partitionCols) .ifNotExists(true).isTableExternal(true).location(inputHDFSDir2).build()); clusterHC.createTable(HCatCreateTableDesc.create(dbName, outputTableName, cols).partCols(partitionCols) .ifNotExists(true).isTableExternal(true).location(outputHDFSDir).build()); addPartitionsToTable(dataDates, dataset, "dt", dbName, inputTableName); addPartitionsToTable(dataDates, dataset2, "dt", dbName, inputTableName2); final String tableUriPartitionFragment = StringUtils .join(new String[] { "#dt=${YEAR}", "${MONTH}", "${DAY}", "${HOUR}" }, separator); String inputTableUri = "catalog:" + dbName + ":" + inputTableName + tableUriPartitionFragment; String inputTableUri2 = "catalog:" + dbName + ":" + inputTableName2 + tableUriPartitionFragment; bundles[0].setInputFeedTableUri(inputTableUri); bundles[0].setInputFeedPeriodicity(1, Frequency.TimeUnit.hours); bundles[0].setInputFeedValidity(startDate, endDate); final String inputFeed1 = bundles[0].getInputFeedFromBundle(); final String inputFeed2Name = Util.readEntityName(inputFeed1) + "-second"; FeedMerlin feedObj = new FeedMerlin(inputFeed1); feedObj.setName(inputFeed2Name); feedObj.getTable().setUri(inputTableUri2); bundles[0].addInputFeedToBundle("inputData2", feedObj); String outputTableUri = "catalog:" + dbName + ":" + outputTableName + tableUriPartitionFragment; bundles[0].setOutputFeedTableUri(outputTableUri); bundles[0].setOutputFeedPeriodicity(1, Frequency.TimeUnit.hours); bundles[0].setOutputFeedValidity(startDate, endDate); bundles[0].setProcessValidity(startDate, endDate); bundles[0].setProcessPeriodicity(1, Frequency.TimeUnit.hours); bundles[0].setProcessInputStartEnd("now(0,0)", "now(0,0)"); bundles[0].setProcessWorkflow(hiveScriptTwoHCatInputOneHCatOutput, EngineType.HIVE); bundles[0].submitFeedsScheduleProcess(); InstanceUtil.waitTillInstanceReachState(clusterOC, bundles[0].getProcessName(), 1, CoordinatorAction.Status.SUCCEEDED, EntityType.PROCESS); final ContentSummary inputContentSummary = clusterFS .getContentSummary(new Path(inputHDFSDir + "/" + dataDates.get(0))); final ContentSummary inputContentSummary2 = clusterFS .getContentSummary(new Path(inputHDFSDir2 + "/" + dataDates.get(0))); final ContentSummary outputContentSummary = clusterFS .getContentSummary(new Path(outputHDFSDir + "/dt=" + dataDates.get(0))); LOGGER.info("inputContentSummary = " 
+ inputContentSummary.toString(false)); LOGGER.info("inputContentSummary2 = " + inputContentSummary2.toString(false)); LOGGER.info("outputContentSummary = " + outputContentSummary.toString(false)); Assert.assertEquals(inputContentSummary.getLength() + inputContentSummary2.getLength(), outputContentSummary.getLength(), "Unexpected size of the output."); }
From source file:org.apache.falcon.regression.hcat.HCatProcessTest.java
License:Apache License
@Test(dataProvider = "generateSeparators") public void twoHCatInputTwoHCatOutput(String separator) throws Exception { /* upload data and create partition */ final String datePattern = StringUtils.join(new String[] { "yyyy", "MM", "dd", "HH" }, separator); List<String> dataDates = getDatesList(startDate, endDate, datePattern, 60); final List<String> dataset = HadoopUtil.flattenAndPutDataInFolder(clusterFS, localHCatData, inputHDFSDir, dataDates);/* www . jav a 2s. c om*/ final List<String> dataset2 = HadoopUtil.flattenAndPutDataInFolder(clusterFS, localHCatData, inputHDFSDir2, dataDates); ArrayList<HCatFieldSchema> cols = new ArrayList<>(); cols.add(HCatUtil.getStringSchema(col1Name, col1Name + " comment")); cols.add(HCatUtil.getStringSchema(col2Name, col2Name + " comment")); ArrayList<HCatFieldSchema> partitionCols = new ArrayList<>(); partitionCols.add(HCatUtil.getStringSchema(partitionColumn, partitionColumn + " partition")); clusterHC.createTable(HCatCreateTableDesc.create(dbName, inputTableName, cols).partCols(partitionCols) .ifNotExists(true).isTableExternal(true).location(inputHDFSDir).build()); clusterHC.createTable(HCatCreateTableDesc.create(dbName, inputTableName2, cols).partCols(partitionCols) .ifNotExists(true).isTableExternal(true).location(inputHDFSDir2).build()); clusterHC.createTable(HCatCreateTableDesc.create(dbName, outputTableName, cols).partCols(partitionCols) .ifNotExists(true).isTableExternal(true).location(outputHDFSDir).build()); clusterHC.createTable(HCatCreateTableDesc.create(dbName, outputTableName2, cols).partCols(partitionCols) .ifNotExists(true).isTableExternal(true).location(outputHDFSDir2).build()); addPartitionsToTable(dataDates, dataset, "dt", dbName, inputTableName); addPartitionsToTable(dataDates, dataset2, "dt", dbName, inputTableName2); final String tableUriPartitionFragment = StringUtils .join(new String[] { "#dt=${YEAR}", "${MONTH}", "${DAY}", "${HOUR}" }, separator); String inputTableUri = "catalog:" + dbName + ":" + inputTableName + tableUriPartitionFragment; String inputTableUri2 = "catalog:" + dbName + ":" + inputTableName2 + tableUriPartitionFragment; bundles[0].setInputFeedTableUri(inputTableUri); bundles[0].setInputFeedPeriodicity(1, Frequency.TimeUnit.hours); bundles[0].setInputFeedValidity(startDate, endDate); final String inputFeed1 = bundles[0].getInputFeedFromBundle(); final String inputFeed2Name = Util.readEntityName(inputFeed1) + "-second"; FeedMerlin feedObj = new FeedMerlin(inputFeed1); feedObj.setName(inputFeed2Name); feedObj.getTable().setUri(inputTableUri2); bundles[0].addInputFeedToBundle("inputData2", feedObj); String outputTableUri = "catalog:" + dbName + ":" + outputTableName + tableUriPartitionFragment; String outputTableUri2 = "catalog:" + dbName + ":" + outputTableName2 + tableUriPartitionFragment; bundles[0].setOutputFeedTableUri(outputTableUri); bundles[0].setOutputFeedPeriodicity(1, Frequency.TimeUnit.hours); bundles[0].setOutputFeedValidity(startDate, endDate); final String outputFeed1 = bundles[0].getOutputFeedFromBundle(); final String outputFeed2Name = Util.readEntityName(outputFeed1) + "-second"; FeedMerlin feedObj2 = new FeedMerlin(outputFeed1); feedObj2.setName(outputFeed2Name); feedObj2.getTable().setUri(outputTableUri2); bundles[0].addOutputFeedToBundle("outputData2", feedObj2); bundles[0].setProcessValidity(startDate, endDate); bundles[0].setProcessPeriodicity(1, Frequency.TimeUnit.hours); bundles[0].setProcessInputStartEnd("now(0,0)", "now(0,0)"); 
bundles[0].setProcessWorkflow(hiveScriptTwoHCatInputTwoHCatOutput, EngineType.HIVE); bundles[0].submitFeedsScheduleProcess(); InstanceUtil.waitTillInstanceReachState(clusterOC, bundles[0].getProcessName(), 1, CoordinatorAction.Status.SUCCEEDED, EntityType.PROCESS); final ContentSummary inputContentSummary = clusterFS .getContentSummary(new Path(inputHDFSDir + "/" + dataDates.get(0))); final ContentSummary inputContentSummary2 = clusterFS .getContentSummary(new Path(inputHDFSDir2 + "/" + dataDates.get(0))); final ContentSummary outputContentSummary = clusterFS .getContentSummary(new Path(outputHDFSDir + "/dt=" + dataDates.get(0))); final ContentSummary outputContentSummary2 = clusterFS .getContentSummary(new Path(outputHDFSDir2 + "/dt=" + dataDates.get(0))); LOGGER.info("inputContentSummary = " + inputContentSummary.toString(false)); LOGGER.info("inputContentSummary2 = " + inputContentSummary2.toString(false)); LOGGER.info("outputContentSummary = " + outputContentSummary.toString(false)); LOGGER.info("outputContentSummary2 = " + outputContentSummary2.toString(false)); Assert.assertEquals(inputContentSummary.getLength() + inputContentSummary2.getLength(), outputContentSummary.getLength(), "Unexpected size of the output."); Assert.assertEquals(inputContentSummary.getLength() + inputContentSummary2.getLength(), outputContentSummary2.getLength(), "Unexpected size of the output."); }
From source file:org.springframework.data.hadoop.fs.FsShell.java
License:Apache License
public Map<Path, ContentSummary> count(final boolean quota, String... uris) {
    final Map<Path, ContentSummary> results = new PrettyPrintMap<Path, ContentSummary>(uris.length,
            new MapPrinter<Path, ContentSummary>() {
                @Override
                public String toString(Path p, ContentSummary c) throws IOException {
                    return c.toString(quota) + p;
                }
            });

    for (String src : uris) {
        try {
            Path srcPath = new Path(src);
            final FileSystem fs = getFS(srcPath);
            FileStatus[] statuses = fs.globStatus(srcPath);
            Assert.notEmpty(statuses, "Can not find listing for " + src);
            for (FileStatus s : statuses) {
                Path p = s.getPath();
                results.put(p, fs.getContentSummary(p));
            }
        } catch (IOException ex) {
            throw new HadoopException("Cannot find listing " + ex.getMessage(), ex);
        }
    }
    return Collections.unmodifiableMap(results);
}
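A brief usage sketch for the method above, assuming the FsShell(Configuration) constructor from Spring for Apache Hadoop; the path globs are placeholders:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.ContentSummary;
import org.apache.hadoop.fs.Path;
import org.springframework.data.hadoop.fs.FsShell;

import java.util.Map;

public class FsShellCountDemo {
    public static void main(String[] args) throws Exception {
        // FsShell resolves the FileSystem for each URI from the supplied Configuration.
        FsShell shell = new FsShell(new Configuration());

        // Placeholder globs; passing true asks count() to include quota information,
        // which the MapPrinter above renders via ContentSummary.toString(quota).
        Map<Path, ContentSummary> counts = shell.count(true, "/user/demo/*", "/tmp/staging");

        for (Map.Entry<Path, ContentSummary> e : counts.entrySet()) {
            System.out.println(e.getKey() + " -> " + e.getValue().toString(false));
        }
    }
}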