List of usage examples for org.apache.hadoop.fs FileStatus isDirectory
public boolean isDirectory()
From source file:org.apache.sysml.runtime.util.MapReduceTool.java
License:Apache License
/**
 * Folds the dimension records stored under {@code dir} into {@code stats}.
 *
 * <p>Every entry listed under {@code dir} is opened and read line by line. Each line is split on
 * single spaces into a result index, a row count and a column count. The element of
 * {@code stats} at that index then has its dimension set to the larger of its current value and
 * the parsed value, separately for rows and columns.
 *
 * @param dir   path of the folder holding the dimension files
 * @param stats characteristics updated in place, addressed by the index read from each line
 * @return the {@code stats} array that was passed in; it is returned unmodified when
 *         {@code dir} does not exist
 * @throws IOException if {@code dir} exists but is not a directory, or if an entry cannot be read
 */
public static MatrixCharacteristics[] processDimsFiles(String dir, MatrixCharacteristics[] stats)
        throws IOException {
    Path root = new Path(dir);
    FileSystem fs = IOUtilFunctions.getFileSystem(root);

    // Nothing to merge when the folder was never written.
    if (!fs.exists(root)) {
        return stats;
    }
    if (!fs.getFileStatus(root).isDirectory()) {
        throw new IOException(dir + " is expected to be a folder!");
    }

    for (FileStatus entry : fs.listStatus(root)) {
        try (BufferedReader reader = setupInputFile(entry.getPath().toString())) {
            for (String record = reader.readLine(); record != null; record = reader.readLine()) {
                String[] fields = record.split(" ");
                int resultIndex = Integer.parseInt(fields[0]);
                long maxRows = Long.parseLong(fields[1]);
                long maxCols = Long.parseLong(fields[2]);

                // Keep the largest dimension seen so far for this result.
                MatrixCharacteristics target = stats[resultIndex];
                target.setDimension(
                        Math.max(target.getRows(), maxRows),
                        Math.max(target.getCols(), maxCols));
            }
        }
    }
    return stats;
}
From source file:org.apache.tajo.engine.planner.LogicalPlanner.java
License:Apache License
private void updatePhysicalInfo(TableDesc desc) { if (desc.getPath() != null) { try {//from w ww .ja v a 2 s .c om FileSystem fs = desc.getPath().getFileSystem(new Configuration()); FileStatus status = fs.getFileStatus(desc.getPath()); if (desc.getStats() != null && (status.isDirectory() || status.isFile())) { ContentSummary summary = fs.getContentSummary(desc.getPath()); if (summary != null) { long volume = summary.getLength(); desc.getStats().setNumBytes(volume); } } } catch (Throwable t) { LOG.warn(t); } } }
From source file:org.apache.tajo.engine.planner.physical.PhysicalPlanUtil.java
License:Apache License
/** * * @param fs//from w w w . ja v a 2 s . c o m * @param path The table path * @param result The final result files to be used * @param startFileIndex * @param numResultFiles * @param currentFileIndex * @param partitioned A flag to indicate if this table is partitioned * @param currentDepth Current visiting depth of partition directories * @param maxDepth The partition depth of this table * @throws IOException */ private static void getNonZeroLengthDataFiles(FileSystem fs, Path path, List<FileStatus> result, int startFileIndex, int numResultFiles, AtomicInteger currentFileIndex, boolean partitioned, int currentDepth, int maxDepth) throws IOException { // Intermediate directory if (fs.isDirectory(path)) { FileStatus[] files = fs.listStatus(path, FileStorageManager.hiddenFileFilter); if (files != null && files.length > 0) { for (FileStatus eachFile : files) { // checking if the enough number of files are found if (result.size() >= numResultFiles) { return; } if (eachFile.isDirectory()) { getNonZeroLengthDataFiles(fs, eachFile.getPath(), result, startFileIndex, numResultFiles, currentFileIndex, partitioned, currentDepth + 1, // increment a visiting depth maxDepth); // if partitioned table, we should ignore files located in the intermediate directory. // we can ensure that this file is in leaf directory if currentDepth == maxDepth. } else if (eachFile.isFile() && eachFile.getLen() > 0 && (!partitioned || currentDepth == maxDepth)) { if (currentFileIndex.get() >= startFileIndex) { result.add(eachFile); } currentFileIndex.incrementAndGet(); } } } // Files located in leaf directory } else { FileStatus fileStatus = fs.getFileStatus(path); if (fileStatus != null && fileStatus.getLen() > 0) { if (currentFileIndex.get() >= startFileIndex) { result.add(fileStatus); } currentFileIndex.incrementAndGet(); if (result.size() >= numResultFiles) { return; } } } }
From source file:org.apache.tajo.engine.planner.physical.TestPhysicalPlanner.java
License:Apache License
/**
 * Executes the plan for {@code QUERIES[7]} through a hash-shuffle data channel with three
 * partitions keyed on two columns, then reads back what was written under the execution
 * block's {@code hash-shuffle} directory and checks the file count, the aggregate values of
 * every tuple, the tuple count and the row count reported by the task context.
 */
@Test
public final void testPartitionedStorePlan() throws IOException, PlanningException {
    // Task context over the first fragment of the "default.score" table.
    FileFragment[] frags = FileStorageManager.splitNG(conf, "default.score", score.getMeta(),
            new Path(score.getPath()), Integer.MAX_VALUE);
    TaskAttemptId id = LocalTajoTestingUtility.newTaskAttemptId(masterPlan);
    TaskAttemptContext ctx = new TaskAttemptContext(new QueryContext(conf), id, new FileFragment[] { frags[0] },
            CommonTestingUtil
                    .getTestDir(TajoTestingCluster.DEFAULT_TEST_DIRECTORY + "/testPartitionedStorePlan"));
    ctx.setEnforcer(new Enforcer());
    Expr context = analyzer.parse(QUERIES[7]);
    LogicalPlan plan = planner.createPlan(defaultContext, context);

    // Hash-shuffle channel with two shuffle keys; numPartitions is also the expected file count below.
    int numPartitions = 3;
    Column key1 = new Column("default.score.deptname", Type.TEXT);
    Column key2 = new Column("default.score.class", Type.TEXT);
    DataChannel dataChannel = new DataChannel(masterPlan.newExecutionBlockId(),
            masterPlan.newExecutionBlockId(), ShuffleType.HASH_SHUFFLE, numPartitions);
    dataChannel.setShuffleKeys(new Column[] { key1, key2 });
    ctx.setDataChannel(dataChannel);
    LogicalNode rootNode = optimizer.optimize(plan);

    TableMeta outputMeta = CatalogUtil.newTableMeta(dataChannel.getStoreType());

    FileSystem fs = sm.getFileSystem();
    QueryId queryId = id.getTaskId().getExecutionBlockId().getQueryId();
    ExecutionBlockId ebId = id.getTaskId().getExecutionBlockId();

    // Run the physical plan, then close the hash-shuffle appenders of this execution block
    // before the output directory is listed.
    PhysicalPlanner phyPlanner = new PhysicalPlannerImpl(conf);
    PhysicalExec exec = phyPlanner.createPlan(ctx, rootNode);
    exec.init();
    exec.next();
    exec.close();
    ctx.getHashShuffleAppenderManager().close(ebId);

    // Directory inspected below: <worker temporal dir>/<query id>/output/<execution block id>/hash-shuffle
    String executionBlockBaseDir = queryId.toString() + "/output" + "/" + ebId.getId() + "/hash-shuffle";
    Path queryLocalTmpDir = new Path(conf.getVar(ConfVars.WORKER_TEMPORAL_DIR) + "/" + executionBlockBaseDir);
    FileStatus[] list = fs.listStatus(queryLocalTmpDir);

    // Every entry must be a directory; each file inside one becomes a fragment to scan.
    List<Fragment> fragments = new ArrayList<Fragment>();
    for (FileStatus status : list) {
        assertTrue(status.isDirectory());
        FileStatus[] files = fs.listStatus(status.getPath());
        for (FileStatus eachFile : files) {
            fragments.add(new FileFragment("partition", eachFile.getPath(), 0, eachFile.getLen()));
        }
    }

    assertEquals(numPartitions, fragments.size());

    // Read all fragments back and verify each tuple.
    Scanner scanner = new MergeScanner(conf, rootNode.getOutSchema(), outputMeta, TUtil.newList(fragments));
    scanner.init();

    Tuple tuple;
    int i = 0;
    while ((tuple = scanner.next()) != null) {
        assertEquals(6, tuple.get(2).asInt4()); // sum
        assertEquals(3, tuple.get(3).asInt4()); // max
        assertEquals(1, tuple.get(4).asInt4()); // min
        i++;
    }
    assertEquals(10, i);
    scanner.close();

    // Examine the statistics information
    assertEquals(10, ctx.getResultStats().getNumRows().longValue());

    // Remove the shuffle output produced by this test.
    fs.delete(queryLocalTmpDir, true);
}
From source file:org.apache.tajo.engine.planner.physical.TestPhysicalPlanner.java
License:Apache License
/**
 * Executes {@code CreateTableAsStmts[4]} over the "default.score_large" table with the
 * {@code MAX_OUTPUT_FILE_SIZE} session variable set to 1, then checks that the work directory
 * holds two partition directories, that the number of files in each matches its total volume
 * divided by {@code StorageUnit.MB} (rounded up), and that scanning all written files yields
 * as many rows as the source table's statistics report.
 */
@Test
public final void testPartitionedStorePlanWithMaxFileSize() throws IOException, PlanningException {

    // Preparing working dir and input fragments
    FileFragment[] frags = FileStorageManager.splitNG(conf, "default.score_large", largeScore.getMeta(),
            new Path(largeScore.getPath()), Integer.MAX_VALUE);
    TaskAttemptId id = LocalTajoTestingUtility.newTaskAttemptId(masterPlan);
    Path workDir = CommonTestingUtil
            .getTestDir(TajoTestingCluster.DEFAULT_TEST_DIRECTORY + "/testPartitionedStorePlanWithMaxFileSize");

    // Setting session variables
    // NOTE(review): the value 1 is presumably in megabytes, given the StorageUnit.MB checks below — confirm.
    QueryContext queryContext = new QueryContext(conf, session);
    queryContext.setInt(SessionVars.MAX_OUTPUT_FILE_SIZE, 1);

    // Preparing task context
    TaskAttemptContext ctx = new TaskAttemptContext(queryContext, id, new FileFragment[] { frags[0] }, workDir);
    ctx.setOutputPath(new Path(workDir, "part-01-000000"));
    // SortBasedColumnPartitionStoreExec will be chosen by default.
    ctx.setEnforcer(new Enforcer());
    Expr context = analyzer.parse(CreateTableAsStmts[4]);
    LogicalPlan plan = planner.createPlan(queryContext, context);
    LogicalNode rootNode = optimizer.optimize(plan);

    // Executing CREATE TABLE PARTITION BY
    PhysicalPlanner phyPlanner = new PhysicalPlannerImpl(conf);
    PhysicalExec exec = phyPlanner.createPlan(ctx, rootNode);
    exec.init();
    exec.next();
    exec.close();

    FileSystem fs = sm.getFileSystem();
    FileStatus[] list = fs.listStatus(workDir);
    // checking the number of partitions
    assertEquals(2, list.length);

    List<Fragment> fragments = Lists.newArrayList();
    // NOTE(review): i is never read in this method.
    int i = 0;
    for (FileStatus status : list) {
        assertTrue(status.isDirectory());

        // Sum the sizes of the files in this partition directory and register each as a fragment.
        long fileVolumSum = 0;
        FileStatus[] fileStatuses = fs.listStatus(status.getPath());
        for (FileStatus fileStatus : fileStatuses) {
            fileVolumSum += fileStatus.getLen();
            fragments.add(new FileFragment("partition", fileStatus.getPath(), 0, fileStatus.getLen()));
        }
        // The file-count check below only means something if the partition exceeds one MB
        // and was written as more than one file.
        assertTrue("checking the meaningfulness of test",
                fileVolumSum > StorageUnit.MB && fileStatuses.length > 1);

        // Expected file count: total volume divided by StorageUnit.MB, rounded up.
        long expectedFileNum = (long) Math.ceil(fileVolumSum / (float) StorageUnit.MB);
        assertEquals(expectedFileNum, fileStatuses.length);
    }
    TableMeta outputMeta = CatalogUtil.newTableMeta(StoreType.CSV);
    Scanner scanner = new MergeScanner(conf, rootNode.getOutSchema(), outputMeta, TUtil.newList(fragments));
    scanner.init();

    long rowNum = 0;
    while (scanner.next() != null) {
        rowNum++;
    }

    // checking the number of all written rows
    assertTrue(largeScore.getStats().getNumRows() == rowNum);

    scanner.close();
}
From source file:org.apache.tajo.engine.planner.physical.TestPhysicalPlanner.java
License:Apache License
/**
 * Executes the plan for {@code QUERIES[14]} through a hash-shuffle data channel with a single
 * partition and no shuffle keys, then reads back what was written under the execution block's
 * {@code hash-shuffle} directory and checks that one file holding one aggregate tuple was
 * produced.
 */
@Test
public final void testPartitionedStorePlanWithEmptyGroupingSet() throws IOException, PlanningException {
    // Task context over the first fragment of the "default.score" table.
    FileFragment[] frags = FileStorageManager.splitNG(conf, "default.score", score.getMeta(),
            new Path(score.getPath()), Integer.MAX_VALUE);
    TaskAttemptId id = LocalTajoTestingUtility.newTaskAttemptId(masterPlan);

    Path workDir = CommonTestingUtil.getTestDir(
            TajoTestingCluster.DEFAULT_TEST_DIRECTORY + "/testPartitionedStorePlanWithEmptyGroupingSet");
    TaskAttemptContext ctx = new TaskAttemptContext(new QueryContext(conf), id, new FileFragment[] { frags[0] },
            workDir);
    ctx.setEnforcer(new Enforcer());
    Expr expr = analyzer.parse(QUERIES[14]);
    LogicalPlan plan = planner.createPlan(defaultContext, expr);
    // The root node is taken from the plan's root block; the return value of optimize() below is not used.
    LogicalNode rootNode = plan.getRootBlock().getRoot();

    // Single-partition hash-shuffle channel with an empty shuffle-key array.
    int numPartitions = 1;
    DataChannel dataChannel = new DataChannel(masterPlan.newExecutionBlockId(),
            masterPlan.newExecutionBlockId(), ShuffleType.HASH_SHUFFLE, numPartitions);
    dataChannel.setShuffleKeys(new Column[] {});
    ctx.setDataChannel(dataChannel);
    optimizer.optimize(plan);

    TableMeta outputMeta = CatalogUtil.newTableMeta(dataChannel.getStoreType());

    FileSystem fs = sm.getFileSystem();
    QueryId queryId = id.getTaskId().getExecutionBlockId().getQueryId();
    ExecutionBlockId ebId = id.getTaskId().getExecutionBlockId();

    // Run the physical plan, then close the hash-shuffle appenders of this execution block
    // before the output directory is listed.
    PhysicalPlanner phyPlanner = new PhysicalPlannerImpl(conf);
    PhysicalExec exec = phyPlanner.createPlan(ctx, rootNode);
    exec.init();
    exec.next();
    exec.close();
    ctx.getHashShuffleAppenderManager().close(ebId);

    // Directory inspected below: <worker temporal dir>/<query id>/output/<execution block id>/hash-shuffle
    String executionBlockBaseDir = queryId.toString() + "/output" + "/" + ebId.getId() + "/hash-shuffle";
    Path queryLocalTmpDir = new Path(conf.getVar(ConfVars.WORKER_TEMPORAL_DIR) + "/" + executionBlockBaseDir);
    FileStatus[] list = fs.listStatus(queryLocalTmpDir);

    // Every entry must be a directory; each file inside one becomes a fragment to scan.
    List<Fragment> fragments = new ArrayList<Fragment>();
    for (FileStatus status : list) {
        assertTrue(status.isDirectory());
        FileStatus[] files = fs.listStatus(status.getPath());
        for (FileStatus eachFile : files) {
            fragments.add(new FileFragment("partition", eachFile.getPath(), 0, eachFile.getLen()));
        }
    }

    assertEquals(numPartitions, fragments.size());

    // Read all fragments back and verify each tuple.
    Scanner scanner = new MergeScanner(conf, rootNode.getOutSchema(), outputMeta, TUtil.newList(fragments));
    scanner.init();
    Tuple tuple;
    int i = 0;
    while ((tuple = scanner.next()) != null) {
        assertEquals(60, tuple.get(0).asInt4()); // sum
        assertEquals(3, tuple.get(1).asInt4()); // max
        assertEquals(1, tuple.get(2).asInt4()); // min
        i++;
    }
    assertEquals(1, i);
    scanner.close();

    // Examine the statistics information
    assertEquals(1, ctx.getResultStats().getNumRows().longValue());

    // Remove the shuffle output produced by this test.
    fs.delete(queryLocalTmpDir, true);
}
From source file:org.apache.tajo.engine.planner.PlannerUtil.java
License:Apache License
private static void getNonZeroLengthDataFiles(FileSystem fs, Path path, List<FileStatus> result, int startFileIndex, int numResultFiles, AtomicInteger currentFileIndex) throws IOException { if (fs.isDirectory(path)) { FileStatus[] files = fs.listStatus(path, StorageManager.hiddenFileFilter); if (files != null && files.length > 0) { for (FileStatus eachFile : files) { if (result.size() >= numResultFiles) { return; }//from ww w . j av a 2s . c o m if (eachFile.isDirectory()) { getNonZeroLengthDataFiles(fs, eachFile.getPath(), result, startFileIndex, numResultFiles, currentFileIndex); } else if (eachFile.isFile() && eachFile.getLen() > 0) { if (currentFileIndex.get() >= startFileIndex) { result.add(eachFile); } currentFileIndex.incrementAndGet(); } } } } else { FileStatus fileStatus = fs.getFileStatus(path); if (fileStatus != null && fileStatus.getLen() > 0) { if (currentFileIndex.get() >= startFileIndex) { result.add(fileStatus); } currentFileIndex.incrementAndGet(); if (result.size() >= numResultFiles) { return; } } } }
From source file:org.apache.tajo.master.rule.FileSystemRule.java
License:Apache License
/**
 * Verifies that the current user may perform {@code action} on the path described by
 * {@code fsStatus}.
 *
 * <p>Exactly one permission class is consulted. The owner's class applies when the current
 * short user name equals the path owner. Otherwise the group's class applies when the user's
 * groups contain the path's group. Otherwise the "other" class applies.
 *
 * @param fsStatus status of the path being checked
 * @param action   the access being requested
 * @throws Exception an {@code AccessControlException} when the consulted permission class does
 *                   not imply {@code action}; failures while resolving the current user are
 *                   propagated as well
 */
private void canAccessToPath(FileStatus fsStatus, FsAction action) throws Exception {
    FsPermission permission = fsStatus.getPermission();
    UserGroupInformation currentUser = UserGroupInformation.getCurrentUser();
    String userName = currentUser.getShortUserName();
    List<String> memberships = Arrays.asList(currentUser.getGroupNames());

    // Select the single permission class that applies to this user.
    FsAction granted;
    if (userName.equals(fsStatus.getOwner())) {
        granted = permission.getUserAction();
    } else if (memberships.contains(fsStatus.getGroup())) {
        granted = permission.getGroupAction();
    } else {
        granted = permission.getOtherAction();
    }

    if (granted.implies(action)) {
        return;
    }

    String typeFlag = fsStatus.isDirectory() ? "d" : "-";
    throw new AccessControlException(
            String.format("Permission denied: user=%s, path=\"%s\":%s:%s:%s%s", userName, fsStatus.getPath(),
                    fsStatus.getOwner(), fsStatus.getGroup(), typeFlag, permission));
}
From source file:org.apache.tajo.plan.LogicalPlanner.java
License:Apache License
/**
 * Refreshes the byte volume stored in the statistics of {@code desc} from the file system.
 *
 * <p>Tables without a path, tables whose store type is {@code StoreType.SYSTEM}, and tables
 * without a statistics object are left untouched. This is a best-effort update: any failure is
 * logged at WARN level together with its stack trace and is not propagated.
 *
 * @param desc table descriptor whose statistics are updated in place
 */
private void updatePhysicalInfo(TableDesc desc) {
    if (desc.getPath() == null || desc.getMeta().getStoreType() == StoreType.SYSTEM) {
        return;
    }

    try {
        Path tablePath = new Path(desc.getPath());
        FileSystem fileSystem = tablePath.getFileSystem(new Configuration());
        FileStatus tableStatus = fileSystem.getFileStatus(tablePath);

        if (desc.getStats() == null || (!tableStatus.isDirectory() && !tableStatus.isFile())) {
            return;
        }

        ContentSummary contentSummary = fileSystem.getContentSummary(tablePath);
        if (contentSummary != null) {
            long totalBytes = contentSummary.getLength();
            desc.getStats().setNumBytes(totalBytes);
        }
    } catch (Throwable t) {
        LOG.warn(t, t);
    }
}
From source file:org.apache.tajo.QueryTestCaseBase.java
License:Apache License
/**
 * Reads the contents of every file under {@code path} and returns them concatenated.
 *
 * <p>Entries are visited in the order returned by {@code listStatus}; sub-directories are read
 * recursively. Each file is read in full and decoded once, with the platform default charset,
 * so a multi-byte character that straddles a read boundary is not corrupted.
 *
 * @param path data parent path
 * @return the concatenated file contents, or an empty string when {@code path} lists no entries
 * @throws Exception if listing or reading fails
 */
public String getTableFileContents(Path path) throws Exception {
    FileSystem fs = path.getFileSystem(conf);

    FileStatus[] files = fs.listStatus(path);
    if (files == null || files.length == 0) {
        return "";
    }

    StringBuilder sb = new StringBuilder();
    byte[] buf = new byte[1024];

    for (FileStatus file : files) {
        if (file.isDirectory()) {
            sb.append(getTableFileContents(file.getPath()));
            continue;
        }

        // Gather the raw bytes first; decoding chunk by chunk would split multi-byte characters.
        java.io.ByteArrayOutputStream rawBytes = new java.io.ByteArrayOutputStream();
        InputStream in = fs.open(file.getPath());
        try {
            while (true) {
                int readBytes = in.read(buf);
                if (readBytes <= 0) {
                    break;
                }
                rawBytes.write(buf, 0, readBytes);
            }
        } finally {
            in.close();
        }
        // Same charset the per-chunk decoding used, now applied to the whole file.
        sb.append(new String(rawBytes.toByteArray(), java.nio.charset.Charset.defaultCharset()));
    }

    return sb.toString();
}