List of usage examples for org.apache.hadoop.fs.FileStatus#isDirectory()
public boolean isDirectory()
From source file:org.apache.tajo.QueryTestCaseBase.java
License:Apache License
private List<Path> listFiles(FileSystem fs, Path path) throws Exception { List<Path> result = new ArrayList<Path>(); FileStatus[] files = fs.listStatus(path); if (files == null || files.length == 0) { return result; }// w w w.j av a2 s . co m for (FileStatus eachFile : files) { if (eachFile.isDirectory()) { result.addAll(listFiles(fs, eachFile.getPath())); } else { result.add(eachFile.getPath()); } } return result; }
From source file:org.apache.tajo.storage.AbstractStorageManager.java
License:Apache License
/** * List input directories./*from w ww . j av a 2 s .c om*/ * Subclasses may override to, e.g., select only files matching a regular * expression. * * @return array of FileStatus objects * @throws IOException if zero items. */ protected List<FileStatus> listStatus(Path... dirs) throws IOException { List<FileStatus> result = new ArrayList<FileStatus>(); if (dirs.length == 0) { throw new IOException("No input paths specified in job"); } List<IOException> errors = new ArrayList<IOException>(); // creates a MultiPathFilter with the hiddenFileFilter and the // user provided one (if any). List<PathFilter> filters = new ArrayList<PathFilter>(); filters.add(hiddenFileFilter); PathFilter inputFilter = new MultiPathFilter(filters); for (int i = 0; i < dirs.length; ++i) { Path p = dirs[i]; FileSystem fs = p.getFileSystem(conf); FileStatus[] matches = fs.globStatus(p, inputFilter); if (matches == null) { errors.add(new IOException("Input path does not exist: " + p)); } else if (matches.length == 0) { errors.add(new IOException("Input Pattern " + p + " matches 0 files")); } else { for (FileStatus globStat : matches) { if (globStat.isDirectory()) { for (FileStatus stat : fs.listStatus(globStat.getPath(), inputFilter)) { result.add(stat); } } else { result.add(globStat); } } } } if (!errors.isEmpty()) { throw new InvalidInputException(errors); } LOG.info("Total input paths to process : " + result.size()); return result; }
From source file:org.apache.tajo.storage.FileStorageManager.java
License:Apache License
/** * * @param fs/*from w ww .j av a 2s . c o m*/ * @param path The table path * @param result The final result files to be used * @param startFileIndex * @param numResultFiles * @param currentFileIndex * @param partitioned A flag to indicate if this table is partitioned * @param currentDepth Current visiting depth of partition directories * @param maxDepth The partition depth of this table * @throws IOException */ private void getNonZeroLengthDataFiles(FileSystem fs, Path path, List<FileStatus> result, int startFileIndex, int numResultFiles, AtomicInteger currentFileIndex, boolean partitioned, int currentDepth, int maxDepth) throws IOException { // Intermediate directory if (fs.isDirectory(path)) { FileStatus[] files = fs.listStatus(path, StorageManager.hiddenFileFilter); if (files != null && files.length > 0) { for (FileStatus eachFile : files) { // checking if the enough number of files are found if (result.size() >= numResultFiles) { return; } if (eachFile.isDirectory()) { getNonZeroLengthDataFiles(fs, eachFile.getPath(), result, startFileIndex, numResultFiles, currentFileIndex, partitioned, currentDepth + 1, // increment a visiting depth maxDepth); // if partitioned table, we should ignore files located in the intermediate directory. // we can ensure that this file is in leaf directory if currentDepth == maxDepth. } else if (eachFile.isFile() && eachFile.getLen() > 0 && (!partitioned || currentDepth == maxDepth)) { if (currentFileIndex.get() >= startFileIndex) { result.add(eachFile); } currentFileIndex.incrementAndGet(); } } } // Files located in leaf directory } else { FileStatus fileStatus = fs.getFileStatus(path); if (fileStatus != null && fileStatus.getLen() > 0) { if (currentFileIndex.get() >= startFileIndex) { result.add(fileStatus); } currentFileIndex.incrementAndGet(); if (result.size() >= numResultFiles) { return; } } } }
From source file:org.apache.tajo.storage.FileTablespace.java
License:Apache License
/** * List input directories./*from w w w . j a v a2s . c om*/ * Subclasses may override to, e.g., select only files matching a regular * expression. * * @return array of FileStatus objects * @throws IOException if zero items. */ protected List<FileStatus> listStatus(Path... dirs) throws IOException { List<FileStatus> result = new ArrayList<FileStatus>(); if (dirs.length == 0) { throw new IOException("No input paths specified in job"); } List<IOException> errors = new ArrayList<IOException>(); // creates a MultiPathFilter with the hiddenFileFilter and the // user provided one (if any). List<PathFilter> filters = new ArrayList<PathFilter>(); filters.add(hiddenFileFilter); PathFilter inputFilter = new MultiPathFilter(filters); for (int i = 0; i < dirs.length; ++i) { Path p = dirs[i]; FileStatus[] matches = fs.globStatus(p, inputFilter); if (matches == null) { errors.add(new IOException("Input path does not exist: " + p)); } else if (matches.length == 0) { errors.add(new IOException("Input Pattern " + p + " matches 0 files")); } else { for (FileStatus globStat : matches) { if (globStat.isDirectory()) { for (FileStatus stat : fs.listStatus(globStat.getPath(), inputFilter)) { result.add(stat); } } else { result.add(globStat); } } } } if (!errors.isEmpty()) { throw new InvalidInputException(errors); } LOG.info("Total input paths to process : " + result.size()); return result; }
From source file:org.apache.tajo.storage.FileTablespace.java
License:Apache License
/** * Attach the sequence number to the output file name and than move the file into the final result path. * * @param fs FileSystem/*from w w w . j a v a 2s. c o m*/ * @param stagingResultDir The staging result dir * @param fileStatus The file status * @param finalOutputPath Final output path * @param nf Number format * @param fileSeq The sequence number * @throws java.io.IOException */ private void moveResultFromStageToFinal(FileSystem fs, Path stagingResultDir, FileStatus fileStatus, Path finalOutputPath, NumberFormat nf, int fileSeq, boolean changeFileSeq) throws IOException { if (fileStatus.isDirectory()) { String subPath = extractSubPath(stagingResultDir, fileStatus.getPath()); if (subPath != null) { Path finalSubPath = new Path(finalOutputPath, subPath); if (!fs.exists(finalSubPath)) { fs.mkdirs(finalSubPath); } int maxSeq = StorageUtil.getMaxFileSequence(fs, finalSubPath, false); for (FileStatus eachFile : fs.listStatus(fileStatus.getPath())) { if (eachFile.getPath().getName().startsWith("_")) { continue; } moveResultFromStageToFinal(fs, stagingResultDir, eachFile, finalOutputPath, nf, ++maxSeq, changeFileSeq); } } else { throw new IOException("Wrong staging dir:" + stagingResultDir + "," + fileStatus.getPath()); } } else { String subPath = extractSubPath(stagingResultDir, fileStatus.getPath()); if (subPath != null) { Path finalSubPath = new Path(finalOutputPath, subPath); if (changeFileSeq) { finalSubPath = new Path(finalSubPath.getParent(), replaceFileNameSeq(finalSubPath, fileSeq, nf)); } if (!fs.exists(finalSubPath.getParent())) { fs.mkdirs(finalSubPath.getParent()); } if (fs.exists(finalSubPath)) { throw new IOException("Already exists data file:" + finalSubPath); } boolean success = fs.rename(fileStatus.getPath(), finalSubPath); if (success) { LOG.info("Moving staging file[" + fileStatus.getPath() + "] + " + "to final output[" + finalSubPath + "]"); } else { LOG.error("Can't move staging file[" + fileStatus.getPath() + "] + " + "to final output[" + 
finalSubPath + "]"); } } } }
From source file:org.apache.tajo.storage.FileTablespace.java
License:Apache License
/** * This method sets a rename map which includes renamed staging directory to final output directory recursively. * If there exists some data files, this delete it for duplicate data. * * * @param fs// w w w . ja va2 s .co m * @param stagingPath * @param outputPath * @param stagingParentPathString * @throws java.io.IOException */ private void visitPartitionedDirectory(FileSystem fs, Path stagingPath, Path outputPath, String stagingParentPathString, Map<Path, Path> renameDirs, Path oldTableDir) throws IOException { FileStatus[] files = fs.listStatus(stagingPath); for (FileStatus eachFile : files) { if (eachFile.isDirectory()) { Path oldPath = eachFile.getPath(); // Make recover directory. String recoverPathString = oldPath.toString().replaceAll(stagingParentPathString, oldTableDir.toString()); Path recoveryPath = new Path(recoverPathString); if (!fs.exists(recoveryPath)) { fs.mkdirs(recoveryPath); } visitPartitionedDirectory(fs, eachFile.getPath(), outputPath, stagingParentPathString, renameDirs, oldTableDir); // Find last order partition for renaming String newPathString = oldPath.toString().replaceAll(stagingParentPathString, outputPath.toString()); Path newPath = new Path(newPathString); if (!isLeafDirectory(fs, eachFile.getPath())) { renameDirs.put(eachFile.getPath(), newPath); } else { if (!fs.exists(newPath)) { fs.mkdirs(newPath); } } } } }
From source file:org.apache.tajo.storage.s3.S3TableSpace.java
License:Apache License
/** * Calculate the total size of all objects in the indicated bucket * * @param path to use/* w w w . j a va2 s . co m*/ * @return calculated size * @throws IOException */ @Override public long calculateSize(Path path) throws IOException { long totalBucketSize = 0L; if (s3Enabled) { String key = pathToKey(path); final FileStatus fileStatus = fs.getFileStatus(path); if (fileStatus.isDirectory()) { if (!key.isEmpty()) { key = key + "/"; } ListObjectsRequest request = new ListObjectsRequest(); request.setBucketName(uri.getHost()); request.setPrefix(key); request.setMaxKeys(maxKeys); if (LOG.isDebugEnabled()) { LOG.debug("listStatus: doing listObjects for directory " + key); } ObjectListing objects = s3.listObjects(request); while (true) { for (S3ObjectSummary summary : objects.getObjectSummaries()) { Path keyPath = keyToPath(summary.getKey()).makeQualified(uri, fs.getWorkingDirectory()); // Skip over keys that are ourselves and old S3N _$folder$ files if (keyPath.equals(path) || summary.getKey().endsWith(S3N_FOLDER_SUFFIX)) { if (LOG.isDebugEnabled()) { LOG.debug("Ignoring: " + keyPath); } continue; } if (!objectRepresentsDirectory(summary.getKey(), summary.getSize())) { totalBucketSize += summary.getSize(); } } if (objects.isTruncated()) { if (LOG.isDebugEnabled()) { LOG.debug("listStatus: list truncated - getting next batch"); } objects = s3.listNextBatchOfObjects(objects); } else { break; } } } else { return fileStatus.getLen(); } } else { totalBucketSize = fs.getContentSummary(path).getLength(); } return totalBucketSize; }
From source file:org.apache.tajo.storage.StorageUtil.java
License:Apache License
/** * Written files can be one of two forms: "part-[0-9]*-[0-9]*" or "part-[0-9]*-[0-9]*-[0-9]*". * * This method finds the maximum sequence number from existing data files through the above patterns. * If it cannot find any matched file or the maximum number, it will return -1. * * @param fs//ww w.ja v a 2s . c o m * @param path * @param recursive * @return The maximum sequence number * @throws java.io.IOException */ public static int getMaxFileSequence(FileSystem fs, Path path, boolean recursive) throws IOException { if (!fs.isDirectory(path)) { return -1; } FileStatus[] files = fs.listStatus(path); if (files == null || files.length == 0) { return -1; } int maxValue = -1; for (FileStatus eachFile : files) { // In the case of partition table, return largest value within all partition dirs. int value; if (eachFile.isDirectory() && recursive) { value = getMaxFileSequence(fs, eachFile.getPath(), recursive); if (value > maxValue) { maxValue = value; } } else { if (eachFile.getPath().getName().matches(fileNamePatternV08) || eachFile.getPath().getName().matches(fileNamePatternV09)) { value = getSequence(eachFile.getPath().getName()); if (value > maxValue) { maxValue = value; } } } } return maxValue; }
From source file:org.apache.tajo.util.history.HistoryReader.java
License:Apache License
/**
 * Reads the query history files below the history parent path and returns the recorded
 * queries, ordered by query id string in descending order.
 *
 * <p>Each history file is read as a sequence of records: a 4-byte length followed by that many
 * bytes of UTF-8 JSON. Reading a file stops at end of file; any other failure is logged as a
 * warning and the remaining records of that file are skipped.
 *
 * @param keyword when non-null, only queries whose SQL contains this text are returned
 * @return the matching queries; empty when the history path is missing, cannot be checked,
 *         or holds no entries
 * @throws IOException if listing the history directories fails
 */
public List<QueryInfo> getQueries(String keyword) throws IOException {
  List<QueryInfo> found = new ArrayList<QueryInfo>();

  FileSystem fs = HistoryWriter.getNonCrcFileSystem(historyParentPath, tajoConf);
  boolean parentExists;
  try {
    parentExists = fs.exists(historyParentPath);
  } catch (Throwable e) {
    // An unreadable history location is treated the same as a missing one.
    return found;
  }
  if (!parentExists) {
    return found;
  }

  FileStatus[] dateDirs = fs.listStatus(historyParentPath);
  if (dateDirs == null || dateDirs.length == 0) {
    return found;
  }

  for (FileStatus dateDir : dateDirs) {
    Path queryListPath = new Path(dateDir.getPath(), HistoryWriter.QUERY_LIST);
    if (dateDir.isFile() || !fs.exists(queryListPath)) {
      continue;
    }

    FileStatus[] historyFiles = fs.listStatus(queryListPath);
    if (historyFiles == null || historyFiles.length == 0) {
      continue;
    }

    for (FileStatus historyFile : historyFiles) {
      Path path = historyFile.getPath();
      if (historyFile.isDirectory() || !path.getName().endsWith(HistoryWriter.HISTORY_FILE_POSTFIX)) {
        continue;
      }

      FSDataInputStream in = null;
      try {
        in = fs.open(path);
        byte[] buffer = new byte[100 * 1024];

        // Loop ends through the EOFException raised when the stream is exhausted.
        for (;;) {
          int recordLength = in.readInt();
          if (recordLength > buffer.length) {
            buffer = new byte[recordLength];
          }
          in.readFully(buffer, 0, recordLength);

          String json = new String(buffer, 0, recordLength, Bytes.UTF8_CHARSET);
          QueryInfo info = QueryInfo.fromJson(json);
          if (keyword == null || info.getSql().indexOf(keyword) >= 0) {
            found.add(info);
          }
        }
      } catch (EOFException endOfFile) {
        // Normal termination: every record of this file has been read.
      } catch (Throwable e) {
        LOG.warn("Reading error:" + path + ", " + e.getMessage());
      } finally {
        IOUtils.cleanup(LOG, in);
      }
    }
  }

  // Descending order of the query id string.
  Collections.sort(found, new Comparator<QueryInfo>() {
    @Override
    public int compare(QueryInfo left, QueryInfo right) {
      return right.getQueryIdStr().toString().compareTo(left.getQueryIdStr().toString());
    }
  });

  return found;
}
From source file:org.apache.tez.client.TezClientUtils.java
License:Apache License
private static void addLocalResources(Configuration conf, String[] configUris, Map<String, LocalResource> tezJarResources, Credentials credentials) throws IOException { if (configUris == null || configUris.length == 0) { return;//from w w w.j av a 2 s . co m } List<Path> configuredPaths = Lists.newArrayListWithCapacity(configUris.length); for (String configUri : configUris) { boolean ancestorsHavePermission = checkAncestorPermissionsForAllUsers(conf, configUri, FsAction.EXECUTE); FileStatus[] fileStatuses = getLRFileStatus(configUri, conf); for (FileStatus fStatus : fileStatuses) { if (fStatus.isDirectory()) { // Skip directories - no recursive search support. continue; } LocalResourceVisibility lrVisibility; if (ancestorsHavePermission && fStatus.getPermission().getOtherAction().implies(FsAction.READ)) { lrVisibility = LocalResourceVisibility.PUBLIC; } else { lrVisibility = LocalResourceVisibility.PRIVATE; } String rsrcName = fStatus.getPath().getName(); if (tezJarResources.containsKey(rsrcName)) { String message = "Duplicate resource found" + ", resourceName=" + rsrcName + ", existingPath=" + tezJarResources.get(rsrcName).getResource().toString() + ", newPath=" + fStatus.getPath(); LOG.warn(message); } tezJarResources.put(rsrcName, LocalResource.newInstance(ConverterUtils.getYarnUrlFromPath(fStatus.getPath()), LocalResourceType.FILE, lrVisibility, fStatus.getLen(), fStatus.getModificationTime())); configuredPaths.add(fStatus.getPath()); } } // Obtain credentials. if (!configuredPaths.isEmpty()) { TokenCache.obtainTokensForFileSystems(credentials, configuredPaths.toArray(new Path[configuredPaths.size()]), conf); } }