Usage examples for org.apache.hadoop.fs.FileStatus#isFile()
public boolean isFile()
From source file:org.apache.tajo.parser.sql.TestSQLAnalyzer.java
License:Apache License
public Collection<File> getResourceFiles(String subdir) throws URISyntaxException, IOException { URL uri = ClassLoader.getSystemResource("queries/TestSQLAnalyzer"); Path positiveQueryDir = StorageUtil.concatPath(new Path(uri.toURI()), subdir); FileSystem fs = positiveQueryDir.getFileSystem(new TajoConf()); if (!fs.exists(positiveQueryDir)) { throw new IOException("Cannot find " + positiveQueryDir); }// w w w .j a va 2 s. c o m // get only files Collection<FileStatus> files = filter(Lists.newArrayList(fs.listStatus(positiveQueryDir)), new Predicate<FileStatus>() { @Override public boolean apply(@Nullable FileStatus input) { // TODO: This should be removed at TAJO-1891 if (input.getPath().getName().indexOf("add_partition") > -1) { return false; } else { return input.isFile(); } } }); // transform FileStatus into File return transform(files, new Function<FileStatus, File>() { @Override public File apply(@Nullable FileStatus fileStatus) { return new File(URI.create(fileStatus.getPath().toString())); } }); }
From source file:org.apache.tajo.plan.LogicalPlanner.java
License:Apache License
private void updatePhysicalInfo(TableDesc desc) { if (desc.getPath() != null && desc.getMeta().getStoreType() != StoreType.SYSTEM) { try {//w w w.j a va 2s .c o m Path path = new Path(desc.getPath()); FileSystem fs = path.getFileSystem(new Configuration()); FileStatus status = fs.getFileStatus(path); if (desc.getStats() != null && (status.isDirectory() || status.isFile())) { ContentSummary summary = fs.getContentSummary(path); if (summary != null) { long volume = summary.getLength(); desc.getStats().setNumBytes(volume); } } } catch (Throwable t) { LOG.warn(t, t); } } }
From source file:org.apache.tajo.storage.FileStorageManager.java
License:Apache License
/** * * @param fs//from w w w . j a v a 2 s. co m * @param path The table path * @param result The final result files to be used * @param startFileIndex * @param numResultFiles * @param currentFileIndex * @param partitioned A flag to indicate if this table is partitioned * @param currentDepth Current visiting depth of partition directories * @param maxDepth The partition depth of this table * @throws IOException */ private void getNonZeroLengthDataFiles(FileSystem fs, Path path, List<FileStatus> result, int startFileIndex, int numResultFiles, AtomicInteger currentFileIndex, boolean partitioned, int currentDepth, int maxDepth) throws IOException { // Intermediate directory if (fs.isDirectory(path)) { FileStatus[] files = fs.listStatus(path, StorageManager.hiddenFileFilter); if (files != null && files.length > 0) { for (FileStatus eachFile : files) { // checking if the enough number of files are found if (result.size() >= numResultFiles) { return; } if (eachFile.isDirectory()) { getNonZeroLengthDataFiles(fs, eachFile.getPath(), result, startFileIndex, numResultFiles, currentFileIndex, partitioned, currentDepth + 1, // increment a visiting depth maxDepth); // if partitioned table, we should ignore files located in the intermediate directory. // we can ensure that this file is in leaf directory if currentDepth == maxDepth. } else if (eachFile.isFile() && eachFile.getLen() > 0 && (!partitioned || currentDepth == maxDepth)) { if (currentFileIndex.get() >= startFileIndex) { result.add(eachFile); } currentFileIndex.incrementAndGet(); } } } // Files located in leaf directory } else { FileStatus fileStatus = fs.getFileStatus(path); if (fileStatus != null && fileStatus.getLen() > 0) { if (currentFileIndex.get() >= startFileIndex) { result.add(fileStatus); } currentFileIndex.incrementAndGet(); if (result.size() >= numResultFiles) { return; } } } }
From source file:org.apache.tajo.storage.FileTablespace.java
License:Apache License
/** * Finalizes result data. Tajo stores result data in the staging directory. * If the query fails, clean up the staging directory. * Otherwise the query is successful, move to the final directory from the staging directory. * * @param queryContext The query property * @param changeFileSeq If true change result file name with max sequence. * @return Saved path// ww w.j av a2s . c o m * @throws java.io.IOException */ protected Path commitOutputData(OverridableConf queryContext, boolean changeFileSeq) throws IOException { Path stagingDir = new Path(queryContext.get(QueryVars.STAGING_DIR)); Path stagingResultDir = new Path(stagingDir, TajoConstants.RESULT_DIR_NAME); Path finalOutputDir; if (!queryContext.get(QueryVars.OUTPUT_TABLE_URI, "").isEmpty()) { finalOutputDir = new Path(queryContext.get(QueryVars.OUTPUT_TABLE_URI)); try { FileSystem fs = stagingResultDir.getFileSystem(conf); if (queryContext.getBool(QueryVars.OUTPUT_OVERWRITE, false)) { // INSERT OVERWRITE INTO // It moves the original table into the temporary location. // Then it moves the new result table into the original table location. // Upon failed, it recovers the original table if possible. boolean movedToOldTable = false; boolean committed = false; Path oldTableDir = new Path(stagingDir, TajoConstants.INSERT_OVERWIRTE_OLD_TABLE_NAME); ContentSummary summary = fs.getContentSummary(stagingResultDir); // When inserting empty data into a partitioned table, check if keep existing data need to be remove or not. boolean overwriteEnabled = queryContext .getBool(SessionVars.PARTITION_NO_RESULT_OVERWRITE_ENABLED); // If existing data doesn't need to keep, check if there are some files. if ((!queryContext.get(QueryVars.OUTPUT_PARTITIONS, "").isEmpty()) && (!overwriteEnabled || (overwriteEnabled && summary.getFileCount() > 0L))) { // This is a map for existing non-leaf directory to rename. A key is current directory and a value is // renaming directory. 
Map<Path, Path> renameDirs = TUtil.newHashMap(); // This is a map for recovering existing partition directory. A key is current directory and a value is // temporary directory to back up. Map<Path, Path> recoveryDirs = TUtil.newHashMap(); try { if (!fs.exists(finalOutputDir)) { fs.mkdirs(finalOutputDir); } visitPartitionedDirectory(fs, stagingResultDir, finalOutputDir, stagingResultDir.toString(), renameDirs, oldTableDir); // Rename target partition directories for (Map.Entry<Path, Path> entry : renameDirs.entrySet()) { // Backup existing data files for recovering if (fs.exists(entry.getValue())) { String recoveryPathString = entry.getValue().toString() .replaceAll(finalOutputDir.toString(), oldTableDir.toString()); Path recoveryPath = new Path(recoveryPathString); fs.rename(entry.getValue(), recoveryPath); fs.exists(recoveryPath); recoveryDirs.put(entry.getValue(), recoveryPath); } // Delete existing directory fs.delete(entry.getValue(), true); // Rename staging directory to final output directory fs.rename(entry.getKey(), entry.getValue()); } } catch (IOException ioe) { // Remove created dirs for (Map.Entry<Path, Path> entry : renameDirs.entrySet()) { fs.delete(entry.getValue(), true); } // Recovery renamed dirs for (Map.Entry<Path, Path> entry : recoveryDirs.entrySet()) { fs.delete(entry.getValue(), true); fs.rename(entry.getValue(), entry.getKey()); } throw new IOException(ioe.getMessage()); } } else { // no partition try { // if the final output dir exists, move all contents to the temporary table dir. // Otherwise, just make the final output dir. As a result, the final output dir will be empty. if (fs.exists(finalOutputDir)) { fs.mkdirs(oldTableDir); for (FileStatus status : fs.listStatus(finalOutputDir, hiddenFileFilter)) { fs.rename(status.getPath(), oldTableDir); } movedToOldTable = fs.exists(oldTableDir); } else { // if the parent does not exist, make its parent directory. fs.mkdirs(finalOutputDir); } // Move the results to the final output dir. 
for (FileStatus status : fs.listStatus(stagingResultDir)) { fs.rename(status.getPath(), finalOutputDir); } // Check the final output dir committed = fs.exists(finalOutputDir); } catch (IOException ioe) { // recover the old table if (movedToOldTable && !committed) { // if commit is failed, recover the old data for (FileStatus status : fs.listStatus(finalOutputDir, hiddenFileFilter)) { fs.delete(status.getPath(), true); } for (FileStatus status : fs.listStatus(oldTableDir)) { fs.rename(status.getPath(), finalOutputDir); } } throw new IOException(ioe.getMessage()); } } } else { String queryType = queryContext.get(QueryVars.COMMAND_TYPE); if (queryType != null && queryType.equals(NodeType.INSERT.name())) { // INSERT INTO an existing table NumberFormat fmt = NumberFormat.getInstance(); fmt.setGroupingUsed(false); fmt.setMinimumIntegerDigits(3); if (!queryContext.get(QueryVars.OUTPUT_PARTITIONS, "").isEmpty()) { for (FileStatus eachFile : fs.listStatus(stagingResultDir)) { if (eachFile.isFile()) { LOG.warn("Partition table can't have file in a staging dir: " + eachFile.getPath()); continue; } moveResultFromStageToFinal(fs, stagingResultDir, eachFile, finalOutputDir, fmt, -1, changeFileSeq); } } else { int maxSeq = StorageUtil.getMaxFileSequence(fs, finalOutputDir, false) + 1; for (FileStatus eachFile : fs.listStatus(stagingResultDir)) { if (eachFile.getPath().getName().startsWith("_")) { continue; } moveResultFromStageToFinal(fs, stagingResultDir, eachFile, finalOutputDir, fmt, maxSeq++, changeFileSeq); } } // checking all file moved and remove empty dir verifyAllFileMoved(fs, stagingResultDir); FileStatus[] files = fs.listStatus(stagingResultDir); if (files != null && files.length != 0) { for (FileStatus eachFile : files) { LOG.error("There are some unmoved files in staging dir:" + eachFile.getPath()); } } } else { // CREATE TABLE AS SELECT (CTAS) if (fs.exists(finalOutputDir)) { for (FileStatus status : fs.listStatus(stagingResultDir)) { fs.rename(status.getPath(), 
finalOutputDir); } } else { fs.rename(stagingResultDir, finalOutputDir); } LOG.info("Moved from the staging dir to the output directory '" + finalOutputDir); } } // remove the staging directory if the final output dir is given. Path stagingDirRoot = stagingDir.getParent(); fs.delete(stagingDirRoot, true); } catch (Throwable t) { LOG.error(t); throw new IOException(t); } } else { finalOutputDir = new Path(stagingDir, TajoConstants.RESULT_DIR_NAME); } return finalOutputDir; }
From source file:org.apache.tajo.storage.FileTablespace.java
License:Apache License
/** * Make sure all files are moved./*from w ww .j a va 2s.c o m*/ * @param fs FileSystem * @param stagingPath The stagind directory * @return * @throws java.io.IOException */ private boolean verifyAllFileMoved(FileSystem fs, Path stagingPath) throws IOException { FileStatus[] files = fs.listStatus(stagingPath); if (files != null && files.length != 0) { for (FileStatus eachFile : files) { if (eachFile.isFile()) { LOG.error("There are some unmoved files in staging dir:" + eachFile.getPath()); return false; } else { if (verifyAllFileMoved(fs, eachFile.getPath())) { fs.delete(eachFile.getPath(), false); } else { return false; } } } } return true; }
From source file:org.apache.tajo.storage.StorageManager.java
License:Apache License
/** * Finalizes result data. Tajo stores result data in the staging directory. * If the query fails, clean up the staging directory. * Otherwise the query is successful, move to the final directory from the staging directory. * * @param queryContext The query property * @param finalEbId The final execution block id * @param plan The query plan/*from w w w . j a va2 s .com*/ * @param schema The final output schema * @param tableDesc The description of the target table * @param changeFileSeq If true change result file name with max sequence. * @return Saved path * @throws java.io.IOException */ protected Path commitOutputData(OverridableConf queryContext, ExecutionBlockId finalEbId, LogicalPlan plan, Schema schema, TableDesc tableDesc, boolean changeFileSeq) throws IOException { Path stagingDir = new Path(queryContext.get(QueryVars.STAGING_DIR)); Path stagingResultDir = new Path(stagingDir, TajoConstants.RESULT_DIR_NAME); Path finalOutputDir; if (!queryContext.get(QueryVars.OUTPUT_TABLE_PATH, "").isEmpty()) { finalOutputDir = new Path(queryContext.get(QueryVars.OUTPUT_TABLE_PATH)); try { FileSystem fs = stagingResultDir.getFileSystem(conf); if (queryContext.getBool(QueryVars.OUTPUT_OVERWRITE, false)) { // INSERT OVERWRITE INTO // It moves the original table into the temporary location. // Then it moves the new result table into the original table location. // Upon failed, it recovers the original table if possible. boolean movedToOldTable = false; boolean committed = false; Path oldTableDir = new Path(stagingDir, TajoConstants.INSERT_OVERWIRTE_OLD_TABLE_NAME); ContentSummary summary = fs.getContentSummary(stagingResultDir); if (!queryContext.get(QueryVars.OUTPUT_PARTITIONS, "").isEmpty() && summary.getFileCount() > 0L) { // This is a map for existing non-leaf directory to rename. A key is current directory and a value is // renaming directory. Map<Path, Path> renameDirs = TUtil.newHashMap(); // This is a map for recovering existing partition directory. 
A key is current directory and a value is // temporary directory to back up. Map<Path, Path> recoveryDirs = TUtil.newHashMap(); try { if (!fs.exists(finalOutputDir)) { fs.mkdirs(finalOutputDir); } visitPartitionedDirectory(fs, stagingResultDir, finalOutputDir, stagingResultDir.toString(), renameDirs, oldTableDir); // Rename target partition directories for (Map.Entry<Path, Path> entry : renameDirs.entrySet()) { // Backup existing data files for recovering if (fs.exists(entry.getValue())) { String recoveryPathString = entry.getValue().toString() .replaceAll(finalOutputDir.toString(), oldTableDir.toString()); Path recoveryPath = new Path(recoveryPathString); fs.rename(entry.getValue(), recoveryPath); fs.exists(recoveryPath); recoveryDirs.put(entry.getValue(), recoveryPath); } // Delete existing directory fs.delete(entry.getValue(), true); // Rename staging directory to final output directory fs.rename(entry.getKey(), entry.getValue()); } } catch (IOException ioe) { // Remove created dirs for (Map.Entry<Path, Path> entry : renameDirs.entrySet()) { fs.delete(entry.getValue(), true); } // Recovery renamed dirs for (Map.Entry<Path, Path> entry : recoveryDirs.entrySet()) { fs.delete(entry.getValue(), true); fs.rename(entry.getValue(), entry.getKey()); } throw new IOException(ioe.getMessage()); } } else { // no partition try { // if the final output dir exists, move all contents to the temporary table dir. // Otherwise, just make the final output dir. As a result, the final output dir will be empty. if (fs.exists(finalOutputDir)) { fs.mkdirs(oldTableDir); for (FileStatus status : fs.listStatus(finalOutputDir, StorageManager.hiddenFileFilter)) { fs.rename(status.getPath(), oldTableDir); } movedToOldTable = fs.exists(oldTableDir); } else { // if the parent does not exist, make its parent directory. fs.mkdirs(finalOutputDir); } // Move the results to the final output dir. 
for (FileStatus status : fs.listStatus(stagingResultDir)) { fs.rename(status.getPath(), finalOutputDir); } // Check the final output dir committed = fs.exists(finalOutputDir); } catch (IOException ioe) { // recover the old table if (movedToOldTable && !committed) { // if commit is failed, recover the old data for (FileStatus status : fs.listStatus(finalOutputDir, StorageManager.hiddenFileFilter)) { fs.delete(status.getPath(), true); } for (FileStatus status : fs.listStatus(oldTableDir)) { fs.rename(status.getPath(), finalOutputDir); } } throw new IOException(ioe.getMessage()); } } } else { String queryType = queryContext.get(QueryVars.COMMAND_TYPE); if (queryType != null && queryType.equals(NodeType.INSERT.name())) { // INSERT INTO an existing table NumberFormat fmt = NumberFormat.getInstance(); fmt.setGroupingUsed(false); fmt.setMinimumIntegerDigits(3); if (!queryContext.get(QueryVars.OUTPUT_PARTITIONS, "").isEmpty()) { for (FileStatus eachFile : fs.listStatus(stagingResultDir)) { if (eachFile.isFile()) { LOG.warn("Partition table can't have file in a staging dir: " + eachFile.getPath()); continue; } moveResultFromStageToFinal(fs, stagingResultDir, eachFile, finalOutputDir, fmt, -1, changeFileSeq); } } else { int maxSeq = StorageUtil.getMaxFileSequence(fs, finalOutputDir, false) + 1; for (FileStatus eachFile : fs.listStatus(stagingResultDir)) { if (eachFile.getPath().getName().startsWith("_")) { continue; } moveResultFromStageToFinal(fs, stagingResultDir, eachFile, finalOutputDir, fmt, maxSeq++, changeFileSeq); } } // checking all file moved and remove empty dir verifyAllFileMoved(fs, stagingResultDir); FileStatus[] files = fs.listStatus(stagingResultDir); if (files != null && files.length != 0) { for (FileStatus eachFile : files) { LOG.error("There are some unmoved files in staging dir:" + eachFile.getPath()); } } } else { // CREATE TABLE AS SELECT (CTAS) if (fs.exists(finalOutputDir)) { for (FileStatus status : fs.listStatus(stagingResultDir)) { 
fs.rename(status.getPath(), finalOutputDir); } } else { fs.rename(stagingResultDir, finalOutputDir); } LOG.info("Moved from the staging dir to the output directory '" + finalOutputDir); } } // remove the staging directory if the final output dir is given. Path stagingDirRoot = stagingDir.getParent(); fs.delete(stagingDirRoot, true); } catch (Throwable t) { LOG.error(t); throw new IOException(t); } } else { finalOutputDir = new Path(stagingDir, TajoConstants.RESULT_DIR_NAME); } return finalOutputDir; }
From source file:org.apache.tajo.util.history.HistoryReader.java
License:Apache License
public List<QueryInfo> getQueries(String keyword) throws IOException { List<QueryInfo> queryInfos = new ArrayList<QueryInfo>(); FileSystem fs = HistoryWriter.getNonCrcFileSystem(historyParentPath, tajoConf); try {//from w w w .j a v a 2 s.c o m if (!fs.exists(historyParentPath)) { return queryInfos; } } catch (Throwable e) { return queryInfos; } FileStatus[] files = fs.listStatus(historyParentPath); if (files == null || files.length == 0) { return queryInfos; } for (FileStatus eachDateFile : files) { Path queryListPath = new Path(eachDateFile.getPath(), HistoryWriter.QUERY_LIST); if (eachDateFile.isFile() || !fs.exists(queryListPath)) { continue; } FileStatus[] dateFiles = fs.listStatus(queryListPath); if (dateFiles == null || dateFiles.length == 0) { continue; } for (FileStatus eachFile : dateFiles) { Path path = eachFile.getPath(); if (eachFile.isDirectory() || !path.getName().endsWith(HistoryWriter.HISTORY_FILE_POSTFIX)) { continue; } FSDataInputStream in = null; try { in = fs.open(path); byte[] buf = new byte[100 * 1024]; while (true) { int length = in.readInt(); if (length > buf.length) { buf = new byte[length]; } in.readFully(buf, 0, length); String queryInfoJson = new String(buf, 0, length, Bytes.UTF8_CHARSET); QueryInfo queryInfo = QueryInfo.fromJson(queryInfoJson); if (keyword != null) { if (queryInfo.getSql().indexOf(keyword) >= 0) { queryInfos.add(queryInfo); } } else { queryInfos.add(queryInfo); } } } catch (EOFException e) { } catch (Throwable e) { LOG.warn("Reading error:" + path + ", " + e.getMessage()); } finally { IOUtils.cleanup(LOG, in); } } } Collections.sort(queryInfos, new Comparator<QueryInfo>() { @Override public int compare(QueryInfo query1, QueryInfo query2) { return query2.getQueryIdStr().toString().compareTo(query1.getQueryIdStr().toString()); } }); return queryInfos; }
From source file:org.apache.tez.test.TestTezJobs.java
License:Apache License
/**
 * Asserts that {@code outputDir} holds exactly one {@code part-} file together with a
 * {@code _SUCCESS} marker, then verifies the content of the part file.
 *
 * @param outputDir the job output directory
 * @param fs the file system holding {@code outputDir}
 * @throws IOException if the directory cannot be listed or the output cannot be read
 */
private void verifyOutput(Path outputDir, FileSystem fs) throws IOException {
  Path resultFile = null;
  boolean foundResult = false;
  boolean foundSuccessFile = false;

  for (FileStatus entry : fs.listStatus(outputDir)) {
    if (!entry.isFile()) {
      continue;
    }

    Path entryPath = entry.getPath();
    String entryName = entryPath.getName();

    if (entryName.equals("_SUCCESS")) {
      foundSuccessFile = true;
    } else if (entryName.startsWith("part-")) {
      if (foundResult) {
        fail("Found 2 part files instead of 1" + ", paths=" + resultFile + "," + entryPath);
      }
      foundResult = true;
      resultFile = entryPath;
      LOG.info("Found output at " + resultFile);
    }
  }

  assertTrue(foundResult);
  assertTrue(resultFile != null);
  assertTrue(foundSuccessFile);
  verifyOrderedWordCountOutput(resultFile, fs);
}
From source file:org.bgi.flexlab.gaea.data.mapreduce.input.header.SamHdfsFileHeader.java
License:Open Source License
public static SAMFileHeader traversal(Path input, FileSystem fs, Configuration conf, boolean cram) { ArrayList<SAMFileHeader> mergeHeaders = new ArrayList<SAMFileHeader>(); SAMFileHeader mergedHeader = null;//from w w w.j ava 2 s . co m boolean matchedSortOrders = true; FileStatus status = null; try { status = fs.getFileStatus(input); } catch (IOException e2) { throw new FileNotExistException(input.getName()); } if (status.isFile()) { SAMFileHeader header = null; if (!cram) try { header = getSAMHeader(fs, input); } catch (IOException e) { throw new RuntimeException(e.toString()); } else try { header = getCramHeader(fs, input); } catch (IOException e) { throw new RuntimeException(e.toString()); } matchedSortOrders = matchedSortOrders && header.getSortOrder() == SORT_ORDER; if (!contains(header, mergeHeaders)) mergeHeaders.add(header); } else { FileStatus[] stats = null; try { stats = fs.listStatus(input, new HeaderPathFilter()); } catch (IOException e) { throw new RuntimeException(e.toString()); } for (FileStatus file : stats) { Path filePath = file.getPath(); SAMFileHeader header = null; if (file.isFile()) { if (!cram) try { header = getSAMHeader(fs, filePath); } catch (IOException e) { throw new RuntimeException(e.toString()); } else try { header = getCramHeader(fs, filePath); } catch (IOException e) { throw new RuntimeException(e.toString()); } } else { header = traversal(filePath, fs, conf, cram); } matchedSortOrders = matchedSortOrders && header.getSortOrder() == SORT_ORDER; if (!contains(header, mergeHeaders)) mergeHeaders.add(header); } } if (matchedSortOrders || SORT_ORDER == SAMFileHeader.SortOrder.unsorted || ASSUME_SORTED) { headerMergerSortOrder = SORT_ORDER; } else { headerMergerSortOrder = SAMFileHeader.SortOrder.unsorted; } mergedHeader = new SamFileHeaderMerger(headerMergerSortOrder, mergeHeaders, MERGE_SEQUENCE_DICTIONARIES) .getMergedHeader(); return mergedHeader; }
From source file:org.bgi.flexlab.gaea.data.mapreduce.util.HdfsFilesReader.java
License:Open Source License
/**
 * Collects the files reachable from {@code path} into {@code files} and opens a line
 * reader on the first collected file. A file path is added directly; a directory is listed
 * with {@code pathFilter} and its sub directories are visited recursively. Names for which
 * {@code filter(...)} returns {@code true} are skipped.
 *
 * @param path file or directory to traverse
 * @param pathFilter filter applied to every directory listing
 * @throws FileNotExistException if the status of {@code path} cannot be obtained
 * @throws RuntimeException if a directory cannot be listed or the first file cannot be
 *         opened; the underlying {@link IOException} is attached as the cause
 */
public void traversal(String path, PathFilter pathFilter) {
  Path p = new Path(path);
  fs = HdfsFileManager.getFileSystem(p, conf);

  FileStatus status;
  try {
    status = fs.getFileStatus(p);
  } catch (IOException e2) {
    throw new FileNotExistException(p.getName());
  }

  collectInputFiles(p, status, pathFilter);

  if (size() == 0) {
    return;
  }

  // Open the reader once, after the whole tree has been collected, so that no
  // intermediate stream is opened and then abandoned while walking sub directories.
  try {
    FSDataInputStream currInput = fs.open(files.get(0));
    lineReader = new LineReader(currInput, conf);
  } catch (IOException e) {
    throw new RuntimeException(e.toString(), e);
  }
}

/** Adds {@code p} to {@code files} when it is an accepted file, otherwise walks its filtered children. */
private void collectInputFiles(Path p, FileStatus status, PathFilter pathFilter) {
  if (status.isFile()) {
    if (!filter(p.getName())) {
      files.add(p);
    }
    return;
  }

  FileStatus[] stats;
  try {
    stats = fs.listStatus(p, pathFilter);
  } catch (IOException e) {
    throw new RuntimeException(e.toString(), e);
  }

  for (FileStatus child : stats) {
    // Recurse on the child's path: FileStatus#toString() is a debug description, not a path.
    collectInputFiles(child.getPath(), child, pathFilter);
  }
}