List of usage examples for org.apache.hadoop.fs Path equals
@Override public boolean equals(Object o)
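Path.equals() compares the full underlying URI, so the scheme and authority take part in the comparison: a fully qualified path and its unqualified form are not equal. Several of the examples below either strip the scheme and authority or qualify both sides before comparing. A minimal sketch of that behaviour (the host, port and file names are made up for illustration):

import org.apache.hadoop.fs.Path;

public class PathEqualsDemo {
  public static void main(String[] args) {
    Path qualified = new Path("hdfs://namenode:8020/data/part-0");
    Path unqualified = new Path("/data/part-0");

    // Equality is URI equality, so the qualified and unqualified forms differ.
    System.out.println(qualified.equals(unqualified)); // false

    // Stripping the scheme and authority makes the two comparable --
    // the same normalization the Drill FileMetadata example below uses.
    Path stripped = Path.getPathWithoutSchemeAndAuthority(qualified);
    System.out.println(stripped.equals(unqualified)); // true
  }
}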
From source file:org.apache.drill.exec.physical.impl.scan.file.FileMetadata.java
License:Apache License
public FileMetadata(Path filePath, Path selectionRoot) {
  this.filePath = filePath;

  // If the data source is not a file, no file metadata is available.
  if (selectionRoot == null || filePath == null) {
    dirPath = null;
    return;
  }

  // If the query is against a single file, selection root and file path
  // will be identical, oddly.
  Path rootPath = Path.getPathWithoutSchemeAndAuthority(selectionRoot);
  Path bareFilePath = Path.getPathWithoutSchemeAndAuthority(filePath);
  if (rootPath.equals(bareFilePath)) {
    dirPath = null;
    return;
  }

  dirPath = ColumnExplorer.parsePartitions(filePath, rootPath, false);
  if (dirPath == null) {
    throw new IllegalArgumentException(
        String.format("Selection root of \"%s\" is not a leading path of \"%s\"",
            selectionRoot.toString(), filePath.toString()));
  }
}
From source file:org.apache.drill.exec.physical.impl.scan.file.FileMetadataManager.java
License:Apache License
/**
 * Specifies whether to plan based on the legacy meaning of "*". See
 * <a href="https://issues.apache.org/jira/browse/DRILL-5542">DRILL-5542</a>.
 * If true, then the star column <i>includes</i> implicit and partition
 * columns. If false, then star matches <i>only</i> table columns.
 *
 * @param optionManager access to the options for this query; used
 * to look up custom names for the metadata columns
 * @param useLegacyWildcardExpansion true to use the legacy plan, false to use the revised
 * semantics
 * @param rootDir when scanning multiple files, the root directory for
 * the file set. Unfortunately, the planner is ambiguous on this one; if the
 * query is against a single file, then this variable holds the name of that
 * one file, rather than a directory
 * @param files the set of files to scan. Used to compute the maximum partition
 * depth across all readers in this fragment
 *
 * @return this builder
 */
public FileMetadataManager(OptionSet optionManager, boolean useLegacyWildcardExpansion,
    boolean useLegacyExpansionLocation, Path rootDir, int partitionCount, List<Path> files) {
  this.useLegacyWildcardExpansion = useLegacyWildcardExpansion;
  this.useLegacyExpansionLocation = useLegacyExpansionLocation;
  partitionDesignator = optionManager.getString(ExecConstants.FILESYSTEM_PARTITION_COLUMN_LABEL);
  for (ImplicitFileColumns e : ImplicitFileColumns.values()) {
    String colName = optionManager.getString(e.optionName());
    if (!Strings.isEmpty(colName)) {
      FileMetadataColumnDefn defn = new FileMetadataColumnDefn(colName, e);
      implicitColDefns.add(defn);
      fileMetadataColIndex.put(defn.colName, defn);
    }
  }
  parser = new FileMetadataColumnsParser(this);

  // The files and root dir are optional.
  if (rootDir == null || files == null) {
    scanRootDir = null;
    this.partitionCount = 0;

    // Special case in which the file is the same as the
    // root directory (occurs for a query with only one file.)
  } else if (files.size() == 1 && rootDir.equals(files.get(0))) {
    scanRootDir = null;
    this.partitionCount = 0;
  } else {
    scanRootDir = rootDir;

    // Compute the partitions. Normally the count is passed in.
    // But, handle the case where the count is unknown. Note: use this
    // convenience only in testing since, in production, it can result
    // in different scans reporting different numbers of partitions.
    if (partitionCount == -1) {
      this.partitionCount = computeMaxPartition(files);
    } else {
      this.partitionCount = partitionCount;
    }
  }
}
From source file:org.apache.falcon.entity.FileSystemStorage.java
License:Apache License
private void deleteParentIfEmpty(FileSystem fs, Path parent, Path feedBasePath) throws IOException {
  if (feedBasePath.equals(parent)) {
    LOG.info("Not deleting feed base path: {}", parent);
  } else {
    FileStatus[] files = fs.listStatus(parent);
    if (files != null && files.length == 0) {
      LOG.info("Parent path: {} is empty, deleting path", parent);
      if (fs.delete(parent, true)) {
        LOG.info("Deleted empty dir: {}", parent);
      } else {
        throw new IOException("Unable to delete parent path:" + parent);
      }
      deleteParentIfEmpty(fs, parent.getParent(), feedBasePath);
    }
  }
}
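The recursion above uses feedBasePath.equals(parent) as its stop condition while climbing towards the feed base. A minimal sketch of the same walk-up-and-stop idiom in isolation (the directory layout is hypothetical):

import org.apache.hadoop.fs.Path;

public class WalkUpToBase {
  public static void main(String[] args) {
    Path base = new Path("/projects/falcon/feed");
    Path leaf = new Path("/projects/falcon/feed/2024/05/17");

    // Climb parent by parent; stop before touching the base path itself,
    // mirroring the feedBasePath.equals(parent) guard above.
    for (Path current = leaf; current != null && !current.equals(base); current = current.getParent()) {
      System.out.println("candidate for cleanup: " + current);
    }
  }
}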
From source file:org.apache.flink.batch.connectors.hive.HiveTableOutputFormat.java
License:Apache License
private void moveFiles(Path srcDir, Path destDir) throws IOException {
  if (!srcDir.equals(destDir)) {
    // TODO: src and dest may be on different FS
    FileSystem fs = destDir.getFileSystem(jobConf);
    Preconditions.checkState(fs.exists(destDir) || fs.mkdirs(destDir),
        "Failed to create dest path " + destDir);
    if (overwrite) {
      // delete existing files for overwrite
      // TODO: support setting auto-purge?
      final boolean purge = true;
      // Note we assume the srcDir is a hidden dir, otherwise it will be deleted if it's a sub-dir of destDir
      FileStatus[] existingFiles = fs.listStatus(destDir, FileUtils.HIDDEN_FILES_PATH_FILTER);
      if (existingFiles != null) {
        HiveShim hiveShim = HiveShimLoader.loadHiveShim();
        for (FileStatus existingFile : existingFiles) {
          Preconditions.checkState(hiveShim.moveToTrash(fs, existingFile.getPath(), jobConf, purge),
              "Failed to overwrite existing file " + existingFile);
        }
      }
    }
    FileStatus[] srcFiles = fs.listStatus(srcDir, FileUtils.HIDDEN_FILES_PATH_FILTER);
    for (FileStatus srcFile : srcFiles) {
      Path srcPath = srcFile.getPath();
      Path destPath = new Path(destDir, srcPath.getName());
      int count = 1;
      while (!fs.rename(srcPath, destPath)) {
        String name = srcPath.getName() + "_copy_" + count;
        destPath = new Path(destDir, name);
        count++;
      }
    }
  }
}
From source file:org.apache.flink.streaming.connectors.fs.RollingSink.java
License:Apache License
/**
 * Opens a new part file.
 *
 * <p>
 * This closes the old bucket file and retrieves a new bucket path from the {@code Bucketer}.
 */
private void openNewPartFile() throws Exception {
  closeCurrentPartFile();

  org.apache.hadoop.conf.Configuration conf = new org.apache.hadoop.conf.Configuration();
  FileSystem fs = new Path(basePath).getFileSystem(conf);

  Path newBucketDirectory = bucketer.getNextBucketPath(new Path(basePath));

  if (!newBucketDirectory.equals(currentBucketDirectory)) {
    currentBucketDirectory = newBucketDirectory;
    try {
      if (fs.mkdirs(currentBucketDirectory)) {
        LOG.debug("Created new bucket directory: {}", currentBucketDirectory);
      }
    } catch (IOException e) {
      throw new RuntimeException("Could not create base path for new rolling file.", e);
    }
  }

  currentPartPath = new Path(currentBucketDirectory, partPrefix + "-" + subtaskIndex + "-" + partCounter);

  // This should work since there is only one parallel subtask that tries names with
  // our subtask id. Otherwise we would run into concurrency issues here.
  while (fs.exists(currentPartPath)
      || fs.exists(new Path(currentPartPath.getParent(), pendingPrefix + currentPartPath.getName())
          .suffix(pendingSuffix))) {
    partCounter++;
    currentPartPath = new Path(currentBucketDirectory, partPrefix + "-" + subtaskIndex + "-" + partCounter);
  }

  // increase, so we don't have to check for this name next time
  partCounter++;

  LOG.debug("Next part path is {}", currentPartPath.toString());

  Path inProgressPath = new Path(currentPartPath.getParent(), inProgressPrefix + currentPartPath.getName())
      .suffix(inProgressSuffix);

  writer.open(fs, inProgressPath);
  isWriterOpen = true;
}
From source file:org.apache.hive.hcatalog.mapreduce.FileOutputCommitterContainer.java
License:Apache License
@Override
public void abortJob(JobContext jobContext, State state) throws IOException {
  try {
    if (dynamicPartitioningUsed) {
      discoverPartitions(jobContext);
    }
    org.apache.hadoop.mapred.JobContext mapRedJobContext = HCatMapRedUtil.createJobContext(jobContext);
    if (getBaseOutputCommitter() != null && !dynamicPartitioningUsed) {
      getBaseOutputCommitter().abortJob(mapRedJobContext, state);
    } else if (dynamicPartitioningUsed) {
      for (JobContext currContext : contextDiscoveredByPath.values()) {
        try {
          new JobConf(currContext.getConfiguration()).getOutputCommitter().abortJob(currContext, state);
        } catch (Exception e) {
          throw new IOException(e);
        }
      }
    }

    Path src;
    OutputJobInfo jobInfo = HCatOutputFormat.getJobInfo(jobContext.getConfiguration());
    Path tblPath = new Path(jobInfo.getTableInfo().getTableLocation());
    if (dynamicPartitioningUsed) {
      if (!customDynamicLocationUsed) {
        src = new Path(getPartitionRootLocation(jobInfo.getLocation(),
            jobInfo.getTableInfo().getTable().getPartitionKeysSize()));
      } else {
        src = new Path(getCustomPartitionRootLocation(jobInfo, jobContext.getConfiguration()));
      }
    } else {
      src = new Path(jobInfo.getLocation());
    }
    FileSystem fs = src.getFileSystem(jobContext.getConfiguration());
    // Note fs.delete will fail on Windows. The reason is that in OutputCommitter,
    // Hadoop is still writing to _logs/history. On Linux, the OS doesn't care that the
    // file is still open and removes the directory anyway, but on Windows, the OS refuses
    // to remove a directory containing open files. So on Windows, we will leave the output
    // directory behind when the job fails. The user needs to remove it manually.
    LOG.info("Job failed. Try cleaning up temporary directory [{}].", src);
    if (!src.equals(tblPath)) {
      fs.delete(src, true);
    }
  } finally {
    cancelDelegationTokens(jobContext);
  }
}
From source file:org.apache.hive.hcatalog.mapreduce.FileOutputCommitterContainer.java
License:Apache License
private void registerPartitions(JobContext context) throws IOException {
  if (dynamicPartitioningUsed) {
    discoverPartitions(context);
  }
  OutputJobInfo jobInfo = HCatOutputFormat.getJobInfo(context.getConfiguration());
  Configuration conf = context.getConfiguration();
  Table table = new Table(jobInfo.getTableInfo().getTable());
  Path tblPath = new Path(table.getTTable().getSd().getLocation());
  FileSystem fs = tblPath.getFileSystem(conf);

  if (table.getPartitionKeys().size() == 0) {
    // Move data from temp directory to the actual table directory.
    // No metastore operation required.
    Path src = new Path(jobInfo.getLocation());
    moveTaskOutputs(fs, src, src, tblPath, false, table.isImmutable());
    if (!src.equals(tblPath)) {
      fs.delete(src, true);
    }
    return;
  }

  IMetaStoreClient client = null;
  HCatTableInfo tableInfo = jobInfo.getTableInfo();
  List<Partition> partitionsAdded = new ArrayList<Partition>();
  try {
    HiveConf hiveConf = HCatUtil.getHiveConf(conf);
    client = HCatUtil.getHiveMetastoreClient(hiveConf);
    StorerInfo storer = InternalUtil.extractStorerInfo(table.getTTable().getSd(), table.getParameters());

    FileStatus tblStat = fs.getFileStatus(tblPath);
    String grpName = tblStat.getGroup();
    FsPermission perms = tblStat.getPermission();

    List<Partition> partitionsToAdd = new ArrayList<Partition>();
    if (!dynamicPartitioningUsed) {
      partitionsToAdd.add(constructPartition(context, jobInfo, tblPath.toString(), null,
          jobInfo.getPartitionValues(), jobInfo.getOutputSchema(), getStorerParameterMap(storer),
          table, fs, grpName, perms));
    } else {
      for (Entry<String, Map<String, String>> entry : partitionsDiscoveredByPath.entrySet()) {
        partitionsToAdd.add(constructPartition(context, jobInfo,
            getPartitionRootLocation(entry.getKey(), entry.getValue().size()), entry.getKey(),
            entry.getValue(), jobInfo.getOutputSchema(), getStorerParameterMap(storer), table, fs,
            grpName, perms));
      }
    }

    ArrayList<Map<String, String>> ptnInfos = new ArrayList<Map<String, String>>();
    for (Partition ptn : partitionsToAdd) {
      ptnInfos.add(InternalUtil.createPtnKeyValueMap(new Table(tableInfo.getTable()), ptn));
    }

    /**
     * Dynamic partitioning & Append incompatibility note:
     *
     * Currently, we do not support mixing dynamic partitioning and append in the
     * same job. One reason is that we need exhaustive testing of corner cases
     * for that, and a second reason is the behaviour of add_partitions. To support
     * dynamic partitioning with append, we'd have to have an add_partitions_if_not_exist
     * call, rather than an add_partitions call. Thus far, we've tried to keep the
     * implementation of append jobtype-agnostic, but here, in code, we assume that
     * a table is considered immutable if dynamic partitioning is enabled on the job.
     *
     * This does not mean that we can check before the job begins that this is going
     * to be a dynamic partition job on an immutable table and thus fail the job, since
     * it is quite possible to have a dynamic partitioning job run on an unpopulated
     * immutable table. It simply means that at the end of the job, as far as copying
     * in data is concerned, we will pretend that the table is immutable irrespective
     * of what table.isImmutable() tells us.
     */

    // Publish the new partition(s)
    if (dynamicPartitioningUsed && harProcessor.isEnabled() && (!partitionsToAdd.isEmpty())) {

      if (!customDynamicLocationUsed) {
        Path src = new Path(ptnRootLocation);
        // check here for each dir we're copying out, to see if it
        // already exists, error out if so.
        // Also, treat dyn-writes as writes to immutable tables.
        moveTaskOutputs(fs, src, src, tblPath, true, true); // dryRun = true, immutable = true
        moveTaskOutputs(fs, src, src, tblPath, false, true);
        if (!src.equals(tblPath)) {
          fs.delete(src, true);
        }
      } else {
        moveCustomLocationTaskOutputs(fs, table, hiveConf);
      }
      try {
        updateTableSchema(client, table, jobInfo.getOutputSchema());
        LOG.info("HAR is being used. The table {} has new partitions {}.", table.getTableName(), ptnInfos);
        client.add_partitions(partitionsToAdd);
        partitionsAdded = partitionsToAdd;
      } catch (Exception e) {
        // There was an error adding partitions : rollback fs copy and rethrow
        for (Partition p : partitionsToAdd) {
          Path ptnPath = new Path(harProcessor.getParentFSPath(new Path(p.getSd().getLocation())));
          if (fs.exists(ptnPath)) {
            fs.delete(ptnPath, true);
          }
        }
        throw e;
      }
    } else {
      // no harProcessor, regular operation
      updateTableSchema(client, table, jobInfo.getOutputSchema());
      LOG.info("HAR is not being used. The table {} has new partitions {}.", table.getTableName(), ptnInfos);
      if (partitionsToAdd.size() > 0) {
        if (!dynamicPartitioningUsed) {
          // regular single-partition write into a partitioned table.
          // Move data from temp directory to the actual table directory.
          if (partitionsToAdd.size() > 1) {
            throw new HCatException(ErrorType.ERROR_PUBLISHING_PARTITION,
                "More than one partition to publish in non-dynamic partitioning job");
          }
          Partition p = partitionsToAdd.get(0);
          Path src = new Path(jobInfo.getLocation());
          Path dest = new Path(p.getSd().getLocation());
          moveTaskOutputs(fs, src, src, dest, true, table.isImmutable());
          moveTaskOutputs(fs, src, src, dest, false, table.isImmutable());
          if (!src.equals(dest)) {
            fs.delete(src, true);
          }

          // Now, we check if the partition already exists. If not, we go ahead.
          // If so, we error out if immutable, and if mutable, check that the partition's IF
          // matches our current job's IF (table's IF) to check for compatibility. If compatible, we
          // ignore and do not add. If incompatible, we error out again.
          boolean publishRequired = false;
          try {
            Partition existingP = client.getPartition(p.getDbName(), p.getTableName(), p.getValues());
            if (existingP != null) {
              if (table.isImmutable()) {
                throw new HCatException(ErrorType.ERROR_DUPLICATE_PARTITION,
                    "Attempted duplicate partition publish on to immutable table");
              } else {
                if (!existingP.getSd().getInputFormat().equals(table.getInputFormatClass().getName())) {
                  throw new HCatException(ErrorType.ERROR_PUBLISHING_PARTITION,
                      "Attempted partition append, where old partition format was "
                          + existingP.getSd().getInputFormat() + " and table format was "
                          + table.getInputFormatClass().getName());
                }
              }
            } else {
              publishRequired = true;
            }
          } catch (NoSuchObjectException e) {
            // All good, no such partition exists, move on.
            publishRequired = true;
          }
          if (publishRequired) {
            client.add_partitions(partitionsToAdd);
            partitionsAdded = partitionsToAdd;
          }
        } else {
          // Dynamic partitioning usecase
          if (!customDynamicLocationUsed) {
            Path src = new Path(ptnRootLocation);
            moveTaskOutputs(fs, src, src, tblPath, true, true); // dryRun = true, immutable = true
            moveTaskOutputs(fs, src, src, tblPath, false, true);
            if (!src.equals(tblPath)) {
              fs.delete(src, true);
            }
          } else {
            moveCustomLocationTaskOutputs(fs, table, hiveConf);
          }
          client.add_partitions(partitionsToAdd);
          partitionsAdded = partitionsToAdd;
        }
      }

      // Set permissions appropriately for each of the partitions we just created
      // so as to have their permissions mimic the table permissions
      for (Partition p : partitionsAdded) {
        applyGroupAndPerms(fs, new Path(p.getSd().getLocation()), tblStat.getPermission(),
            tblStat.getGroup(), true);
      }
    }
  } catch (Exception e) {
    if (partitionsAdded.size() > 0) {
      try {
        // baseCommitter.cleanupJob failed, try to clean up the
        // metastore
        for (Partition p : partitionsAdded) {
          client.dropPartition(tableInfo.getDatabaseName(), tableInfo.getTableName(), p.getValues(), true);
        }
      } catch (Exception te) {
        // Keep cause as the original exception
        throw new HCatException(ErrorType.ERROR_PUBLISHING_PARTITION, e);
      }
    }
    if (e instanceof HCatException) {
      throw (HCatException) e;
    } else {
      throw new HCatException(ErrorType.ERROR_PUBLISHING_PARTITION, e);
    }
  } finally {
    HCatUtil.closeHiveClientQuietly(client);
  }
}
From source file:org.apache.hoya.avro.RoleHistoryWriter.java
License:Apache License
/**
 * Delete all history entries older than the one we want to keep. This
 * uses the filename ordering to determine age, not timestamps
 * @param fileSystem filesystem
 * @param keep path to keep - used in thresholding the files
 * @return the number of files deleted
 * @throws FileNotFoundException if the path to keep is not present (safety
 * check to stop the entire dir being purged)
 * @throws IOException IO problems
 */
public int purgeOlderHistoryEntries(FileSystem fileSystem, Path keep) throws IOException {
  assert fileSystem != null : "null filesystem";
  if (!fileSystem.exists(keep)) {
    throw new FileNotFoundException(keep.toString());
  }
  Path dir = keep.getParent();
  log.debug("Purging entries in {} up to {}", dir, keep);
  List<Path> paths = findAllHistoryEntries(fileSystem, dir, true);
  Collections.sort(paths, new OlderFilesFirst());
  int deleteCount = 0;
  for (Path path : paths) {
    if (path.equals(keep)) {
      break;
    } else {
      log.debug("Deleting {}", path);
      deleteCount++;
      fileSystem.delete(path, false);
    }
  }
  return deleteCount;
}
From source file:org.apache.impala.common.FileSystemUtil.java
License:Apache License
/**
 * Returns true if Path 'p' is a descendant of Path 'parent', false otherwise.
 * This function relies on Path.equals(), which requires paths to have the same
 * scheme and authority to compare equal. So both 'p' and 'parent' should either
 * be qualified or unqualified paths for this function to behave as expected.
 */
public static boolean isDescendantPath(Path p, Path parent) {
  if (p == null || parent == null) return false;
  while (!p.isRoot() && p.depth() != parent.depth()) p = p.getParent();
  if (p.isRoot()) return false;
  boolean result = p.equals(parent);
  if (!result && LOG.isTraceEnabled()) {
    // Add a message to the log if 'p' and 'parent' have inconsistent qualification.
    URI pUri = p.toUri();
    URI parentUri = parent.toUri();
    boolean sameScheme = Objects.equal(pUri.getScheme(), parentUri.getScheme());
    boolean sameAuthority = Objects.equal(pUri.getAuthority(), parentUri.getAuthority());
    if (!sameScheme || !sameAuthority) {
      LOG.trace("Inconsistent schema or authority for paths: " + p.toString() + " " + parent.toString());
    }
  }
  return result;
}
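As the javadoc notes, the comparison only behaves as expected when both paths are equally qualified. A minimal sketch of qualifying both sides against the same FileSystem before comparing (the warehouse layout is hypothetical):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class QualifyBeforeCompare {
  public static void main(String[] args) throws Exception {
    FileSystem fs = FileSystem.get(new Configuration());

    // Qualify both paths against the same FileSystem so that equals()
    // sees a consistent scheme and authority on each side.
    Path parent = fs.makeQualified(new Path("/warehouse/db1.db/t1"));
    Path child = fs.makeQualified(new Path("/warehouse/db1.db/t1/p=1/file0"));

    System.out.println(parent.equals(child));                          // false
    System.out.println(child.getParent().getParent().equals(parent));  // true
  }
}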
From source file:org.apache.oozie.action.hadoop.LauncherHelper.java
License:Apache License
/**
 * Utility function to load the contents of the action data sequence file into
 * a memory object
 *
 * @param fs Action Filesystem
 * @param actionDir Path
 * @param conf Configuration
 * @return Map action data
 * @throws IOException if an IO error occurred
 * @throws InterruptedException if UGI action is interrupted
 */
public static Map<String, String> getActionData(final FileSystem fs, final Path actionDir,
    final Configuration conf) throws IOException, InterruptedException {
  UserGroupInformationService ugiService = Services.get().get(UserGroupInformationService.class);
  UserGroupInformation ugi = ugiService.getProxyUser(conf.get(OozieClient.USER_NAME));

  return ugi.doAs(new PrivilegedExceptionAction<Map<String, String>>() {
    @Override
    public Map<String, String> run() throws IOException {
      Map<String, String> ret = new HashMap<>();
      Path seqFilePath = getActionDataSequenceFilePath(actionDir);
      if (fs.exists(seqFilePath)) {
        SequenceFile.Reader seqFile = new SequenceFile.Reader(fs, seqFilePath, conf);
        Text key = new Text(), value = new Text();
        while (seqFile.next(key, value)) {
          ret.put(key.toString(), value.toString());
        }
        seqFile.close();
      } else {
        // maintain backward-compatibility. to be deprecated
        org.apache.hadoop.fs.FileStatus[] files = fs.listStatus(actionDir);
        InputStream is;
        BufferedReader reader;
        Properties props;
        if (files != null && files.length > 0) {
          for (FileStatus fileStatus : files) {
            Path path = fileStatus.getPath();
            if (path.equals(new Path(actionDir, "externalChildIds.properties"))) {
              is = fs.open(path);
              reader = new BufferedReader(new InputStreamReader(is, StandardCharsets.UTF_8));
              ret.put(LauncherAMUtils.ACTION_DATA_EXTERNAL_CHILD_IDS,
                  IOUtils.getReaderAsString(reader, -1));
            } else if (path.equals(new Path(actionDir, "newId.properties"))) {
              is = fs.open(path);
              reader = new BufferedReader(new InputStreamReader(is, StandardCharsets.UTF_8));
              props = PropertiesUtils.readProperties(reader, -1);
              ret.put(LauncherAMUtils.ACTION_DATA_NEW_ID, props.getProperty("id"));
            } else if (path.equals(new Path(actionDir, LauncherAMUtils.ACTION_DATA_OUTPUT_PROPS))) {
              int maxOutputData = conf.getInt(LauncherAMUtils.CONF_OOZIE_ACTION_MAX_OUTPUT_DATA, 2 * 1024);
              is = fs.open(path);
              reader = new BufferedReader(new InputStreamReader(is, StandardCharsets.UTF_8));
              ret.put(LauncherAMUtils.ACTION_DATA_OUTPUT_PROPS, PropertiesUtils
                  .propertiesToString(PropertiesUtils.readProperties(reader, maxOutputData)));
            } else if (path.equals(new Path(actionDir, LauncherAMUtils.ACTION_DATA_STATS))) {
              int statsMaxOutputData = conf.getInt(
                  LauncherAMUtils.CONF_OOZIE_EXTERNAL_STATS_MAX_SIZE, Integer.MAX_VALUE);
              is = fs.open(path);
              reader = new BufferedReader(new InputStreamReader(is, StandardCharsets.UTF_8));
              ret.put(LauncherAMUtils.ACTION_DATA_STATS, PropertiesUtils.propertiesToString(
                  PropertiesUtils.readProperties(reader, statsMaxOutputData)));
            } else if (path.equals(new Path(actionDir, LauncherAMUtils.ACTION_DATA_ERROR_PROPS))) {
              is = fs.open(path);
              reader = new BufferedReader(new InputStreamReader(is, StandardCharsets.UTF_8));
              ret.put(LauncherAMUtils.ACTION_DATA_ERROR_PROPS, IOUtils.getReaderAsString(reader, -1));
            }
          }
        }
      }
      return ret;
    }
  });
}
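The loop above matches each listed entry against a Path built from actionDir, which lines up only when actionDir carries the same qualification as the paths returned by listStatus(). When only the file name matters, comparing Path.getName() sidesteps qualification entirely; a small sketch (the directory and class name are made up for illustration):

import org.apache.hadoop.fs.Path;

public class NameBasedMatch {
  public static void main(String[] args) {
    // listStatus() returns fully qualified paths; a child built from an
    // unqualified directory is not equal to the listed form.
    Path listed = new Path("hdfs://nn:8020/user/oozie/action/newId.properties");
    Path constructed = new Path(new Path("/user/oozie/action"), "newId.properties");

    System.out.println(listed.equals(constructed));                   // false
    System.out.println(listed.getName().equals("newId.properties"));  // true
  }
}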