List of usage examples for org.apache.hadoop.fs.Path.getParent()
public Path getParent()
From source file:com.trendmicro.hdfs.webdav.HDFSResource.java
License:Apache License
/**
 * Builds the href of this resource from the names of {@code this.path} and its ancestors.
 *
 * <p>Walks upward via {@link Path#getParent()}, prepending {@code "/" + name} for each
 * element, and stops at the first element whose name is empty or when there is no
 * further parent.
 *
 * @return the slash-separated href; {@code "/"} when no named element was found
 */
@Override
public String getHref() {
  final StringBuilder href = new StringBuilder();
  for (Path node = this.path; node != null; node = node.getParent()) {
    final String segment = node.getName();
    if ("".equals(segment)) {
      // An empty name ends the walk (presumably the filesystem root).
      break;
    }
    href.insert(0, segment).insert(0, "/");
  }
  return href.length() == 0 ? "/" : href.toString();
}
From source file:com.twitter.algebra.AlgebraCommon.java
License:Apache License
/**
 * Converts a matrix stored on the file system into a single in-memory dense matrix.
 * The input is assumed to have been generated by {@link MatrixOutputFormat}.
 *
 * @param inPath the path to the {@link MapDir matrix}; its parent directory is handed
 *               to the {@link DistributedRowMatrix} constructor as the second path argument
 * @param nRows number of rows passed to the {@link DistributedRowMatrix} constructor
 * @param nCols number of columns passed to the {@link DistributedRowMatrix} constructor
 * @param conf configuration applied to the distributed matrix before conversion
 * @return the dense matrix produced by {@code toDenseMatrix}
 * @throws IOException declared by the signature; presumably raised while reading the matrix
 */
public static DenseMatrix mapDirToDenseMatrix(Path inPath, int nRows, int nCols, Configuration conf)
    throws IOException {
  final DistributedRowMatrix rowMatrix =
      new DistributedRowMatrix(inPath, inPath.getParent(), nRows, nCols);
  rowMatrix.setConf(conf);
  return toDenseMatrix(rowMatrix);
}
From source file:com.twitter.algebra.AlgebraCommon.java
License:Apache License
/**
 * Converts a matrix stored on the file system into a single in-memory sparse matrix.
 * The input is assumed to have been generated by {@link MatrixOutputFormat}.
 *
 * @param inPath the path to the {@link MapDir matrix}; its parent directory is handed
 *               to the {@link DistributedRowMatrix} constructor as the second path argument
 * @param nRows number of rows passed to the {@link DistributedRowMatrix} constructor
 * @param nCols number of columns passed to the {@link DistributedRowMatrix} constructor
 * @param conf configuration applied to the distributed matrix before conversion
 * @return the sparse matrix produced by {@code toSparseMatrix}
 * @throws IOException declared by the signature; presumably raised while reading the matrix
 */
public static SparseMatrix mapDirToSparseMatrix(Path inPath, int nRows, int nCols, Configuration conf)
    throws IOException {
  final DistributedRowMatrix rowMatrix =
      new DistributedRowMatrix(inPath, inPath.getParent(), nRows, nCols);
  rowMatrix.setConf(conf);
  return toSparseMatrix(rowMatrix);
}
From source file:com.uber.hoodie.common.model.HoodieLogFile.java
License:Apache License
public HoodieLogFile rollOver(FileSystem fs, String logWriteToken) throws IOException { String fileId = getFileId();//from w w w. j a v a 2 s . c om String baseCommitTime = getBaseCommitTime(); Path path = getPath(); String extension = "." + FSUtils.getFileExtensionFromLog(path); int newVersion = FSUtils.computeNextLogVersion(fs, path.getParent(), fileId, extension, baseCommitTime); return new HoodieLogFile(new Path(path.getParent(), FSUtils.makeLogFileName(fileId, extension, baseCommitTime, newVersion, logWriteToken))); }
From source file:com.uber.hoodie.common.util.FSUtils.java
License:Apache License
/** * Gets all partition paths assuming date partitioning (year, month, day) three levels down. */// w ww. ja v a 2 s . c o m public static List<String> getAllPartitionFoldersThreeLevelsDown(FileSystem fs, String basePath) throws IOException { List<String> datePartitions = new ArrayList<>(); // Avoid listing and including any folders under the metafolder PathFilter filter = getExcludeMetaPathFilter(); FileStatus[] folders = fs.globStatus(new Path(basePath + "/*/*/*"), filter); for (FileStatus status : folders) { Path path = status.getPath(); datePartitions.add(String.format("%s/%s/%s", path.getParent().getParent().getName(), path.getParent().getName(), path.getName())); } return datePartitions; }
From source file:com.uber.hoodie.common.util.FSUtils.java
License:Apache License
/** * Given a base partition and a partition path, return * relative path of partition path to the base path */// w w w . ja v a 2 s. c o m public static String getRelativePartitionPath(Path basePath, Path partitionPath) { basePath = Path.getPathWithoutSchemeAndAuthority(basePath); partitionPath = Path.getPathWithoutSchemeAndAuthority(partitionPath); String partitionFullPath = partitionPath.toString(); int partitionStartIndex = partitionFullPath.indexOf(basePath.getName(), basePath.getParent() == null ? 0 : basePath.getParent().toString().length()); // Partition-Path could be empty for non-partitioned tables return partitionStartIndex + basePath.getName().length() == partitionFullPath.length() ? "" : partitionFullPath.substring(partitionStartIndex + basePath.getName().length() + 1); }
From source file:com.uber.hoodie.common.util.FSUtils.java
License:Apache License
/** * Obtain all the partition paths, that are present in this table, denoted by presence of {@link * com.uber.hoodie.common.model.HoodiePartitionMetadata#HOODIE_PARTITION_METAFILE} */// w w w . j a v a 2s. com public static List<String> getAllFoldersWithPartitionMetaFile(FileSystem fs, String basePathStr) throws IOException { final Path basePath = new Path(basePathStr); final List<String> partitions = new ArrayList<>(); processFiles(fs, basePathStr, (locatedFileStatus) -> { Path filePath = locatedFileStatus.getPath(); if (filePath.getName().equals(HoodiePartitionMetadata.HOODIE_PARTITION_METAFILE)) { partitions.add(getRelativePartitionPath(basePath, filePath.getParent())); } return true; }, true); return partitions; }
From source file:com.uber.hoodie.hadoop.HoodieHiveUtil.java
License:Apache License
/**
 * Climbs {@code n} levels up from {@code path} by calling {@link Path#getParent()}
 * repeatedly.
 *
 * @param path starting path
 * @param n number of levels to climb; for {@code n <= 0} the path itself is returned
 * @return the n-th ancestor; may be {@code null} if the last climb lands above the top
 *         of the path
 * @throws NullPointerException if an intermediate ancestor is {@code null} while more
 *         levels remain to be climbed
 */
public static Path getNthParent(Path path, int n) {
  Path ancestor = path;
  int remaining = n;
  while (remaining-- > 0) {
    ancestor = ancestor.getParent();
  }
  return ancestor;
}
From source file:com.uber.hoodie.hadoop.HoodieROTablePathFilter.java
License:Apache License
/**
 * Obtains the ancestor three levels above the provided path (parent of the parent of
 * the parent), without failing when the path is too shallow.
 *
 * @param path the path to climb from
 * @return the third-level ancestor if all three levels exist, {@code null} otherwise
 */
private Path safeGetParentsParent(Path path) {
  Path ancestor = path.getParent();
  // Two more climbs after the first; stop early once an ancestor is missing.
  for (int climbed = 1; climbed < 3 && ancestor != null; climbed++) {
    ancestor = ancestor.getParent();
  }
  return ancestor;
}
From source file:com.uber.hoodie.hadoop.HoodieROTablePathFilter.java
License:Apache License
@Override public boolean accept(Path path) { if (LOG.isDebugEnabled()) { LOG.debug("Checking acceptance for path " + path); }// ww w. ja va 2s. c o m Path folder = null; try { if (fs == null) { fs = path.getFileSystem(new Configuration()); } // Assumes path is a file folder = path.getParent(); // get the immediate parent. // Try to use the caches. if (nonHoodiePathCache.contains(folder.toString())) { if (LOG.isDebugEnabled()) { LOG.debug("Accepting non-hoodie path from cache: " + path); } return true; } if (hoodiePathCache.containsKey(folder.toString())) { if (LOG.isDebugEnabled()) { LOG.debug(String.format("%s Hoodie path checked against cache, accept => %s \n", path, hoodiePathCache.get(folder.toString()).contains(path))); } return hoodiePathCache.get(folder.toString()).contains(path); } // Skip all files that are descendants of .hoodie in its path. String filePath = path.toString(); if (filePath.contains("/" + HoodieTableMetaClient.METAFOLDER_NAME + "/") || filePath.endsWith("/" + HoodieTableMetaClient.METAFOLDER_NAME)) { if (LOG.isDebugEnabled()) { LOG.debug(String.format("Skipping Hoodie Metadata file %s \n", filePath)); } return false; } // Perform actual checking. 
Path baseDir; if (HoodiePartitionMetadata.hasPartitionMetadata(fs, folder)) { HoodiePartitionMetadata metadata = new HoodiePartitionMetadata(fs, folder); metadata.readFromFS(); baseDir = HoodieHiveUtil.getNthParent(folder, metadata.getPartitionDepth()); } else { baseDir = safeGetParentsParent(folder); } if (baseDir != null) { try { HoodieTableMetaClient metaClient = new HoodieTableMetaClient(fs.getConf(), baseDir.toString()); HoodieTableFileSystemView fsView = new HoodieTableFileSystemView(metaClient, metaClient.getActiveTimeline().getCommitsTimeline().filterCompletedInstants(), fs.listStatus(folder)); List<HoodieDataFile> latestFiles = fsView.getLatestDataFiles().collect(Collectors.toList()); // populate the cache if (!hoodiePathCache.containsKey(folder.toString())) { hoodiePathCache.put(folder.toString(), new HashSet<>()); } LOG.info("Based on hoodie metadata from base path: " + baseDir.toString() + ", caching " + latestFiles.size() + " files under " + folder); for (HoodieDataFile lfile : latestFiles) { hoodiePathCache.get(folder.toString()).add(new Path(lfile.getPath())); } // accept the path, if its among the latest files. if (LOG.isDebugEnabled()) { LOG.debug(String.format("%s checked after cache population, accept => %s \n", path, hoodiePathCache.get(folder.toString()).contains(path))); } return hoodiePathCache.get(folder.toString()).contains(path); } catch (DatasetNotFoundException e) { // Non-hoodie path, accept it. 
if (LOG.isDebugEnabled()) { LOG.debug(String.format("(1) Caching non-hoodie path under %s \n", folder.toString())); } nonHoodiePathCache.add(folder.toString()); return true; } } else { // files is at < 3 level depth in FS tree, can't be hoodie dataset if (LOG.isDebugEnabled()) { LOG.debug(String.format("(2) Caching non-hoodie path under %s \n", folder.toString())); } nonHoodiePathCache.add(folder.toString()); return true; } } catch (Exception e) { String msg = "Error checking path :" + path + ", under folder: " + folder; LOG.error(msg, e); throw new HoodieException(msg, e); } }