List of usage examples for org.apache.hadoop.fs.Path.getPathWithoutSchemeAndAuthority
public static Path getPathWithoutSchemeAndAuthority(Path path)
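The method strips the scheme (for example hdfs) and the authority (host:port) from a fully qualified Path, keeping only the slash-separated path component. A minimal sketch of the behavior, with an invented namenode host and paths:

import org.apache.hadoop.fs.Path;

public class PathStripDemo {
  public static void main(String[] args) {
    // Fully qualified path: scheme "hdfs", authority "namenode:8020"
    Path qualified = new Path("hdfs://namenode:8020/user/alice/data");
    Path stripped = Path.getPathWithoutSchemeAndAuthority(qualified);
    System.out.println(stripped); // prints: /user/alice/data

    // A path that already has no scheme or authority comes back unchanged
    System.out.println(Path.getPathWithoutSchemeAndAuthority(new Path("/tmp/x"))); // prints: /tmp/x
  }
}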
From source file: org.apache.gobblin.data.management.policy.HiddenFilterSelectionPolicy.java
License: Apache License

private Predicate<FileSystemDatasetVersion> getSelectionPredicate() {
  return new Predicate<FileSystemDatasetVersion>() {
    @Override
    public boolean apply(FileSystemDatasetVersion version) {
      Set<Path> paths = version.getPaths();
      for (Path path : paths) {
        // getPathWithoutSchemeAndAuthority is static; call it on Path rather than on an instance
        Path p = Path.getPathWithoutSchemeAndAuthority(path);
        if (isPathHidden(p)) {
          return false;
        }
      }
      return true;
    }
  };
}
From source file: org.apache.gobblin.source.extractor.filebased.FileBasedSource.java
License: Apache License

/**
 * Add lineage source info to a single work unit
 *
 * @param workUnit a single work unit, not an instance of {@link org.apache.gobblin.source.workunit.MultiWorkUnit}
 * @param state configurations
 */
protected void addLineageSourceInfo(WorkUnit workUnit, State state) {
  if (!lineageInfo.isPresent()) {
    log.info("Lineage is not enabled");
    return;
  }
  String platform = state.getProp(ConfigurationKeys.SOURCE_FILEBASED_PLATFORM, DatasetConstants.PLATFORM_HDFS);
  Path dataDir = new Path(state.getProp(ConfigurationKeys.SOURCE_FILEBASED_DATA_DIRECTORY));
  String dataset = Path.getPathWithoutSchemeAndAuthority(dataDir).toString();
  DatasetDescriptor source = new DatasetDescriptor(platform, dataset);
  lineageInfo.get().setSource(source, workUnit);
}
From source file: org.apache.gobblin.source.extractor.filebased.FileBasedSourceTest.java
License: Apache License

@Test
public void testSourceLineage() {
  String dataset = Path.getPathWithoutSchemeAndAuthority(sourceDir).toString();
  SourceState sourceState = new SourceState();
  sourceState.setBroker(jobBroker);
  initState(sourceState);

  // Avro file based source
  AvroFileSource fileSource = new AvroFileSource();
  List<WorkUnit> workUnits = fileSource.getWorkunits(sourceState);
  DatasetDescriptor datasetDescriptor = new DatasetDescriptor("hdfs", dataset);
  for (WorkUnit workUnit : workUnits) {
    Assert.assertEquals(workUnit.getProp(SOURCE_LINEAGE_KEY), Descriptor.toJson(datasetDescriptor));
  }

  // Partitioned file based source
  // Test platform configuration
  sourceState.setProp(ConfigurationKeys.SOURCE_FILEBASED_PLATFORM, DatasetConstants.PLATFORM_FILE);
  DatePartitionedJsonFileSource partitionedFileSource = new DatePartitionedJsonFileSource();
  workUnits = partitionedFileSource.getWorkunits(sourceState);
  datasetDescriptor = new DatasetDescriptor("file", dataset);
  Set<String> partitions = Sets.newHashSet("2017-12", "2018-01");
  for (WorkUnit workUnit : workUnits) {
    if (workUnit instanceof MultiWorkUnit) {
      DatasetDescriptor finalDatasetDescriptor = datasetDescriptor;
      ((MultiWorkUnit) workUnit).getWorkUnits()
          .forEach(wu -> verifyPartitionSourceLineage(wu, partitions, finalDatasetDescriptor));
    } else {
      verifyPartitionSourceLineage(workUnit, partitions, datasetDescriptor);
    }
  }
}
From source file: org.apache.gobblin.source.PartitionedFileSourceBase.java
License: Apache License

@Override
protected void addLineageSourceInfo(WorkUnit workUnit, State state) {
  if (!lineageInfo.isPresent()) {
    log.info("Lineage is not enabled");
    return;
  }
  String platform = state.getProp(ConfigurationKeys.SOURCE_FILEBASED_PLATFORM, DatasetConstants.PLATFORM_HDFS);
  Path dataDir = new Path(state.getProp(ConfigurationKeys.SOURCE_FILEBASED_DATA_DIRECTORY));
  String dataset = Path.getPathWithoutSchemeAndAuthority(dataDir).toString();
  DatasetDescriptor datasetDescriptor = new DatasetDescriptor(platform, dataset);
  String partitionName = workUnit.getProp(ConfigurationKeys.WORK_UNIT_DATE_PARTITION_NAME);
  PartitionDescriptor descriptor = new PartitionDescriptor(partitionName, datasetDescriptor);
  lineageInfo.get().setSource(descriptor, workUnit);
}
From source file: org.apache.kylin.common.KylinConfigBase.java
License: Apache License

public String getReadHdfsWorkingDirectory() {
  if (StringUtils.isNotEmpty(getHBaseClusterFs())) {
    Path workingDir = new Path(getHdfsWorkingDirectory());
    return new Path(getHBaseClusterFs(), Path.getPathWithoutSchemeAndAuthority(workingDir)).toString() + "/";
  }
  return getHdfsWorkingDirectory();
}
From source file: org.apache.kylin.common.persistence.JDBCResourceDAO.java
License: Apache License

public Path bigCellHDFSPath(String resPath) {
  String hdfsWorkingDirectory = this.kylinConfig.getHdfsWorkingDirectory();
  Path redirectPath = new Path(hdfsWorkingDirectory, "resources-jdbc" + resPath);
  redirectPath = Path.getPathWithoutSchemeAndAuthority(redirectPath);
  return redirectPath;
}
From source file: org.apache.kylin.storage.hbase.HBaseConnection.java
License: Apache License

public static String makeQualifiedPathInHBaseCluster(String inPath) {
  Path path = new Path(inPath);
  path = Path.getPathWithoutSchemeAndAuthority(path);
  try {
    FileSystem fs = FileSystem.get(getCurrentHBaseConfiguration());
    return fs.makeQualified(path).toString();
  } catch (IOException e) {
    throw new IllegalArgumentException("Cannot create FileSystem from current hbase cluster conf", e);
  }
}
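The two Kylin snippets above share a pattern: strip the scheme and authority from a path qualified against one cluster, then re-qualify the bare path against another filesystem's default. A standalone sketch of that pattern, assuming made-up cluster hosts and directories:

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class RebasePathDemo {
  public static void main(String[] args) throws IOException {
    // Path qualified against the source cluster
    Path onSource = new Path("hdfs://source-cluster:8020/kylin/working_dir");

    // Drop "hdfs://source-cluster:8020", leaving /kylin/working_dir
    Path bare = Path.getPathWithoutSchemeAndAuthority(onSource);

    // Re-qualify against the target cluster's default filesystem
    Configuration targetConf = new Configuration();
    targetConf.set("fs.defaultFS", "hdfs://target-cluster:8020");
    FileSystem targetFs = FileSystem.get(targetConf);
    System.out.println(targetFs.makeQualified(bare));
    // expected: hdfs://target-cluster:8020/kylin/working_dir
  }
}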
From source file: org.apache.lens.server.util.ScannedPaths.java
License: Apache License

/**
 * Method that computes path of resources matching the input path or path regex pattern.
 * If provided path is a directory it additionally checks for the jar_order or glob_order file
 * that imposes ordering of resources and filters out other resources.
 *
 * Updates the finalPaths list with matched paths and returns it.
 */
private List<String> getMatchedPaths(Path pt, String type) {
  List<String> finalPaths = new ArrayList<>();
  InputStream resourceOrderIStream = null;
  FileSystem fs;
  try {
    fs = pt.getFileSystem(new Configuration());
    if (fs.exists(pt)) {
      if (fs.isFile(pt)) {
        /** CASE 1 : Direct FILE provided in path **/
        finalPaths.add(pt.toUri().toString());
      } else if (fs.isDirectory(pt)) {
        /** CASE 2 : DIR provided in path **/
        Path resourceOrderFile;
        FileStatus[] statuses;
        List<String> newMatches;
        List<String> resources;
        resourceOrderFile = new Path(pt, "jar_order");
        /** Add everything in dir if no jar_order or glob_order is present **/
        if (!fs.exists(resourceOrderFile)) {
          resourceOrderFile = new Path(pt, "glob_order");
          if (!fs.exists(resourceOrderFile)) {
            resourceOrderFile = null;
            /** Get matched resources recursively for all files **/
            statuses = fs.globStatus(new Path(pt, "*"));
            if (statuses != null) {
              for (FileStatus st : statuses) {
                newMatches = getMatchedPaths(st.getPath(), type);
                finalPaths.addAll(newMatches);
              }
            }
          }
        }
        if (resourceOrderFile != null) {
          /** Else get jars as per order specified in jar_order/glob_order **/
          resourceOrderIStream = fs.open(resourceOrderFile);
          resources = IOUtils.readLines(resourceOrderIStream, Charset.forName("UTF-8"));
          for (String resource : resources) {
            if (StringUtils.isBlank(resource)) {
              continue;
            }
            resource = resource.trim();
            /** Get matched resources recursively for provided path/pattern **/
            if (resource.startsWith("/") || resource.contains(":/")) {
              newMatches = getMatchedPaths(new Path(resource), type);
            } else {
              newMatches = getMatchedPaths(new Path(pt, resource), type);
            }
            finalPaths.addAll(newMatches);
          }
        }
      }
    } else {
      /** CASE 3 : REGEX provided in path **/
      FileStatus[] statuses = fs.globStatus(Path.getPathWithoutSchemeAndAuthority(pt));
      if (statuses != null) {
        for (FileStatus st : statuses) {
          List<String> newMatches = getMatchedPaths(st.getPath(), type);
          finalPaths.addAll(newMatches);
        }
      }
    }
    filterDirsAndJarType(fs, finalPaths);
  } catch (FileNotFoundException fex) {
    log.error("File not found while scanning path. Path: {}, Type: {}", path, type, fex);
  } catch (Exception e) {
    log.error("Exception while initializing PathScanner. Path: {}, Type: {}", path, type, e);
  } finally {
    IOUtils.closeQuietly(resourceOrderIStream);
  }
  return finalPaths;
}
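To make the ordering-file convention above concrete: blank lines are skipped, entries starting with "/" or containing ":/" are resolved as absolute paths or patterns, and everything else is resolved relative to the scanned directory. A hypothetical glob_order file mixing the three kinds of entries (file names are invented):

common-*.jar
udfs/custom-udf.jar
/opt/shared/lib/json-serde.jar

Only resources matching these entries would be returned, in this order.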
From source file: org.apache.sentry.core.common.utils.PathUtils.java
License: Apache License

/**
 * Parse a URI which can be HDFS, S3, SWIFT, WEBHDFS, etc. In either case it
 * should be on the same fs as the warehouse directory.
 */
public static String parseURI(String warehouseDir, String uri, boolean isLocal) throws URISyntaxException {
  Path warehouseDirPath = new Path(warehouseDir);
  Path uriPath = new Path(uri);

  if (uriPath.isAbsolute()) {
    // Merge warehouseDir and uri only when there is no scheme and authority
    // in uri.
    if (uriPath.isAbsoluteAndSchemeAuthorityNull()) {
      uriPath = uriPath.makeQualified(warehouseDirPath.toUri(), warehouseDirPath);
    }
    String uriScheme = uriPath.toUri().getScheme();
    String uriAuthority = uriPath.toUri().getAuthority();

    if (StringUtils.isEmpty(uriScheme) || isLocal) {
      uriScheme = LOCAL_FILE_SCHEMA;
      uriAuthority = "";
    }

    uriPath = new Path(uriScheme + AUTHORITY_PREFIX + StringUtils.trimToEmpty(uriAuthority)
        + Path.getPathWithoutSchemeAndAuthority(uriPath));
  } else {
    // don't support relative path
    throw new IllegalArgumentException("Invalid URI " + uri + ".");
  }
  return uriPath.toUri().toString();
}
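A hedged usage note (host names and paths invented; output assumes AUTHORITY_PREFIX is the usual "://" separator): an absolute but scheme-less URI inherits the warehouse directory's scheme and authority, while a fully qualified URI keeps its own.

// Scheme-less absolute URI: inherits hdfs://nn:8020 from the warehouse dir
PathUtils.parseURI("hdfs://nn:8020/user/hive/warehouse", "/tmp/external", false);
// expected: "hdfs://nn:8020/tmp/external"

// Already qualified URI: scheme and authority are preserved
PathUtils.parseURI("hdfs://nn:8020/user/hive/warehouse", "s3a://bucket/data", false);
// expected: "s3a://bucket/data"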
From source file: org.apache.sentry.core.common.utils.PathUtils.java
License: Apache License

/**
 * Parse a URI which is on a local file system.
 */
public static String parseLocalURI(String uri) throws URISyntaxException {
  Path uriPath = new Path(uri);
  if (uriPath.isAbsolute()) {
    uriPath = new Path(LOCAL_FILE_SCHEMA + AUTHORITY_PREFIX
        + StringUtils.trimToEmpty(uriPath.toUri().getAuthority())
        + Path.getPathWithoutSchemeAndAuthority(uriPath));
  } else {
    throw new IllegalArgumentException("Parse URI does not work on relative URI: " + uri);
  }
  return uriPath.toUri().toString();
}