Example usage for org.apache.hadoop.fs Path getPathWithoutSchemeAndAuthority

Introduction

This page collects example usages of the org.apache.hadoop.fs.Path method getPathWithoutSchemeAndAuthority.

Prototype

public static Path getPathWithoutSchemeAndAuthority(Path path) 

Document

Returns a version of the given Path with the scheme and authority stripped.
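
A quick illustration of the behavior (a minimal, self-contained sketch; the namenode host and port are placeholders):

import org.apache.hadoop.fs.Path;

public class PathDemo {
    public static void main(String[] args) {
        Path qualified = new Path("hdfs://namenode:8020/user/data/events");
        // Both the scheme (hdfs) and the authority (namenode:8020) are dropped
        Path stripped = Path.getPathWithoutSchemeAndAuthority(qualified);
        System.out.println(stripped); // prints /user/data/events
    }
}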

Usage

From source file: org.apache.gobblin.data.management.policy.HiddenFilterSelectionPolicy.java

License: Apache License

private Predicate<FileSystemDatasetVersion> getSelectionPredicate() {
    return new Predicate<FileSystemDatasetVersion>() {
        @Override
        public boolean apply(FileSystemDatasetVersion version) {
            Set<Path> paths = version.getPaths();
            for (Path path : paths) {
                Path p = Path.getPathWithoutSchemeAndAuthority(path);
                if (isPathHidden(p)) {
                    return false;
                }
            }
            return true;
        }
    };
}
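
Stripping the scheme and authority first means the hidden check only ever sees real directory and file names. A minimal sketch of the kind of check isPathHidden might perform, assuming a segment counts as hidden when it starts with "." or "_" (the actual prefixes in HiddenFilterSelectionPolicy are configurable):

private boolean isPathHidden(Path path) {
    // Inspect each path segment; the caller has already removed the
    // scheme and authority from the path
    for (String segment : path.toString().split(Path.SEPARATOR)) {
        if (segment.startsWith(".") || segment.startsWith("_")) {
            return true;
        }
    }
    return false;
}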

From source file: org.apache.gobblin.source.extractor.filebased.FileBasedSource.java

License: Apache License

/**
 * Add lineage source info to a single work unit
 *
 * @param workUnit a single work unit, not an instance of {@link org.apache.gobblin.source.workunit.MultiWorkUnit}
 * @param state configurations
 */
protected void addLineageSourceInfo(WorkUnit workUnit, State state) {
    if (!lineageInfo.isPresent()) {
        log.info("Lineage is not enabled");
        return;
    }

    String platform = state.getProp(ConfigurationKeys.SOURCE_FILEBASED_PLATFORM,
            DatasetConstants.PLATFORM_HDFS);
    Path dataDir = new Path(state.getProp(ConfigurationKeys.SOURCE_FILEBASED_DATA_DIRECTORY));
    String dataset = Path.getPathWithoutSchemeAndAuthority(dataDir).toString();
    DatasetDescriptor source = new DatasetDescriptor(platform, dataset);
    lineageInfo.get().setSource(source, workUnit);
}
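
Stripping the scheme and authority makes the lineage dataset name cluster-agnostic: the same data directory registers as the same dataset regardless of which namenode URI the job used to reach it.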

From source file: org.apache.gobblin.source.extractor.filebased.FileBasedSourceTest.java

License: Apache License

@Test
public void testSourceLineage() {
    String dataset = Path.getPathWithoutSchemeAndAuthority(sourceDir).toString();

    SourceState sourceState = new SourceState();
    sourceState.setBroker(jobBroker);
    initState(sourceState);

    // Avro file based source
    AvroFileSource fileSource = new AvroFileSource();
    List<WorkUnit> workUnits = fileSource.getWorkunits(sourceState);
    DatasetDescriptor datasetDescriptor = new DatasetDescriptor("hdfs", dataset);
    for (WorkUnit workUnit : workUnits) {
        Assert.assertEquals(workUnit.getProp(SOURCE_LINEAGE_KEY), Descriptor.toJson(datasetDescriptor));
    }

    // Partitioned file based source
    // Test platform configuration
    sourceState.setProp(ConfigurationKeys.SOURCE_FILEBASED_PLATFORM, DatasetConstants.PLATFORM_FILE);
    DatePartitionedJsonFileSource partitionedFileSource = new DatePartitionedJsonFileSource();
    workUnits = partitionedFileSource.getWorkunits(sourceState);
    datasetDescriptor = new DatasetDescriptor("file", dataset);

    Set<String> partitions = Sets.newHashSet("2017-12", "2018-01");
    for (WorkUnit workUnit : workUnits) {
        if (workUnit instanceof MultiWorkUnit) {
            DatasetDescriptor finalDatasetDescriptor = datasetDescriptor;
            ((MultiWorkUnit) workUnit).getWorkUnits()
                    .forEach(wu -> verifyPartitionSourceLineage(wu, partitions, finalDatasetDescriptor));
        } else {
            verifyPartitionSourceLineage(workUnit, partitions, datasetDescriptor);
        }
    }
}

From source file: org.apache.gobblin.source.PartitionedFileSourceBase.java

License: Apache License

@Override
protected void addLineageSourceInfo(WorkUnit workUnit, State state) {
    if (!lineageInfo.isPresent()) {
        log.info("Lineage is not enabled");
        return;
    }

    String platform = state.getProp(ConfigurationKeys.SOURCE_FILEBASED_PLATFORM,
            DatasetConstants.PLATFORM_HDFS);
    Path dataDir = new Path(state.getProp(ConfigurationKeys.SOURCE_FILEBASED_DATA_DIRECTORY));
    String dataset = Path.getPathWithoutSchemeAndAuthority(dataDir).toString();
    DatasetDescriptor datasetDescriptor = new DatasetDescriptor(platform, dataset);

    String partitionName = workUnit.getProp(ConfigurationKeys.WORK_UNIT_DATE_PARTITION_NAME);
    PartitionDescriptor descriptor = new PartitionDescriptor(partitionName, datasetDescriptor);

    lineageInfo.get().setSource(descriptor, workUnit);
}

From source file: org.apache.kylin.common.KylinConfigBase.java

License: Apache License

public String getReadHdfsWorkingDirectory() {
    if (StringUtils.isNotEmpty(getHBaseClusterFs())) {
        Path workingDir = new Path(getHdfsWorkingDirectory());
        return new Path(getHBaseClusterFs(), Path.getPathWithoutSchemeAndAuthority(workingDir)).toString()
                + "/";
    }

    return getHdfsWorkingDirectory();
}
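
The effect is to rebase the working directory onto the HBase cluster's file system. A minimal sketch with placeholder cluster names:

import org.apache.hadoop.fs.Path;

public class RebaseDemo {
    public static void main(String[] args) {
        // Placeholders for getHdfsWorkingDirectory() and getHBaseClusterFs()
        Path workingDir = new Path("hdfs://main-cluster/kylin/working");
        String hbaseClusterFs = "hdfs://hbase-cluster";

        // Drop the main cluster's scheme/authority, then re-anchor the bare
        // path on the HBase cluster's file system
        Path rebased = new Path(hbaseClusterFs, Path.getPathWithoutSchemeAndAuthority(workingDir));
        System.out.println(rebased); // prints hdfs://hbase-cluster/kylin/working
    }
}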

From source file: org.apache.kylin.common.persistence.JDBCResourceDAO.java

License: Apache License

public Path bigCellHDFSPath(String resPath) {
    String hdfsWorkingDirectory = this.kylinConfig.getHdfsWorkingDirectory();
    Path redirectPath = new Path(hdfsWorkingDirectory, "resources-jdbc" + resPath);
    redirectPath = Path.getPathWithoutSchemeAndAuthority(redirectPath);
    return redirectPath;
}
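
Here the scheme and authority are dropped so that the stored resource path is not tied to a particular namenode; it can be re-qualified against whichever file system resolves it later.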

From source file: org.apache.kylin.storage.hbase.HBaseConnection.java

License: Apache License

public static String makeQualifiedPathInHBaseCluster(String inPath) {
    Path path = new Path(inPath);
    path = Path.getPathWithoutSchemeAndAuthority(path);

    try {
        FileSystem fs = FileSystem.get(getCurrentHBaseConfiguration());
        return fs.makeQualified(path).toString();
    } catch (IOException e) {
        throw new IllegalArgumentException("Cannot create FileSystem from current hbase cluster conf", e);
    }
}
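
The same strip-then-requalify round trip can be demonstrated against the local file system, which avoids needing an HBase configuration (a minimal sketch):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class QualifyDemo {
    public static void main(String[] args) throws Exception {
        // Start from a path qualified on some other cluster
        Path bare = Path.getPathWithoutSchemeAndAuthority(new Path("hdfs://other-cluster/kylin/resources"));
        // Re-qualify the bare path against the local file system instead
        FileSystem fs = FileSystem.getLocal(new Configuration());
        System.out.println(fs.makeQualified(bare)); // prints file:/kylin/resources
    }
}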

From source file: org.apache.lens.server.util.ScannedPaths.java

License: Apache License

/**
 * Computes the paths of resources matching the input path or path pattern.
 * If the provided path is a directory, it additionally checks for a jar_order
 * or glob_order file, which imposes an ordering on the resources and filters
 * out the rest.
 *
 * Updates the finalPaths list with the matched paths and returns it.
 */
private List<String> getMatchedPaths(Path pt, String type) {
    List<String> finalPaths = new ArrayList<>();
    InputStream resourceOrderIStream = null;
    FileSystem fs;

    try {
        fs = pt.getFileSystem(new Configuration());
        if (fs.exists(pt)) {
            if (fs.isFile(pt)) {
                /**
                 * CASE 1 : Direct FILE provided in path
                 **/
                finalPaths.add(pt.toUri().toString());
            } else if (fs.isDirectory(pt)) {
                /**
                 * CASE 2 : DIR provided in path
                 **/
                Path resourceOrderFile;
                FileStatus[] statuses;
                List<String> newMatches;
                List<String> resources;

                resourceOrderFile = new Path(pt, "jar_order");
                /** Add everything in dir if no jar_order or glob_order is present **/
                if (!fs.exists(resourceOrderFile)) {
                    resourceOrderFile = new Path(pt, "glob_order");
                    if (!fs.exists(resourceOrderFile)) {
                        resourceOrderFile = null;
                        /** Get matched resources recursively for all files **/
                        statuses = fs.globStatus(new Path(pt, "*"));
                        if (statuses != null) {
                            for (FileStatus st : statuses) {
                                newMatches = getMatchedPaths(st.getPath(), type);
                                finalPaths.addAll(newMatches);
                            }
                        }
                    }
                }
                if (resourceOrderFile != null) {
                    /** Else get jars as per order specified in jar_order/glob_order **/
                    resourceOrderIStream = fs.open(resourceOrderFile);
                    resources = IOUtils.readLines(resourceOrderIStream, Charset.forName("UTF-8"));
                    for (String resource : resources) {
                        if (StringUtils.isBlank(resource)) {
                            continue;
                        }
                        resource = resource.trim();

                        /** Get matched resources recursively for provided path/pattern **/
                        if (resource.startsWith("/") || resource.contains(":/")) {
                            newMatches = getMatchedPaths(new Path(resource), type);
                        } else {
                            newMatches = getMatchedPaths(new Path(pt, resource), type);
                        }
                        finalPaths.addAll(newMatches);
                    }
                }
            }
        } else {
            /**
             * CASE 3 : REGEX provided in path
             * */
            FileStatus[] statuses = fs.globStatus(Path.getPathWithoutSchemeAndAuthority(pt));
            if (statuses != null) {
                for (FileStatus st : statuses) {
                    List<String> newMatches = getMatchedPaths(st.getPath(), type);
                    finalPaths.addAll(newMatches);
                }
            }
        }
        filterDirsAndJarType(fs, finalPaths);
    } catch (FileNotFoundException fex) {
        log.error("File not found while scanning path. Path: {}, Type: {}", pt, type, fex);
    } catch (Exception e) {
        log.error("Exception while initializing PathScanner. Path: {}, Type: {}", pt, type, e);
    } finally {
        IOUtils.closeQuietly(resourceOrderIStream);
    }

    return finalPaths;
}
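
Note that in the glob branch (CASE 3) the pattern is stripped of its scheme and authority before globStatus is called; the file system itself was already derived from the original path above, so only the path portion needs to carry the glob.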

From source file: org.apache.sentry.core.common.utils.PathUtils.java

License: Apache License

/**
 * Parse a URI, which can be on HDFS, S3, Swift, WebHDFS, etc. In any case it
 * should be on the same file system as the warehouse directory.
 */
public static String parseURI(String warehouseDir, String uri, boolean isLocal) throws URISyntaxException {
    Path warehouseDirPath = new Path(warehouseDir);
    Path uriPath = new Path(uri);

    if (uriPath.isAbsolute()) {
        // Merge warehouseDir and uri only when there is no scheme and authority
        // in uri.
        if (uriPath.isAbsoluteAndSchemeAuthorityNull()) {
            uriPath = uriPath.makeQualified(warehouseDirPath.toUri(), warehouseDirPath);
        }
        String uriScheme = uriPath.toUri().getScheme();
        String uriAuthority = uriPath.toUri().getAuthority();

        if (StringUtils.isEmpty(uriScheme) || isLocal) {
            uriScheme = LOCAL_FILE_SCHEMA;
            uriAuthority = "";
        }

        uriPath = new Path(uriScheme + AUTHORITY_PREFIX + StringUtils.trimToEmpty(uriAuthority)
                + Path.getPathWithoutSchemeAndAuthority(uriPath));
    } else {
        // don't support relative path
        throw new IllegalArgumentException("Invalid URI " + uri + ".");
    }
    return uriPath.toUri().toString();
}
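
For example (assuming AUTHORITY_PREFIX is "://"), calling parseURI("hdfs://nn:8020/warehouse", "/data/tbl", false) qualifies the bare path against the warehouse and returns hdfs://nn:8020/data/tbl, while passing isLocal = true forces the file scheme with an empty authority.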

From source file: org.apache.sentry.core.common.utils.PathUtils.java

License: Apache License

/**
 * Parse a URI which is on a local file system.
 */
public static String parseLocalURI(String uri) throws URISyntaxException {
    Path uriPath = new Path(uri);
    if (uriPath.isAbsolute()) {
        uriPath = new Path(
                LOCAL_FILE_SCHEMA + AUTHORITY_PREFIX + StringUtils.trimToEmpty(uriPath.toUri().getAuthority())
                        + Path.getPathWithoutSchemeAndAuthority(uriPath));
    } else {
        throw new IllegalArgumentException("Parse URI does not work on relative URI: " + uri);
    }
    return uriPath.toUri().toString();
}
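
For example, parseLocalURI("/tmp/udf.jar") yields a file-scheme URI for /tmp/udf.jar (assuming LOCAL_FILE_SCHEMA is "file"), whereas a relative input such as "udf.jar" is rejected with an IllegalArgumentException.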