List of usage examples for org.apache.hadoop.fs.Path.getPathWithoutSchemeAndAuthority
public static Path getPathWithoutSchemeAndAuthority(Path path)
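The method strips the scheme (for example hdfs) and the authority (host:port) from a fully qualified Path, keeping only the slash-separated path component. A minimal sketch of the behavior, with an invented namenode host and paths:

import org.apache.hadoop.fs.Path;

public class PathStripDemo {
  public static void main(String[] args) {
    // Fully qualified path: scheme "hdfs", authority "namenode:8020"
    Path qualified = new Path("hdfs://namenode:8020/user/alice/data");
    Path stripped = Path.getPathWithoutSchemeAndAuthority(qualified);
    System.out.println(stripped); // prints: /user/alice/data

    // A path that already has no scheme or authority comes back unchanged
    System.out.println(Path.getPathWithoutSchemeAndAuthority(new Path("/tmp/x"))); // prints: /tmp/x
  }
}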
From source file: org.apache.gobblin.data.management.policy.HiddenFilterSelectionPolicy.java
License: Apache License

private Predicate<FileSystemDatasetVersion> getSelectionPredicate() {
  return new Predicate<FileSystemDatasetVersion>() {
    @Override
    public boolean apply(FileSystemDatasetVersion version) {
      Set<Path> paths = version.getPaths();
      for (Path path : paths) {
        // getPathWithoutSchemeAndAuthority is static; call it on Path rather than on an instance
        Path p = Path.getPathWithoutSchemeAndAuthority(path);
        if (isPathHidden(p)) {
          return false;
        }
      }
      return true;
    }
  };
}
From source file: org.apache.gobblin.source.extractor.filebased.FileBasedSource.java
License: Apache License

/**
 * Add lineage source info to a single work unit
 *
 * @param workUnit a single work unit, not an instance of {@link org.apache.gobblin.source.workunit.MultiWorkUnit}
 * @param state configurations
 */
protected void addLineageSourceInfo(WorkUnit workUnit, State state) {
  if (!lineageInfo.isPresent()) {
    log.info("Lineage is not enabled");
    return;
  }
  String platform = state.getProp(ConfigurationKeys.SOURCE_FILEBASED_PLATFORM, DatasetConstants.PLATFORM_HDFS);
  Path dataDir = new Path(state.getProp(ConfigurationKeys.SOURCE_FILEBASED_DATA_DIRECTORY));
  String dataset = Path.getPathWithoutSchemeAndAuthority(dataDir).toString();
  DatasetDescriptor source = new DatasetDescriptor(platform, dataset);
  lineageInfo.get().setSource(source, workUnit);
}
From source file: org.apache.gobblin.source.extractor.filebased.FileBasedSourceTest.java
License: Apache License

@Test
public void testSourceLineage() {
  String dataset = Path.getPathWithoutSchemeAndAuthority(sourceDir).toString();
  SourceState sourceState = new SourceState();
  sourceState.setBroker(jobBroker);
  initState(sourceState);

  // Avro file based source
  AvroFileSource fileSource = new AvroFileSource();
  List<WorkUnit> workUnits = fileSource.getWorkunits(sourceState);
  DatasetDescriptor datasetDescriptor = new DatasetDescriptor("hdfs", dataset);
  for (WorkUnit workUnit : workUnits) {
    Assert.assertEquals(workUnit.getProp(SOURCE_LINEAGE_KEY), Descriptor.toJson(datasetDescriptor));
  }

  // Partitioned file based source
  // Test platform configuration
  sourceState.setProp(ConfigurationKeys.SOURCE_FILEBASED_PLATFORM, DatasetConstants.PLATFORM_FILE);
  DatePartitionedJsonFileSource partitionedFileSource = new DatePartitionedJsonFileSource();
  workUnits = partitionedFileSource.getWorkunits(sourceState);
  datasetDescriptor = new DatasetDescriptor("file", dataset);
  Set<String> partitions = Sets.newHashSet("2017-12", "2018-01");
  for (WorkUnit workUnit : workUnits) {
    if (workUnit instanceof MultiWorkUnit) {
      DatasetDescriptor finalDatasetDescriptor = datasetDescriptor;
      ((MultiWorkUnit) workUnit).getWorkUnits()
          .forEach(wu -> verifyPartitionSourceLineage(wu, partitions, finalDatasetDescriptor));
    } else {
      verifyPartitionSourceLineage(workUnit, partitions, datasetDescriptor);
    }
  }
}
From source file: org.apache.gobblin.source.PartitionedFileSourceBase.java
License: Apache License

@Override
protected void addLineageSourceInfo(WorkUnit workUnit, State state) {
  if (!lineageInfo.isPresent()) {
    log.info("Lineage is not enabled");
    return;
  }
  String platform = state.getProp(ConfigurationKeys.SOURCE_FILEBASED_PLATFORM, DatasetConstants.PLATFORM_HDFS);
  Path dataDir = new Path(state.getProp(ConfigurationKeys.SOURCE_FILEBASED_DATA_DIRECTORY));
  String dataset = Path.getPathWithoutSchemeAndAuthority(dataDir).toString();
  DatasetDescriptor datasetDescriptor = new DatasetDescriptor(platform, dataset);
  String partitionName = workUnit.getProp(ConfigurationKeys.WORK_UNIT_DATE_PARTITION_NAME);
  PartitionDescriptor descriptor = new PartitionDescriptor(partitionName, datasetDescriptor);
  lineageInfo.get().setSource(descriptor, workUnit);
}
From source file: org.apache.kylin.common.KylinConfigBase.java
License: Apache License

public String getReadHdfsWorkingDirectory() {
  if (StringUtils.isNotEmpty(getHBaseClusterFs())) {
    Path workingDir = new Path(getHdfsWorkingDirectory());
    return new Path(getHBaseClusterFs(), Path.getPathWithoutSchemeAndAuthority(workingDir)).toString() + "/";
  }
  return getHdfsWorkingDirectory();
}
From source file: org.apache.kylin.common.persistence.JDBCResourceDAO.java
License: Apache License

public Path bigCellHDFSPath(String resPath) {
  String hdfsWorkingDirectory = this.kylinConfig.getHdfsWorkingDirectory();
  Path redirectPath = new Path(hdfsWorkingDirectory, "resources-jdbc" + resPath);
  redirectPath = Path.getPathWithoutSchemeAndAuthority(redirectPath);
  return redirectPath;
}
From source file: org.apache.kylin.storage.hbase.HBaseConnection.java
License: Apache License

public static String makeQualifiedPathInHBaseCluster(String inPath) {
  Path path = new Path(inPath);
  path = Path.getPathWithoutSchemeAndAuthority(path);
  try {
    FileSystem fs = FileSystem.get(getCurrentHBaseConfiguration());
    return fs.makeQualified(path).toString();
  } catch (IOException e) {
    throw new IllegalArgumentException("Cannot create FileSystem from current hbase cluster conf", e);
  }
}
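The two Kylin snippets above share a pattern: strip the scheme and authority from a path qualified against one cluster, then re-qualify the bare path against another filesystem's default. A standalone sketch of that pattern, assuming made-up cluster hosts and directories:

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class RebasePathDemo {
  public static void main(String[] args) throws IOException {
    // Path qualified against the source cluster
    Path onSource = new Path("hdfs://source-cluster:8020/kylin/working_dir");

    // Drop "hdfs://source-cluster:8020", leaving /kylin/working_dir
    Path bare = Path.getPathWithoutSchemeAndAuthority(onSource);

    // Re-qualify against the target cluster's default filesystem
    Configuration targetConf = new Configuration();
    targetConf.set("fs.defaultFS", "hdfs://target-cluster:8020");
    FileSystem targetFs = FileSystem.get(targetConf);
    System.out.println(targetFs.makeQualified(bare));
    // expected: hdfs://target-cluster:8020/kylin/working_dir
  }
}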
From source file: org.apache.lens.server.util.ScannedPaths.java
License: Apache License

/**
 * Method that computes path of resources matching the input path or path regex pattern.
 * If provided path is a directory it additionally checks for the jar_order or glob_order file
 * that imposes ordering of resources and filters out other resources.
 *
 * Updates the finalPaths list with matched paths and returns it.
 */
private List<String> getMatchedPaths(Path pt, String type) {
  List<String> finalPaths = new ArrayList<>();
  InputStream resourceOrderIStream = null;
  FileSystem fs;
  try {
    fs = pt.getFileSystem(new Configuration());
    if (fs.exists(pt)) {
      if (fs.isFile(pt)) {
        /** CASE 1 : Direct FILE provided in path **/
        finalPaths.add(pt.toUri().toString());
      } else if (fs.isDirectory(pt)) {
        /** CASE 2 : DIR provided in path **/
        Path resourceOrderFile;
        FileStatus[] statuses;
        List<String> newMatches;
        List<String> resources;
        resourceOrderFile = new Path(pt, "jar_order");
        /** Add everything in dir if no jar_order or glob_order is present **/
        if (!fs.exists(resourceOrderFile)) {
          resourceOrderFile = new Path(pt, "glob_order");
          if (!fs.exists(resourceOrderFile)) {
            resourceOrderFile = null;
            /** Get matched resources recursively for all files **/
            statuses = fs.globStatus(new Path(pt, "*"));
            if (statuses != null) {
              for (FileStatus st : statuses) {
                newMatches = getMatchedPaths(st.getPath(), type);
                finalPaths.addAll(newMatches);
              }
            }
          }
        }
        if (resourceOrderFile != null) {
          /** Else get jars as per order specified in jar_order/glob_order **/
          resourceOrderIStream = fs.open(resourceOrderFile);
          resources = IOUtils.readLines(resourceOrderIStream, Charset.forName("UTF-8"));
          for (String resource : resources) {
            if (StringUtils.isBlank(resource)) {
              continue;
            }
            resource = resource.trim();
            /** Get matched resources recursively for provided path/pattern **/
            if (resource.startsWith("/") || resource.contains(":/")) {
              newMatches = getMatchedPaths(new Path(resource), type);
            } else {
              newMatches = getMatchedPaths(new Path(pt, resource), type);
            }
            finalPaths.addAll(newMatches);
          }
        }
      }
    } else {
      /** CASE 3 : REGEX provided in path **/
      FileStatus[] statuses = fs.globStatus(Path.getPathWithoutSchemeAndAuthority(pt));
      if (statuses != null) {
        for (FileStatus st : statuses) {
          List<String> newMatches = getMatchedPaths(st.getPath(), type);
          finalPaths.addAll(newMatches);
        }
      }
    }
    filterDirsAndJarType(fs, finalPaths);
  } catch (FileNotFoundException fex) {
    log.error("File not found while scanning path. Path: {}, Type: {}", path, type, fex);
  } catch (Exception e) {
    log.error("Exception while initializing PathScanner. Path: {}, Type: {}", path, type, e);
  } finally {
    IOUtils.closeQuietly(resourceOrderIStream);
  }
  return finalPaths;
}
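To make the ordering-file convention above concrete: blank lines are skipped, entries starting with "/" or containing ":/" are resolved as absolute paths or patterns, and everything else is resolved relative to the scanned directory. A hypothetical glob_order file mixing the three kinds of entries (file names are invented):

common-*.jar
udfs/custom-udf.jar
/opt/shared/lib/json-serde.jar

Only resources matching these entries would be returned, in this order.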
From source file: org.apache.sentry.core.common.utils.PathUtils.java
License: Apache License

/**
 * Parse a URI which can be HDFS, S3, SWIFT, WEBHDFS, etc. In either case it
 * should be on the same fs as the warehouse directory.
 */
public static String parseURI(String warehouseDir, String uri, boolean isLocal) throws URISyntaxException {
  Path warehouseDirPath = new Path(warehouseDir);
  Path uriPath = new Path(uri);

  if (uriPath.isAbsolute()) {
    // Merge warehouseDir and uri only when there is no scheme and authority
    // in uri.
    if (uriPath.isAbsoluteAndSchemeAuthorityNull()) {
      uriPath = uriPath.makeQualified(warehouseDirPath.toUri(), warehouseDirPath);
    }
    String uriScheme = uriPath.toUri().getScheme();
    String uriAuthority = uriPath.toUri().getAuthority();

    if (StringUtils.isEmpty(uriScheme) || isLocal) {
      uriScheme = LOCAL_FILE_SCHEMA;
      uriAuthority = "";
    }

    uriPath = new Path(uriScheme + AUTHORITY_PREFIX + StringUtils.trimToEmpty(uriAuthority)
        + Path.getPathWithoutSchemeAndAuthority(uriPath));
  } else {
    // don't support relative path
    throw new IllegalArgumentException("Invalid URI " + uri + ".");
  }
  return uriPath.toUri().toString();
}
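A hedged usage note (host names and paths invented; output assumes AUTHORITY_PREFIX is the usual "://" separator): an absolute but scheme-less URI inherits the warehouse directory's scheme and authority, while a fully qualified URI keeps its own.

// Scheme-less absolute URI: inherits hdfs://nn:8020 from the warehouse dir
PathUtils.parseURI("hdfs://nn:8020/user/hive/warehouse", "/tmp/external", false);
// expected: "hdfs://nn:8020/tmp/external"

// Already qualified URI: scheme and authority are preserved
PathUtils.parseURI("hdfs://nn:8020/user/hive/warehouse", "s3a://bucket/data", false);
// expected: "s3a://bucket/data"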
From source file: org.apache.sentry.core.common.utils.PathUtils.java
License: Apache License

/**
 * Parse a URI which is on a local file system.
 */
public static String parseLocalURI(String uri) throws URISyntaxException {
  Path uriPath = new Path(uri);
  if (uriPath.isAbsolute()) {
    uriPath = new Path(LOCAL_FILE_SCHEMA + AUTHORITY_PREFIX
        + StringUtils.trimToEmpty(uriPath.toUri().getAuthority())
        + Path.getPathWithoutSchemeAndAuthority(uriPath));
  } else {
    throw new IllegalArgumentException("Parse URI does not work on relative URI: " + uri);
  }
  return uriPath.toUri().toString();
}