Example usage for org.apache.hadoop.fs Path toUri

List of usage examples for org.apache.hadoop.fs Path toUri

Introduction

On this page you can find example usages of the toUri() method of org.apache.hadoop.fs.Path.

Prototype

public URI toUri() 

Source Link

Document

Convert this Path to a URI.

Usage

From source file:com.github.seqware.queryengine.plugins.hbasemr.MRHBasePluginRunner.java

License:Open Source License

/**
 * Copies the "part-r-00000" file found under {@code path} to a local temporary file.
 *
 * <p>The source file system is obtained from an HBase-configured {@link Configuration}; the
 * destination file system comes from a default {@code Configuration}. Sanity-check failures
 * (missing input, input not a file, destination already present) are only logged at fatal
 * level; the copy is attempted regardless. Once the source file system has been obtained,
 * {@code path} is deleted recursively whether or not the copy succeeded.
 *
 * @param path directory that contains the "part-r-00000" output file
 * @return the local copy, or {@code null} if an {@link IOException} occurred
 */
public File handleFileResult(Path path) {
    File localCopy = null;
    FileSystem fs = null;
    try {
        final Path remotePart = new Path(path, "part-r-00000");
        final Logger pluginLog = Logger.getLogger(FeaturesByFilterPlugin.class.getName());
        pluginLog.info("Source file is " + remotePart.toString());

        // Source side: configuration enriched with the HBase settings.
        Configuration conf = new Configuration();
        HBaseStorage.configureHBaseConfig(conf);
        HBaseConfiguration.addHbaseResources(conf);
        fs = FileSystem.get(conf);

        // Reserve a unique temp name, then remove the placeholder so the copy can create it.
        File placeholder = File.createTempFile("vcf", "out");
        placeholder.delete();
        final Path localTarget = new Path(placeholder.toURI());
        final FileSystem localFs = FileSystem.get(new Configuration());
        pluginLog.info("Destination file is " + localTarget.toString());

        // These checks only report problems; none of them stops the copy below.
        if (!fs.exists(remotePart)) {
            pluginLog.fatal("Input file not found");
        }
        if (!fs.isFile(remotePart)) {
            pluginLog.fatal("Input should be a file");
        }
        if (localFs.exists(localTarget)) {
            pluginLog.fatal("Output already exists");
        }

        // doesn't quite work yet, no time to finish before poster, check results manually on hdfs
        FileUtil.copy(fs, remotePart, localFs, localTarget, true, true, conf);
        localCopy = new File(localTarget.toUri());
    } catch (IOException ex) {
        Logger.getLogger(VCFDumperPlugin.class.getName()).fatal(null, ex);
    } finally {
        // Clean up the remote output directory if we got as far as opening the file system.
        if (fs != null) {
            try {
                fs.delete(path, true);
            } catch (IOException ex) {
                Logger.getLogger(VCFDumperPlugin.class.getName())
                        .warn("IOException when clearing after text output", ex);
            }
        }
    }
    return localCopy;
}

From source file:com.github.seqware.queryengine.plugins.runners.hbasemr.MRHBasePluginRunner.java

License:Open Source License

/**
 * Waits for the MapReduce job to finish and returns its result in the form selected by the
 * plugin's result mechanism.
 *
 * <ul>
 *   <li>COUNTER: the value of the {@code ROWS} counter, boxed as a {@code Long}</li>
 *   <li>SGID: the latest atom looked up by the output set's SGID and the plugin's result class</li>
 *   <li>BATCHEDFEATURESET: the feature set returned by {@code updateAndGet(outputSet)}</li>
 *   <li>FILE: the job's output path copied into a fresh local temporary directory,
 *       returned as a {@link File}</li>
 * </ul>
 *
 * <p>NOTE(review): the boolean returned by {@code waitForCompletion} is not inspected, so a
 * failed job is not distinguished from a successful one here.
 *
 * @return the result cast to {@code ReturnType}, or {@code null} if waiting for or reading
 *         the job failed with an I/O, interruption, or class-loading error
 * @throws UnsupportedOperationException if the result mechanism is none of the above
 */
@Override
public ReturnType get() {
    try {
        job.waitForCompletion(true);
        // Read the mechanism once; every branch below dispatches on the same value.
        PluginInterface.ResultMechanism mechanism = mapReducePlugin.getResultMechanism();
        if (mechanism == PluginInterface.ResultMechanism.COUNTER) {
            return (ReturnType) Long
                    .valueOf(job.getCounters().findCounter(MapperInterface.Counters.ROWS).getValue());
        } else if (mechanism == PluginInterface.ResultMechanism.SGID) {
            SGID resultSGID = outputSet.getSGID();
            Class<? extends Atom> resultClass = (Class<? extends Atom>) mapReducePlugin.getResultClass();
            return (ReturnType) SWQEFactory.getQueryInterface().getLatestAtomBySGID(resultSGID, resultClass);
        } else if (mechanism == PluginInterface.ResultMechanism.BATCHEDFEATURESET) {
            FeatureSet build = updateAndGet(outputSet);
            return (ReturnType) build;
        } else if (mechanism == PluginInterface.ResultMechanism.FILE) {
            Path outputPath = TextOutputFormat.getOutputPath(job);
            FileSystem fs = FileSystem.get(job.getConfiguration());
            Path localPath = new Path(Files.createTempDir().toURI());
            fs.copyToLocalFile(outputPath, localPath);

            File outputFile = new File(localPath.toUri());
            return (ReturnType) outputFile;
        } else {
            throw new UnsupportedOperationException();
        }
    } catch (IOException ex) {
        Logger.getLogger(MRHBasePluginRunner.class.getName()).error(null, ex);
    } catch (InterruptedException ex) {
        // Catching InterruptedException clears the interrupt flag; restore it so callers
        // further up the stack can still observe that this thread was interrupted.
        Thread.currentThread().interrupt();
        Logger.getLogger(MRHBasePluginRunner.class.getName()).error(null, ex);
    } catch (ClassNotFoundException ex) {
        Logger.getLogger(MRHBasePluginRunner.class.getName()).error(null, ex);
    }
    return null;
}

From source file:com.google.cloud.hadoop.fs.gcs.GoogleHadoopFileSystem.java

License:Open Source License

/**
 * Validates {@code path} via the superclass check, then verifies that its bucket (the URI
 * authority) is either absent or equal to {@code rootBucket}.
 *
 * @param path the path to validate
 * @throws IllegalArgumentException if the path names a bucket other than {@code rootBucket}
 */
@Override
protected void checkPath(Path path) {
    // The parent implementation takes care of the scheme.
    super.checkPath(path);

    String bucket = path.toUri().getAuthority();
    // A path without a bucket is acceptable here; it gets qualified later.
    boolean bucketAccepted = bucket == null || bucket.equals(rootBucket);
    if (!bucketAccepted) {
        throw new IllegalArgumentException(String.format(
                "Wrong bucket: %s, in path: %s, expected bucket: %s", bucket, path, rootBucket));
    }
}

From source file:com.google.cloud.hadoop.fs.gcs.GoogleHadoopFileSystem.java

License:Open Source License

/**
 * Converts a "gs:/" style Hadoop path (or a relative, not fully-qualified path) into the
 * corresponding GCS path URI, which is compatible with the underlying GcsFs or gsutil.
 *
 * @param hadoopPath the Hadoop path to translate
 * @return the GCS URI built from {@code rootBucket} and the object name derived from the path
 */
@Override
public URI getGcsPath(Path hadoopPath) {
    LOG.debug("GHFS.getGcsPath: {}", hadoopPath);

    // Fully qualify first; Path calls back into this file system for the working directory.
    final Path qualified = makeQualified(hadoopPath);
    final String rawPath = qualified.toUri().getPath();

    // GoogleCloudStorageFileSystem.getPath inserts the '/' between bucket and object name
    // itself, so the leading '/' of an absolute path is dropped here.
    final String objectName = (rawPath != null && qualified.isAbsolute())
            ? rawPath.substring(1)
            : rawPath;

    final URI translated = gcsfs.getPathCodec().getPath(rootBucket, objectName, true);
    LOG.debug("GHFS.getGcsPath: {} -> {}", hadoopPath, translated);
    return translated;
}

From source file:com.google.cloud.hadoop.fs.gcs.GoogleHadoopFileSystemBase.java

License:Open Source License

/**
 * Qualifies {@code path} via the superclass and then treats the root as its own parent by
 * dropping any leading ".." segments from the result.
 *
 * <p>Overridden to make root its own parent. This is POSIX compliant, but more importantly
 * guards against poor directory accounting in the PathData class of Hadoop 2's FsShell.
 *
 * @param path the path to qualify
 * @return a path with the qualified scheme and authority and the cleaned-up path component
 * @throws IllegalStateException if the qualified path is neither empty nor absolute
 */
@Override
public Path makeQualified(Path path) {
    LOG.debug("GHFS.makeQualified: path: {}", path);
    final Path qualifiedPath = super.makeQualified(path);
    final URI qualifiedUri = qualifiedPath.toUri();

    Preconditions.checkState("".equals(qualifiedUri.getPath()) || qualifiedPath.isAbsolute(),
            "Path '%s' must be fully qualified.", qualifiedPath);

    // Drop each leading "/.." while keeping the slash that follows it, so the remainder
    // is still an absolute path.
    String cleaned = qualifiedUri.getPath();
    while (cleaned.startsWith("/../")) {
        cleaned = cleaned.substring(3);
    }

    // A bare "/.." or an empty path (e.g. gs://someBucket) both map to the root "/".
    if ("/..".equals(cleaned) || cleaned.isEmpty()) {
        cleaned = "/";
    }

    final Path rebuilt = new Path(qualifiedUri.getScheme(), qualifiedUri.getAuthority(), cleaned);
    LOG.debug("GHFS.makeQualified:=> {}", rebuilt);
    return rebuilt;
}

From source file:com.google.cloud.hadoop.fs.gcs.GoogleHadoopFileSystemBase.java

License:Open Source License

/**
 * Verifies that the scheme of {@code path}, if it has one, matches this file system's scheme
 * (case-insensitively). Only the scheme is checked; the authority and path are validated later.
 *
 * @param path the path to validate
 * @throws IllegalArgumentException if the path carries a different scheme
 */
@Override
protected void checkPath(Path path) {
    String scheme = path.toUri().getScheme();
    // A scheme-less path is accepted as-is.
    boolean schemeAccepted = scheme == null || scheme.equalsIgnoreCase(getScheme());
    if (!schemeAccepted) {
        throw new IllegalArgumentException(String.format(
                "Wrong FS scheme: %s, in path: %s, expected scheme: %s", scheme, path, getScheme()));
    }
}

From source file:com.google.cloud.hadoop.fs.gcs.GoogleHadoopFileSystemBase.java

License:Open Source License

/**
 * Determines based on config settings and suitability of {@code fixedPath} whether to use
 * flat globbing logic where we use a single large listing during globStatus to then perform
 * the core globbing logic in-memory./*  w ww.  ja v a  2  s.  c  o m*/
 */
@VisibleForTesting
boolean shouldUseFlatGlob(Path fixedPath) {
    // Config setting overrides all else.
    if (!enableFlatGlob) {
        return false;
    }

    // Only works for filesystems where the base Hadoop Path scheme matches the underlying URI
    // scheme for GCS.
    if (!getUri().getScheme().equals(GoogleCloudStorageFileSystem.SCHEME)) {
        LOG.debug("Flat glob is on, but doesn't work for scheme '{}'; usig default behavior.",
                getUri().getScheme());
        return false;
    }

    // The full pattern should have a wildcard, otherwise there's no point doing the flat glob.
    GlobPattern fullPattern = new GlobPattern(fixedPath.toString());
    if (!fullPattern.hasWildcard()) {
        LOG.debug("Flat glob is on, but Path '{}' has no wildcard; using default behavior.", fixedPath);
        return false;
    }

    // To use a flat glob, there must be an authority defined.
    if (Strings.isNullOrEmpty(fixedPath.toUri().getAuthority())) {
        LOG.info("Flat glob is on, but Path '{}' has a empty authority, using default behavior.", fixedPath);
        return false;
    }

    // And the authority must not contain a wildcard.
    GlobPattern authorityPattern = new GlobPattern(fixedPath.toUri().getAuthority());
    if (authorityPattern.hasWildcard()) {
        LOG.info("Flat glob is on, but Path '{}' has a wildcard authority, using default behavior.", fixedPath);
        return false;
    }

    return true;
}

From source file:com.google.cloud.hadoop.fs.gcs.GoogleHadoopFileSystemBase.java

License:Open Source License

/**
 * Returns an array of FileStatus objects whose path names match pathPattern
 * and is accepted by the user-supplied path filter. Results are sorted by
 * their path names./*from  w  ww .ja  v a2 s .c  o m*/
 *
 * Return null if pathPattern has no glob and the path does not exist.
 * Return an empty array if pathPattern has a glob and no path matches it.
 *
 * @param pathPattern A regular expression specifying the path pattern.
 * @param filter A user-supplied path filter.
 * @return An array of FileStatus objects.
 * @throws IOException if an error occurs.
 */
@Override
public FileStatus[] globStatus(Path pathPattern, PathFilter filter) throws IOException {

    checkOpen();

    LOG.debug("GHFS.globStatus: {}", pathPattern);
    // URI does not handle glob expressions nicely, for the purpose of
    // fully-qualifying a path we can URI-encode them.
    // Using toString() to avoid Path(URI) constructor.
    Path encodedPath = new Path(pathPattern.toUri().toString());
    // We convert pathPattern to GCS path and then to Hadoop path to ensure that it ends up in
    // the correct format. See note in getHadoopPath for more information.
    Path fixedPath = getHadoopPath(getGcsPath(encodedPath));
    // Decode URI-encoded path back into a glob path.
    fixedPath = new Path(URI.create(fixedPath.toString()));
    LOG.debug("GHFS.globStatus fixedPath: {} => {}", pathPattern, fixedPath);

    if (shouldUseFlatGlob(fixedPath)) {
        String pathString = fixedPath.toString();
        String prefixString = trimToPrefixWithoutGlob(pathString);
        Path prefixPath = new Path(prefixString);
        URI prefixUri = getGcsPath(prefixPath);

        if (prefixString.endsWith("/") && !prefixPath.toString().endsWith("/")) {
            // Path strips a trailing slash unless it's the 'root' path. We want to keep the trailing
            // slash so that we don't wastefully list sibling files which may match the directory-name
            // as a strict prefix but would've been omitted due to not containing the '/' at the end.
            prefixUri = FileInfo.convertToDirectoryPath(gcsfs.getPathCodec(), prefixUri);
        }

        // Get everything matching the non-glob prefix.
        LOG.debug("Listing everything with prefix '{}'", prefixUri);
        List<FileInfo> fileInfos = gcsfs.listAllFileInfoForPrefix(prefixUri);
        if (fileInfos.isEmpty()) {
            // Let the superclass define the proper logic for finding no matches.
            return super.globStatus(fixedPath, filter);
        }

        // Perform the core globbing logic in the helper filesystem.
        GoogleHadoopFileSystem helperFileSystem = ListHelperGoogleHadoopFileSystem.createInstance(gcsfs,
                fileInfos);
        FileStatus[] returnList = helperFileSystem.globStatus(pathPattern, filter);

        // If the return list contains directories, we should repair them if they're 'implicit'.
        if (enableAutoRepairImplicitDirectories) {
            List<URI> toRepair = new ArrayList<>();
            for (FileStatus status : returnList) {
                // Modification time of 0 indicates implicit directory.
                if (status.isDir() && status.getModificationTime() == 0) {
                    toRepair.add(getGcsPath(status.getPath()));
                }
            }
            if (!toRepair.isEmpty()) {
                LOG.warn("Discovered {} implicit directories to repair within return values.", toRepair.size());
                gcsfs.repairDirs(toRepair);
            }
        }
        return returnList;
    } else {
        FileStatus[] ret = super.globStatus(fixedPath, filter);
        if (ret == null) {
            if (enableAutoRepairImplicitDirectories) {
                LOG.debug("GHFS.globStatus returned null for '{}', attempting possible repair.", pathPattern);
                if (gcsfs.repairPossibleImplicitDirectory(getGcsPath(fixedPath))) {
                    LOG.warn("Success repairing '{}', re-globbing.", pathPattern);
                    ret = super.globStatus(fixedPath, filter);
                }
            }
        }
        return ret;
    }
}

From source file:com.google.cloud.hadoop.fs.gcs.GoogleHadoopFileSystemTestBase.java

License:Open Source License

/**
 * Checks makeQualified() while the working directory is a non-root directory
 * ({@code <root>working/dir}): each input string is qualified and compared with the
 * expected fully-qualified path.
 */
@Test
public void testMakeQualifiedNotRoot() {
    GoogleHadoopFileSystemBase myGhfs = (GoogleHadoopFileSystemBase) ghfs;
    Path fsRootPath = myGhfs.getFileSystemRoot();
    URI fsRootUri = fsRootPath.toUri();
    String fsRoot = fsRootPath.toString();
    String workingParent = fsRoot + "working/";
    String workingDir = workingParent + "dir";
    myGhfs.setWorkingDirectory(new Path(workingDir));

    // Maps the path to qualify -> the expected qualified path.
    Map<String, String> expectations = new HashMap<>();
    expectations.put("/", fsRoot);
    expectations.put("/foo", fsRoot + "foo");
    expectations.put("/foo/bar", fsRoot + "foo/bar");
    expectations.put(".", workingDir);
    expectations.put("foo", workingDir + "/foo");
    expectations.put("foo/bar", workingDir + "/foo/bar");
    expectations.put(fsRoot, fsRoot);
    expectations.put(fsRoot + "foo", fsRoot + "foo");
    expectations.put(fsRoot + "foo/bar", fsRoot + "foo/bar");
    expectations.put("/foo/../foo", fsRoot + "foo");
    expectations.put("/foo/bar/../../foo/bar", fsRoot + "foo/bar");
    expectations.put("foo/../foo", workingDir + "/foo");
    expectations.put("foo/bar/../../foo/bar", workingDir + "/foo/bar");
    expectations.put(fsRoot + "foo/../foo", fsRoot + "foo");
    expectations.put(fsRoot + "foo/bar/../../foo/bar", fsRoot + "foo/bar");
    expectations.put("..", workingParent);
    expectations.put("../..", fsRoot);
    expectations.put("../foo", workingParent + "/foo");
    expectations.put("../foo/bar", workingParent + "/foo/bar");
    expectations.put("../foo/../foo", workingParent + "/foo");
    expectations.put("../foo/bar/../../foo/bar", workingParent + "/foo/bar");
    expectations.put(workingDir + "/../foo/../foo", workingParent + "/foo");
    expectations.put(workingDir + "/../foo/bar/../../foo/bar", workingParent + "/foo/bar");
    expectations.put(fsRoot + "..foo/bar", fsRoot + "..foo/bar");
    expectations.put("..foo/bar", workingDir + "/..foo/bar");

    // GHFS specific behavior where root is its own parent.
    expectations.put("/..", fsRoot);
    expectations.put("/../../..", fsRoot);
    expectations.put("/../foo/", fsRoot + "foo");
    expectations.put("/../../../foo/bar", fsRoot + "foo/bar");
    expectations.put("../../..", fsRoot);
    expectations.put(fsRoot + "..", fsRoot);
    expectations.put(fsRoot + "../foo", fsRoot + "foo");
    expectations.put(fsRoot + "../foo/bar", fsRoot + "foo/bar");
    expectations.put("../../../foo/../foo", fsRoot + "foo");
    expectations.put("../../../foo/bar/../../foo/bar", fsRoot + "foo/bar");

    // Skip for authority-less gsg paths.
    if (fsRootUri.getAuthority() != null) {
        // When the path to qualify is of the form gs://somebucket, we want to qualify
        // it as gs://someBucket/
        expectations.put(fsRoot.substring(0, fsRoot.length() - 1), fsRoot);
    }

    for (Map.Entry<String, String> expectation : expectations.entrySet()) {
        Path input = new Path(expectation.getKey());
        Path expected = new Path(expectation.getValue());
        Assert.assertEquals(expected, myGhfs.makeQualified(input));
    }
}

From source file:com.google.cloud.hadoop.fs.gcs.GoogleHadoopFileSystemTestBase.java

License:Open Source License

/**
 * Validates makeQualified() when working directory is root: each input string is qualified
 * and compared with the expected fully-qualified path.
 */
@Test
public void testMakeQualifiedRoot() {
    GoogleHadoopFileSystemBase myGhfs = (GoogleHadoopFileSystemBase) ghfs;
    myGhfs.setWorkingDirectory(myGhfs.getFileSystemRoot());
    Path fsRootPath = myGhfs.getFileSystemRoot();
    URI fsRootUri = fsRootPath.toUri();
    String fsRoot = fsRootPath.toString();

    // Maps the path to qualify -> the expected qualified path.
    Map<String, String> qualifiedPaths = new HashMap<>();
    qualifiedPaths.put("/", fsRoot);
    qualifiedPaths.put("/foo", fsRoot + "foo");
    qualifiedPaths.put("/foo/bar", fsRoot + "foo/bar");
    qualifiedPaths.put(".", fsRoot);
    qualifiedPaths.put("foo", fsRoot + "foo");
    qualifiedPaths.put("foo/bar", fsRoot + "foo/bar");
    qualifiedPaths.put(fsRoot, fsRoot);
    qualifiedPaths.put(fsRoot + "foo", fsRoot + "foo");
    qualifiedPaths.put(fsRoot + "foo/bar", fsRoot + "foo/bar");
    qualifiedPaths.put("/foo/../foo", fsRoot + "foo");
    qualifiedPaths.put("/foo/bar/../../foo/bar", fsRoot + "foo/bar");
    qualifiedPaths.put("foo/../foo", fsRoot + "foo");
    qualifiedPaths.put("foo/bar/../../foo/bar", fsRoot + "foo/bar");
    qualifiedPaths.put(fsRoot + "foo/../foo", fsRoot + "foo");
    qualifiedPaths.put(fsRoot + "foo/bar/../../foo/bar", fsRoot + "foo/bar");
    qualifiedPaths.put(fsRoot + "..foo/bar", fsRoot + "..foo/bar");
    qualifiedPaths.put("..foo/bar", fsRoot + "..foo/bar");

    // GHFS specific behavior where root is its own parent.
    qualifiedPaths.put("/..", fsRoot);
    qualifiedPaths.put("/../../..", fsRoot);
    qualifiedPaths.put("/../foo/", fsRoot + "foo");
    qualifiedPaths.put("/../../../foo/bar", fsRoot + "foo/bar");
    qualifiedPaths.put("..", fsRoot);
    qualifiedPaths.put("../..", fsRoot);
    qualifiedPaths.put("../foo", fsRoot + "foo");
    qualifiedPaths.put("../foo/bar", fsRoot + "foo/bar");
    qualifiedPaths.put(fsRoot + "..", fsRoot);
    qualifiedPaths.put(fsRoot + "../foo", fsRoot + "foo");
    qualifiedPaths.put(fsRoot + "../foo/bar", fsRoot + "foo/bar");
    // This key used to duplicate "../foo/../foo" two lines below, so the multi-level
    // variant (the counterpart of the "../../../foo/bar/..." case that follows) was never
    // exercised.
    qualifiedPaths.put("../../../foo/../foo", fsRoot + "foo");
    qualifiedPaths.put("../../../foo/bar/../../foo/bar", fsRoot + "foo/bar");
    qualifiedPaths.put("../foo/../foo", fsRoot + "foo");
    qualifiedPaths.put("../foo/bar/../../foo/bar", fsRoot + "foo/bar");
    qualifiedPaths.put(fsRoot + "../foo/../foo", fsRoot + "foo");
    qualifiedPaths.put(fsRoot + "../foo/bar/../../foo/bar", fsRoot + "foo/bar");
    qualifiedPaths.put(fsRoot + "foo/../../../../foo", fsRoot + "foo");
    qualifiedPaths.put(fsRoot + "foo/bar/../../../../../foo/bar", fsRoot + "foo/bar");

    // Skip for authority-less gsg paths.
    if (fsRootUri.getAuthority() != null) {
        // When the path to qualify is of the form gs://somebucket, we want to qualify
        // it as gs://someBucket/
        qualifiedPaths.put(fsRoot.substring(0, fsRoot.length() - 1), fsRoot);
    }

    // Iterate over entries directly instead of looking each key up again.
    for (Map.Entry<String, String> entry : qualifiedPaths.entrySet()) {
        Path unqualifiedPath = new Path(entry.getKey());
        Path qualifiedPath = new Path(entry.getValue());
        Assert.assertEquals(qualifiedPath, myGhfs.makeQualified(unqualifiedPath));
    }
}