List of usage examples for org.apache.hadoop.fs Path toUri
public URI toUri()
From source file:com.github.seqware.queryengine.plugins.hbasemr.MRHBasePluginRunner.java
License:Open Source License
public File handleFileResult(Path path) { FileSystem fs = null;//from w w w .ja v a 2 s .co m try { Path outputPartPath = new Path(path, "part-r-00000"); // copy file from HDFS to local temporary file Logger.getLogger(FeaturesByFilterPlugin.class.getName()) .info("Source file is " + outputPartPath.toString()); Configuration conf = new Configuration(); HBaseStorage.configureHBaseConfig(conf); HBaseConfiguration.addHbaseResources(conf); fs = FileSystem.get(conf); File createTempFile = File.createTempFile("vcf", "out"); createTempFile.delete(); Path outPath = new Path(createTempFile.toURI()); FileSystem localSystem = FileSystem.get(new Configuration()); Logger.getLogger(FeaturesByFilterPlugin.class.getName()) .info("Destination file is " + outPath.toString()); if (!fs.exists(outputPartPath)) { Logger.getLogger(FeaturesByFilterPlugin.class.getName()).fatal("Input file not found"); } if (!fs.isFile(outputPartPath)) { Logger.getLogger(FeaturesByFilterPlugin.class.getName()).fatal("Input should be a file"); } if (localSystem.exists(outPath)) { Logger.getLogger(FeaturesByFilterPlugin.class.getName()).fatal("Output already exists"); } // doesn't quite work yet, no time to finish before poster, check results manually on hdfs FileUtil.copy(fs, outputPartPath, localSystem, outPath, true, true, conf); return new File(outPath.toUri()); } catch (IOException ex) { Logger.getLogger(VCFDumperPlugin.class.getName()).fatal(null, ex); } finally { if (fs != null) { try { fs.delete(path, true); } catch (IOException ex) { Logger.getLogger(VCFDumperPlugin.class.getName()) .warn("IOException when clearing after text output", ex); } } } return null; }
From source file:com.github.seqware.queryengine.plugins.runners.hbasemr.MRHBasePluginRunner.java
License:Open Source License
@Override public ReturnType get() { try {//from ww w . j a v a2 s . co m job.waitForCompletion(true); if (mapReducePlugin.getResultMechanism() == PluginInterface.ResultMechanism.COUNTER) { return (ReturnType) Long .valueOf(job.getCounters().findCounter(MapperInterface.Counters.ROWS).getValue()); } else if (mapReducePlugin.getResultMechanism() == PluginInterface.ResultMechanism.SGID) { SGID resultSGID = outputSet.getSGID(); Class<? extends Atom> resultClass = (Class<? extends Atom>) mapReducePlugin.getResultClass(); return (ReturnType) SWQEFactory.getQueryInterface().getLatestAtomBySGID(resultSGID, resultClass); } else if (mapReducePlugin.getResultMechanism() == PluginInterface.ResultMechanism.BATCHEDFEATURESET) { FeatureSet build = updateAndGet(outputSet); return (ReturnType) build; } else if (mapReducePlugin.getResultMechanism() == PluginInterface.ResultMechanism.FILE) { Path outputPath = TextOutputFormat.getOutputPath(job); FileSystem fs = FileSystem.get(job.getConfiguration()); Path localPath = new Path(Files.createTempDir().toURI()); fs.copyToLocalFile(outputPath, localPath); File outputFile = new File(localPath.toUri()); return (ReturnType) outputFile; } else { throw new UnsupportedOperationException(); } } catch (IOException ex) { Logger.getLogger(MRHBasePluginRunner.class.getName()).error(null, ex); } catch (InterruptedException ex) { Logger.getLogger(MRHBasePluginRunner.class.getName()).error(null, ex); } catch (ClassNotFoundException ex) { Logger.getLogger(MRHBasePluginRunner.class.getName()).error(null, ex); } return null; }
From source file:com.google.cloud.hadoop.fs.gcs.GoogleHadoopFileSystem.java
License:Open Source License
@Override protected void checkPath(Path path) { // Validate scheme super.checkPath(path); URI uri = path.toUri(); String bucket = uri.getAuthority(); // Bucketless URIs will be qualified later if (bucket == null || bucket.equals(rootBucket)) { return;//from w w w . j a v a 2 s . c o m } else { String msg = String.format("Wrong bucket: %s, in path: %s, expected bucket: %s", bucket, path, rootBucket); throw new IllegalArgumentException(msg); } }
From source file:com.google.cloud.hadoop.fs.gcs.GoogleHadoopFileSystem.java
License:Open Source License
/** * Translates a "gs:/" style hadoopPath (or relative path which is not fully-qualified) into * the appropriate GCS path which is compatible with the underlying GcsFs or gsutil. *///w w w. j ava 2s . c o m @Override public URI getGcsPath(Path hadoopPath) { LOG.debug("GHFS.getGcsPath: {}", hadoopPath); // Convert to fully qualified absolute path; the Path object will callback to get our current // workingDirectory as part of fully resolving the path. Path resolvedPath = makeQualified(hadoopPath); String objectName = resolvedPath.toUri().getPath(); if (objectName != null && resolvedPath.isAbsolute()) { // Strip off leading '/' because GoogleCloudStorageFileSystem.getPath appends it explicitly // between bucket and objectName. objectName = objectName.substring(1); } // Construct GCS path uri. URI gcsPath = gcsfs.getPathCodec().getPath(rootBucket, objectName, true); LOG.debug("GHFS.getGcsPath: {} -> {}", hadoopPath, gcsPath); return gcsPath; }
From source file:com.google.cloud.hadoop.fs.gcs.GoogleHadoopFileSystemBase.java
License:Open Source License
/** * * <p> Overridden to make root it's own parent. This is POSIX compliant, but more importantly * guards against poor directory accounting in the PathData class of Hadoop 2's FsShell. *///from w ww .j a v a 2 s. c o m @Override public Path makeQualified(Path path) { LOG.debug("GHFS.makeQualified: path: {}", path); Path qualifiedPath = super.makeQualified(path); URI uri = qualifiedPath.toUri(); Preconditions.checkState("".equals(uri.getPath()) || qualifiedPath.isAbsolute(), "Path '%s' must be fully qualified.", qualifiedPath); // Strip initial '..'s to make root is its own parent. StringBuilder sb = new StringBuilder(uri.getPath()); while (sb.indexOf("/../") == 0) { // Leave a preceding slash, so path is still absolute. sb.delete(0, 3); } String strippedPath = sb.toString(); // Allow a Path of gs://someBucket to map to gs://someBucket/ if (strippedPath.equals("/..") || strippedPath.equals("")) { strippedPath = "/"; } Path result = new Path(uri.getScheme(), uri.getAuthority(), strippedPath); LOG.debug("GHFS.makeQualified:=> {}", result); return result; }
From source file:com.google.cloud.hadoop.fs.gcs.GoogleHadoopFileSystemBase.java
License:Open Source License
@Override protected void checkPath(Path path) { URI uri = path.toUri(); String scheme = uri.getScheme(); // Only check that the scheme matches. The authority and path will be // validated later. if (scheme == null || scheme.equalsIgnoreCase(getScheme())) { return;/* w ww. jav a 2 s.c o m*/ } else { String msg = String.format("Wrong FS scheme: %s, in path: %s, expected scheme: %s", scheme, path, getScheme()); throw new IllegalArgumentException(msg); } }
From source file:com.google.cloud.hadoop.fs.gcs.GoogleHadoopFileSystemBase.java
License:Open Source License
/** * Determines based on config settings and suitability of {@code fixedPath} whether to use * flat globbing logic where we use a single large listing during globStatus to then perform * the core globbing logic in-memory./* w ww. ja v a 2 s. c o m*/ */ @VisibleForTesting boolean shouldUseFlatGlob(Path fixedPath) { // Config setting overrides all else. if (!enableFlatGlob) { return false; } // Only works for filesystems where the base Hadoop Path scheme matches the underlying URI // scheme for GCS. if (!getUri().getScheme().equals(GoogleCloudStorageFileSystem.SCHEME)) { LOG.debug("Flat glob is on, but doesn't work for scheme '{}'; usig default behavior.", getUri().getScheme()); return false; } // The full pattern should have a wildcard, otherwise there's no point doing the flat glob. GlobPattern fullPattern = new GlobPattern(fixedPath.toString()); if (!fullPattern.hasWildcard()) { LOG.debug("Flat glob is on, but Path '{}' has no wildcard; using default behavior.", fixedPath); return false; } // To use a flat glob, there must be an authority defined. if (Strings.isNullOrEmpty(fixedPath.toUri().getAuthority())) { LOG.info("Flat glob is on, but Path '{}' has a empty authority, using default behavior.", fixedPath); return false; } // And the authority must not contain a wildcard. GlobPattern authorityPattern = new GlobPattern(fixedPath.toUri().getAuthority()); if (authorityPattern.hasWildcard()) { LOG.info("Flat glob is on, but Path '{}' has a wildcard authority, using default behavior.", fixedPath); return false; } return true; }
From source file:com.google.cloud.hadoop.fs.gcs.GoogleHadoopFileSystemBase.java
License:Open Source License
/** * Returns an array of FileStatus objects whose path names match pathPattern * and is accepted by the user-supplied path filter. Results are sorted by * their path names./*from w ww .ja v a2 s .c o m*/ * * Return null if pathPattern has no glob and the path does not exist. * Return an empty array if pathPattern has a glob and no path matches it. * * @param pathPattern A regular expression specifying the path pattern. * @param filter A user-supplied path filter. * @return An array of FileStatus objects. * @throws IOException if an error occurs. */ @Override public FileStatus[] globStatus(Path pathPattern, PathFilter filter) throws IOException { checkOpen(); LOG.debug("GHFS.globStatus: {}", pathPattern); // URI does not handle glob expressions nicely, for the purpose of // fully-qualifying a path we can URI-encode them. // Using toString() to avoid Path(URI) constructor. Path encodedPath = new Path(pathPattern.toUri().toString()); // We convert pathPattern to GCS path and then to Hadoop path to ensure that it ends up in // the correct format. See note in getHadoopPath for more information. Path fixedPath = getHadoopPath(getGcsPath(encodedPath)); // Decode URI-encoded path back into a glob path. fixedPath = new Path(URI.create(fixedPath.toString())); LOG.debug("GHFS.globStatus fixedPath: {} => {}", pathPattern, fixedPath); if (shouldUseFlatGlob(fixedPath)) { String pathString = fixedPath.toString(); String prefixString = trimToPrefixWithoutGlob(pathString); Path prefixPath = new Path(prefixString); URI prefixUri = getGcsPath(prefixPath); if (prefixString.endsWith("/") && !prefixPath.toString().endsWith("/")) { // Path strips a trailing slash unless it's the 'root' path. We want to keep the trailing // slash so that we don't wastefully list sibling files which may match the directory-name // as a strict prefix but would've been omitted due to not containing the '/' at the end. prefixUri = FileInfo.convertToDirectoryPath(gcsfs.getPathCodec(), prefixUri); } // Get everything matching the non-glob prefix. LOG.debug("Listing everything with prefix '{}'", prefixUri); List<FileInfo> fileInfos = gcsfs.listAllFileInfoForPrefix(prefixUri); if (fileInfos.isEmpty()) { // Let the superclass define the proper logic for finding no matches. return super.globStatus(fixedPath, filter); } // Perform the core globbing logic in the helper filesystem. GoogleHadoopFileSystem helperFileSystem = ListHelperGoogleHadoopFileSystem.createInstance(gcsfs, fileInfos); FileStatus[] returnList = helperFileSystem.globStatus(pathPattern, filter); // If the return list contains directories, we should repair them if they're 'implicit'. if (enableAutoRepairImplicitDirectories) { List<URI> toRepair = new ArrayList<>(); for (FileStatus status : returnList) { // Modification time of 0 indicates implicit directory. if (status.isDir() && status.getModificationTime() == 0) { toRepair.add(getGcsPath(status.getPath())); } } if (!toRepair.isEmpty()) { LOG.warn("Discovered {} implicit directories to repair within return values.", toRepair.size()); gcsfs.repairDirs(toRepair); } } return returnList; } else { FileStatus[] ret = super.globStatus(fixedPath, filter); if (ret == null) { if (enableAutoRepairImplicitDirectories) { LOG.debug("GHFS.globStatus returned null for '{}', attempting possible repair.", pathPattern); if (gcsfs.repairPossibleImplicitDirectory(getGcsPath(fixedPath))) { LOG.warn("Success repairing '{}', re-globbing.", pathPattern); ret = super.globStatus(fixedPath, filter); } } } return ret; } }
From source file:com.google.cloud.hadoop.fs.gcs.GoogleHadoopFileSystemTestBase.java
License:Open Source License
/** * Validates makeQualified() when working directory is not root. */// w w w . ja v a 2s.c o m @Test public void testMakeQualifiedNotRoot() { GoogleHadoopFileSystemBase myGhfs = (GoogleHadoopFileSystemBase) ghfs; Path fsRootPath = myGhfs.getFileSystemRoot(); URI fsRootUri = fsRootPath.toUri(); String fsRoot = fsRootPath.toString(); String workingParent = fsRoot + "working/"; String workingDir = workingParent + "dir"; myGhfs.setWorkingDirectory(new Path(workingDir)); Map<String, String> qualifiedPaths = new HashMap<>(); qualifiedPaths.put("/", fsRoot); qualifiedPaths.put("/foo", fsRoot + "foo"); qualifiedPaths.put("/foo/bar", fsRoot + "foo/bar"); qualifiedPaths.put(".", workingDir); qualifiedPaths.put("foo", workingDir + "/foo"); qualifiedPaths.put("foo/bar", workingDir + "/foo/bar"); qualifiedPaths.put(fsRoot, fsRoot); qualifiedPaths.put(fsRoot + "foo", fsRoot + "foo"); qualifiedPaths.put(fsRoot + "foo/bar", fsRoot + "foo/bar"); qualifiedPaths.put("/foo/../foo", fsRoot + "foo"); qualifiedPaths.put("/foo/bar/../../foo/bar", fsRoot + "foo/bar"); qualifiedPaths.put("foo/../foo", workingDir + "/foo"); qualifiedPaths.put("foo/bar/../../foo/bar", workingDir + "/foo/bar"); qualifiedPaths.put(fsRoot + "foo/../foo", fsRoot + "foo"); qualifiedPaths.put(fsRoot + "foo/bar/../../foo/bar", fsRoot + "foo/bar"); qualifiedPaths.put("..", workingParent); qualifiedPaths.put("../..", fsRoot); qualifiedPaths.put("../foo", workingParent + "/foo"); qualifiedPaths.put("../foo/bar", workingParent + "/foo/bar"); qualifiedPaths.put("../foo/../foo", workingParent + "/foo"); qualifiedPaths.put("../foo/bar/../../foo/bar", workingParent + "/foo/bar"); qualifiedPaths.put(workingDir + "/../foo/../foo", workingParent + "/foo"); qualifiedPaths.put(workingDir + "/../foo/bar/../../foo/bar", workingParent + "/foo/bar"); qualifiedPaths.put(fsRoot + "..foo/bar", fsRoot + "..foo/bar"); qualifiedPaths.put("..foo/bar", workingDir + "/..foo/bar"); // GHFS specific behavior where root is it's own parent. qualifiedPaths.put("/..", fsRoot); qualifiedPaths.put("/../../..", fsRoot); qualifiedPaths.put("/../foo/", fsRoot + "foo"); qualifiedPaths.put("/../../../foo/bar", fsRoot + "foo/bar"); qualifiedPaths.put("../../..", fsRoot); qualifiedPaths.put(fsRoot + "..", fsRoot); qualifiedPaths.put(fsRoot + "../foo", fsRoot + "foo"); qualifiedPaths.put(fsRoot + "../foo/bar", fsRoot + "foo/bar"); qualifiedPaths.put("../../../foo/../foo", fsRoot + "foo"); qualifiedPaths.put("../../../foo/bar/../../foo/bar", fsRoot + "foo/bar"); // Skip for authority-less gsg paths. if (fsRootUri.getAuthority() != null) { // When the path to qualify is of the form gs://somebucket, we want to qualify // it as gs://someBucket/ qualifiedPaths.put(fsRoot.substring(0, fsRoot.length() - 1), fsRoot); } for (String unqualifiedString : qualifiedPaths.keySet()) { Path unqualifiedPath = new Path(unqualifiedString); Path qualifiedPath = new Path(qualifiedPaths.get(unqualifiedString)); Assert.assertEquals(qualifiedPath, myGhfs.makeQualified(unqualifiedPath)); } }
From source file:com.google.cloud.hadoop.fs.gcs.GoogleHadoopFileSystemTestBase.java
License:Open Source License
/** * Validates makeQualified() when working directory is root. */// ww w . j a v a 2s. co m @Test public void testMakeQualifiedRoot() { GoogleHadoopFileSystemBase myGhfs = (GoogleHadoopFileSystemBase) ghfs; myGhfs.setWorkingDirectory(myGhfs.getFileSystemRoot()); Path fsRootPath = myGhfs.getFileSystemRoot(); URI fsRootUri = fsRootPath.toUri(); String fsRoot = fsRootPath.toString(); Map<String, String> qualifiedPaths = new HashMap<>(); qualifiedPaths.put("/", fsRoot); qualifiedPaths.put("/foo", fsRoot + "foo"); qualifiedPaths.put("/foo/bar", fsRoot + "foo/bar"); qualifiedPaths.put(".", fsRoot); qualifiedPaths.put("foo", fsRoot + "foo"); qualifiedPaths.put("foo/bar", fsRoot + "foo/bar"); qualifiedPaths.put(fsRoot, fsRoot); qualifiedPaths.put(fsRoot + "foo", fsRoot + "foo"); qualifiedPaths.put(fsRoot + "foo/bar", fsRoot + "foo/bar"); qualifiedPaths.put("/foo/../foo", fsRoot + "foo"); qualifiedPaths.put("/foo/bar/../../foo/bar", fsRoot + "foo/bar"); qualifiedPaths.put("foo/../foo", fsRoot + "foo"); qualifiedPaths.put("foo/bar/../../foo/bar", fsRoot + "foo/bar"); qualifiedPaths.put(fsRoot + "foo/../foo", fsRoot + "foo"); qualifiedPaths.put(fsRoot + "foo/bar/../../foo/bar", fsRoot + "foo/bar"); qualifiedPaths.put(fsRoot + "..foo/bar", fsRoot + "..foo/bar"); qualifiedPaths.put("..foo/bar", fsRoot + "..foo/bar"); // GHFS specific behavior where root is it's own parent. qualifiedPaths.put("/..", fsRoot); qualifiedPaths.put("/../../..", fsRoot); qualifiedPaths.put("/../foo/", fsRoot + "foo"); qualifiedPaths.put("/../../../foo/bar", fsRoot + "foo/bar"); qualifiedPaths.put("..", fsRoot); qualifiedPaths.put("../..", fsRoot); qualifiedPaths.put("../foo", fsRoot + "foo"); qualifiedPaths.put("../foo/bar", fsRoot + "foo/bar"); qualifiedPaths.put(fsRoot + "..", fsRoot); qualifiedPaths.put(fsRoot + "../foo", fsRoot + "foo"); qualifiedPaths.put(fsRoot + "../foo/bar", fsRoot + "foo/bar"); qualifiedPaths.put("../foo/../foo", fsRoot + "foo"); qualifiedPaths.put("../../../foo/bar/../../foo/bar", fsRoot + "foo/bar"); qualifiedPaths.put("../foo/../foo", fsRoot + "foo"); qualifiedPaths.put("../foo/bar/../../foo/bar", fsRoot + "foo/bar"); qualifiedPaths.put(fsRoot + "../foo/../foo", fsRoot + "foo"); qualifiedPaths.put(fsRoot + "../foo/bar/../../foo/bar", fsRoot + "foo/bar"); qualifiedPaths.put(fsRoot + "foo/../../../../foo", fsRoot + "foo"); qualifiedPaths.put(fsRoot + "foo/bar/../../../../../foo/bar", fsRoot + "foo/bar"); // Skip for authority-less gsg paths. if (fsRootUri.getAuthority() != null) { // When the path to qualify is of the form gs://somebucket, we want to qualify // it as gs://someBucket/ qualifiedPaths.put(fsRoot.substring(0, fsRoot.length() - 1), fsRoot); } for (String unqualifiedString : qualifiedPaths.keySet()) { Path unqualifiedPath = new Path(unqualifiedString); Path qualifiedPath = new Path(qualifiedPaths.get(unqualifiedString)); Assert.assertEquals(qualifiedPath, myGhfs.makeQualified(unqualifiedPath)); } }