List of usage examples for org.apache.hadoop.fs.Path#toString()
@Override
public String toString()
From source file:com.github.sakserv.minicluster.yarn.InJvmContainerExecutor.java
License:Apache License
/** * Extracts {@link LocalResource}s from the {@link Container}. *//*from w w w . j a va 2 s. com*/ @SuppressWarnings("unchecked") private Set<Path> extractUserProvidedClassPathEntries(Container container) { Map<Path, List<String>> localizedResources; try { Field lf = container.getClass().getDeclaredField("localizedResources"); lf.setAccessible(true); localizedResources = (Map<Path, List<String>>) lf.get(container); Set<Path> paths = localizedResources.keySet(); // Needed for Tez for (Path path : paths) { if (path.toString().endsWith("tez-conf.pb") || path.toString().endsWith("tez-dag.pb")) { File sourceFile = new File(path.toUri()); File targetFile = new File(System.getenv(Environment.PWD.name()) + "/" + sourceFile.getName()); FileUtils.copyFile(sourceFile, targetFile); // System.out.println("######## Copied file: " + targetFile); // FileInputStream fis = new FileInputStream(new File(System.getenv(Environment.PWD.name()), targetFile.getName())); // System.out.println(fis.available()); // fis.close(); // break; } } return paths; } catch (Exception e) { throw new RuntimeException(e); } }
From source file:com.github.seqware.queryengine.plugins.hbasemr.MRHBasePluginRunner.java
License:Open Source License
public File handleFileResult(Path path) { FileSystem fs = null;//from w ww . j a va 2 s . c om try { Path outputPartPath = new Path(path, "part-r-00000"); // copy file from HDFS to local temporary file Logger.getLogger(FeaturesByFilterPlugin.class.getName()) .info("Source file is " + outputPartPath.toString()); Configuration conf = new Configuration(); HBaseStorage.configureHBaseConfig(conf); HBaseConfiguration.addHbaseResources(conf); fs = FileSystem.get(conf); File createTempFile = File.createTempFile("vcf", "out"); createTempFile.delete(); Path outPath = new Path(createTempFile.toURI()); FileSystem localSystem = FileSystem.get(new Configuration()); Logger.getLogger(FeaturesByFilterPlugin.class.getName()) .info("Destination file is " + outPath.toString()); if (!fs.exists(outputPartPath)) { Logger.getLogger(FeaturesByFilterPlugin.class.getName()).fatal("Input file not found"); } if (!fs.isFile(outputPartPath)) { Logger.getLogger(FeaturesByFilterPlugin.class.getName()).fatal("Input should be a file"); } if (localSystem.exists(outPath)) { Logger.getLogger(FeaturesByFilterPlugin.class.getName()).fatal("Output already exists"); } // doesn't quite work yet, no time to finish before poster, check results manually on hdfs FileUtil.copy(fs, outputPartPath, localSystem, outPath, true, true, conf); return new File(outPath.toUri()); } catch (IOException ex) { Logger.getLogger(VCFDumperPlugin.class.getName()).fatal(null, ex); } finally { if (fs != null) { try { fs.delete(path, true); } catch (IOException ex) { Logger.getLogger(VCFDumperPlugin.class.getName()) .warn("IOException when clearing after text output", ex); } } } return null; }
From source file:com.github.ygf.pagerank.InLinks.java
License:Apache License
public int run(String[] args) throws Exception { if (args.length != 3) { System.out.println("Usage: InLinks <links-simple-sorted.txt> <titles-dir> <output-dir>"); ToolRunner.printGenericCommandUsage(System.out); return 2; }/* w w w . jav a 2 s. c o m*/ Path linksFile = new Path(args[0]); Path titlesDir = new Path(args[1]); Path outputDir = new Path(args[2]); Configuration conf = getConf(); // Do not create _SUCCESS files. MapFileOutputFormat.getReaders calls // try to read the _SUCCESS as another MapFile dir. conf.set("mapreduce.fileoutputcommitter.marksuccessfuljobs", "false"); // Default values of the parameters of the algorithm. conf.setInt("inlinks.top_results", conf.getInt("inlinks.top_results", 100)); conf.set("inlinks.titles_dir", titlesDir.toString()); computeInLinks(conf, linksFile, outputDir); summarizeResults(conf, outputDir); return 0; }
From source file:com.github.ygf.pagerank.PageRank.java
License:Apache License
public int run(String[] args) throws Exception { if (args.length != 3) { System.out.println("Usage: PageRank <links-simple-sorted.txt> <titles-dir> <output-dir>"); ToolRunner.printGenericCommandUsage(System.out); return 2; }//from w w w . jav a2 s . com Path linksFile = new Path(args[0]); Path titlesDir = new Path(args[1]); Path outputDir = new Path(args[2]); Configuration conf = getConf(); // Do not create _SUCCESS files. MapFileOutputFormat.getReaders calls // try to read the _SUCCESS as another MapFile dir. conf.set("mapreduce.fileoutputcommitter.marksuccessfuljobs", "false"); // Default values of the parameters of the algorithm. conf.setInt("pagerank.block_size", conf.getInt("pagerank.block_size", 10000)); conf.setInt("pagerank.max_iterations", conf.getInt("pagerank.max_iterations", 2)); conf.setFloat("pagerank.damping_factor", conf.getFloat("pagerank.damping_factor", 0.85f)); conf.setInt("pagerank.top_results", conf.getInt("pagerank.top_results", 100)); conf.set("pagerank.titles_dir", titlesDir.toString()); int numPages = getNumPages(conf, titlesDir); conf.setLong("pagerank.num_pages", numPages); createTransitionMatrix(conf, linksFile, outputDir); int maxIters = Integer.parseInt(conf.get("pagerank.max_iterations")); for (int iter = 1; iter <= maxIters; iter++) { conf.setInt("pagerank.iteration", iter); pageRankIteration(iter, conf, outputDir); cleanPreviousIteration(iter, conf, outputDir); } summarizeResults(maxIters, conf, outputDir); return 0; }
From source file:com.google.cloud.hadoop.fs.gcs.GoogleHadoopFileSystemBase.java
License:Open Source License
/** * Gets status of the given path item./*from ww w. j a v a 2s .c o m*/ * * @param hadoopPath The path we want information about. * @return A FileStatus object for the given path. * @throws FileNotFoundException when the path does not exist; * @throws IOException on other errors. */ @Override public FileStatus getFileStatus(Path hadoopPath) throws IOException { long startTime = System.nanoTime(); Preconditions.checkArgument(hadoopPath != null, "hadoopPath must not be null"); checkOpen(); LOG.debug("GHFS.getFileStatus: {}", hadoopPath); URI gcsPath = getGcsPath(hadoopPath); FileInfo fileInfo = gcsfs.getFileInfo(gcsPath); if (!fileInfo.exists()) { LOG.debug("GHFS.getFileStatus: not found: {}", gcsPath); String msg = fileInfo.isDirectory() ? "Directory not found : " : "File not found : "; msg += hadoopPath.toString(); throw new FileNotFoundException(msg); } FileStatus status = getFileStatus(fileInfo); long duration = System.nanoTime() - startTime; increment(Counter.GET_FILE_STATUS); increment(Counter.GET_FILE_STATUS_TIME, duration); return status; }
From source file:com.google.cloud.hadoop.fs.gcs.GoogleHadoopFileSystemBase.java
License:Open Source License
/** * Determines based on config settings and suitability of {@code fixedPath} whether to use * flat globbing logic where we use a single large listing during globStatus to then perform * the core globbing logic in-memory.//from w ww. j a v a 2 s .c o m */ @VisibleForTesting boolean shouldUseFlatGlob(Path fixedPath) { // Config setting overrides all else. if (!enableFlatGlob) { return false; } // Only works for filesystems where the base Hadoop Path scheme matches the underlying URI // scheme for GCS. if (!getUri().getScheme().equals(GoogleCloudStorageFileSystem.SCHEME)) { LOG.debug("Flat glob is on, but doesn't work for scheme '{}'; usig default behavior.", getUri().getScheme()); return false; } // The full pattern should have a wildcard, otherwise there's no point doing the flat glob. GlobPattern fullPattern = new GlobPattern(fixedPath.toString()); if (!fullPattern.hasWildcard()) { LOG.debug("Flat glob is on, but Path '{}' has no wildcard; using default behavior.", fixedPath); return false; } // To use a flat glob, there must be an authority defined. if (Strings.isNullOrEmpty(fixedPath.toUri().getAuthority())) { LOG.info("Flat glob is on, but Path '{}' has a empty authority, using default behavior.", fixedPath); return false; } // And the authority must not contain a wildcard. GlobPattern authorityPattern = new GlobPattern(fixedPath.toUri().getAuthority()); if (authorityPattern.hasWildcard()) { LOG.info("Flat glob is on, but Path '{}' has a wildcard authority, using default behavior.", fixedPath); return false; } return true; }
From source file:com.google.cloud.hadoop.fs.gcs.GoogleHadoopFileSystemBase.java
License:Open Source License
/**
 * Returns the FileStatus objects whose path names match {@code pathPattern} and which are
 * accepted by the user-supplied path filter. Results are sorted by their path names.
 *
 * <p>Returns null if pathPattern has no glob and the path does not exist.
 * Returns an empty array if pathPattern has a glob and no path matches it.
 *
 * <p>When auto-repair of implicit directories is enabled, directories found to be implicit
 * along the way are repaired as a side effect.
 *
 * @param pathPattern The path pattern, possibly containing glob expressions.
 * @param filter A user-supplied path filter.
 * @return An array of FileStatus objects.
 * @throws IOException if an error occurs.
 */
@Override
public FileStatus[] globStatus(Path pathPattern, PathFilter filter) throws IOException {
  checkOpen();
  LOG.debug("GHFS.globStatus: {}", pathPattern);

  // URI does not handle glob expressions nicely, for the purpose of
  // fully-qualifying a path we can URI-encode them.
  // Using toString() to avoid Path(URI) constructor.
  Path encodedPath = new Path(pathPattern.toUri().toString());

  // Round-trip through the GCS path and back to a Hadoop path so the result ends up in the
  // correct format. See note in getHadoopPath for more information.
  Path qualifiedEncodedPath = getHadoopPath(getGcsPath(encodedPath));

  // Decode URI-encoded path back into a glob path.
  Path fixedPath = new Path(URI.create(qualifiedEncodedPath.toString()));
  LOG.debug("GHFS.globStatus fixedPath: {} => {}", pathPattern, fixedPath);

  if (!shouldUseFlatGlob(fixedPath)) {
    FileStatus[] matches = super.globStatus(fixedPath, filter);
    if (matches == null && enableAutoRepairImplicitDirectories) {
      LOG.debug("GHFS.globStatus returned null for '{}', attempting possible repair.",
          pathPattern);
      if (gcsfs.repairPossibleImplicitDirectory(getGcsPath(fixedPath))) {
        LOG.warn("Success repairing '{}', re-globbing.", pathPattern);
        matches = super.globStatus(fixedPath, filter);
      }
    }
    return matches;
  }

  // Flat glob: list everything under the longest glob-free prefix in a single request.
  String globFreePrefix = trimToPrefixWithoutGlob(fixedPath.toString());
  Path prefixPath = new Path(globFreePrefix);
  URI prefixUri = getGcsPath(prefixPath);
  boolean trailingSlashWasStripped =
      globFreePrefix.endsWith("/") && !prefixPath.toString().endsWith("/");
  if (trailingSlashWasStripped) {
    // Path strips a trailing slash unless it's the 'root' path. We want to keep the trailing
    // slash so that we don't wastefully list sibling files which may match the directory-name
    // as a strict prefix but would've been omitted due to not containing the '/' at the end.
    prefixUri = FileInfo.convertToDirectoryPath(gcsfs.getPathCodec(), prefixUri);
  }

  // Get everything matching the non-glob prefix.
  LOG.debug("Listing everything with prefix '{}'", prefixUri);
  List<FileInfo> listedInfos = gcsfs.listAllFileInfoForPrefix(prefixUri);
  if (listedInfos.isEmpty()) {
    // Let the superclass define the proper logic for finding no matches.
    return super.globStatus(fixedPath, filter);
  }

  // Perform the core globbing logic in the helper filesystem.
  GoogleHadoopFileSystem listingBackedFs =
      ListHelperGoogleHadoopFileSystem.createInstance(gcsfs, listedInfos);
  // NOTE(review): the helper is handed the caller's pathPattern, not fixedPath - confirm intended.
  FileStatus[] matches = listingBackedFs.globStatus(pathPattern, filter);

  // If the matches contain directories, we should repair them if they're 'implicit'.
  if (enableAutoRepairImplicitDirectories) {
    List<URI> implicitDirs = new ArrayList<>();
    for (FileStatus match : matches) {
      // Modification time of 0 indicates implicit directory.
      boolean implicitDirectory = match.isDir() && match.getModificationTime() == 0;
      if (implicitDirectory) {
        implicitDirs.add(getGcsPath(match.getPath()));
      }
    }
    if (!implicitDirs.isEmpty()) {
      LOG.warn("Discovered {} implicit directories to repair within return values.",
          implicitDirs.size());
      gcsfs.repairDirs(implicitDirs);
    }
  }
  return matches;
}
From source file:com.google.cloud.hadoop.fs.gcs.GoogleHadoopFileSystemIntegrationTest.java
License:Open Source License
/** * Validates initialize() with configuration key fs.gs.working.dir set. */// ww w . j a v a2 s . c o m @Test @Override public void testInitializeWithWorkingDirectory() throws IOException, URISyntaxException { // We can just test by calling initialize multiple times (for each test condition) because // there is nothing in initialize() which must be run only once. If this changes, this test // method will need to resort to using a new GoogleHadoopFileSystem() for each item // in the for-loop. GoogleHadoopFileSystem myGhfs = (GoogleHadoopFileSystem) ghfs; Configuration config = new Configuration(); config.set(GoogleHadoopFileSystemBase.GCS_SYSTEM_BUCKET_KEY, bucketName); ghfs.initialize(myGhfs.initUri, config); // setUpWorkingDirectoryTest() depends on getFileSystemRoot(), which in turn depends on // having initialized with the desired systemBucket. If we tried to call this before // ghfs.initialize on the preceding line, the test may or may not succeed depending on // whether the last test case happened to set systemBucket to bucketName already. List<WorkingDirData> wddList = setUpWorkingDirectoryTest(); String rootBucketName = myGhfs.getRootBucketName(); for (WorkingDirData wdd : wddList) { Path path = wdd.path; Path expectedWorkingDir = wdd.expectedPath; Path currentWorkingDir = ghfs.getWorkingDirectory(); config.set(GoogleHadoopFileSystemBase.GCS_WORKING_DIRECTORY_KEY, path.toString()); ghfs.initialize(myGhfs.initUri, config); Path newWorkingDir = ghfs.getWorkingDirectory(); if (expectedWorkingDir != null) { Assert.assertEquals(expectedWorkingDir, newWorkingDir); } else { Assert.assertEquals(currentWorkingDir, newWorkingDir); } } Assert.assertTrue(ghfs.getHomeDirectory().toString().startsWith("gs://" + rootBucketName)); }
From source file:com.google.cloud.hadoop.fs.gcs.GoogleHadoopFileSystemTestBase.java
License:Open Source License
/** * Makes listStatus and globStatus perform repairs by first creating an object directly without * creating its parent directory object. *///from ww w.j a va2 s . c o m @Test public void testRepairImplicitDirectory() throws IOException, URISyntaxException { GoogleHadoopFileSystemBase myghfs = (GoogleHadoopFileSystemBase) ghfs; GoogleCloudStorageFileSystem gcsfs = myghfs.getGcsFs(); URI seedUri = GoogleCloudStorageFileSystemIntegrationTest.getTempFilePath(); Path parentPath = ghfsHelper.castAsHadoopPath(seedUri); URI parentUri = myghfs.getGcsPath(parentPath); // A subdir path that looks like gs://<bucket>/<generated-tempdir>/foo-subdir where // neither the subdir nor gs://<bucket>/<generated-tempdir> exist yet. Path subdirPath = new Path(parentPath, "foo-subdir"); URI subdirUri = myghfs.getGcsPath(subdirPath); Path leafPath = new Path(subdirPath, "bar-subdir"); URI leafUri = myghfs.getGcsPath(leafPath); gcsfs.mkdir(leafUri); boolean inferImplicitDirectories = gcsfs.getOptions().getCloudStorageOptions() .isInferImplicitDirectoriesEnabled(); Assert.assertTrue("Expected to exist: " + leafUri, gcsfs.exists(leafUri)); if (inferImplicitDirectories) { Assert.assertTrue("Expected to exist: " + subdirUri, gcsfs.exists(subdirUri)); Assert.assertTrue("Expected to exist: " + parentUri, gcsfs.exists(parentUri)); } else { Assert.assertFalse("Expected to !exist: " + subdirUri, gcsfs.exists(subdirUri)); Assert.assertFalse("Expected to !exist: " + parentUri, gcsfs.exists(parentUri)); } myghfs.listStatus(parentPath); Assert.assertTrue("Expected to exist: " + leafUri, gcsfs.exists(leafUri)); Assert.assertTrue("Expected to exist: " + subdirUri, gcsfs.exists(subdirUri)); Assert.assertTrue("Expected to exist: " + parentUri, gcsfs.exists(parentUri)); ghfsHelper.clearBucket(bucketName); // Reset for globStatus. 
gcsfs.mkdir(leafUri); Assert.assertTrue("Expected to exist: " + leafUri, gcsfs.exists(leafUri)); if (inferImplicitDirectories) { Assert.assertTrue("Expected to exist: " + subdirUri, gcsfs.exists(subdirUri)); Assert.assertTrue("Expected to exist: " + parentUri, gcsfs.exists(parentUri)); } else { Assert.assertFalse("Expected to !exist: " + subdirUri, gcsfs.exists(subdirUri)); Assert.assertFalse("Expected to !exist: " + parentUri, gcsfs.exists(parentUri)); } myghfs.globStatus(parentPath); // Globbing the single directory only repairs that top-level directory; it is *not* the same // as listStatus. Assert.assertTrue("Expected to exist: " + leafUri, gcsfs.exists(leafUri)); if (inferImplicitDirectories) { Assert.assertTrue("Expected to exist: " + subdirUri, gcsfs.exists(subdirUri)); } else { Assert.assertFalse("Expected to !exist: " + subdirUri, gcsfs.exists(subdirUri)); } Assert.assertTrue("Expected to exist: " + parentUri, gcsfs.exists(parentUri)); ghfsHelper.clearBucket(bucketName); // Reset for globStatus(path/*) gcsfs.mkdir(leafUri); Assert.assertTrue("Expected to exist: " + leafUri, gcsfs.exists(leafUri)); if (inferImplicitDirectories) { Assert.assertTrue("Expected to exist: " + subdirUri, gcsfs.exists(subdirUri)); Assert.assertTrue("Expected to exist: " + parentUri, gcsfs.exists(parentUri)); } else { Assert.assertFalse("Expected to !exist: " + subdirUri, gcsfs.exists(subdirUri)); Assert.assertFalse("Expected to !exist: " + parentUri, gcsfs.exists(parentUri)); } // When globbing children, the parent will only be repaired if flat-globbing is not enabled. Path globChildrenPath = new Path(parentPath.toString() + "/*"); myghfs.globStatus(globChildrenPath); boolean expectParentRepair = !myghfs.shouldUseFlatGlob(globChildrenPath); // This will internally call listStatus, so will have the same behavior of repairing both // levels of subdirectories. 
Assert.assertTrue("Expected to exist: " + leafUri, gcsfs.exists(leafUri)); HadoopVersionInfo versionInfo = new HadoopVersionInfo(); if (versionInfo.isLessThan(2, 0) || versionInfo.isGreaterThan(2, 3)) { Assert.assertTrue("Expected to exist: " + subdirUri, gcsfs.exists(subdirUri)); if (expectParentRepair || inferImplicitDirectories) { Assert.assertTrue("Expected to exist: " + parentUri, gcsfs.exists(parentUri)); } else { Assert.assertFalse("Expected not to exist due to flat globbing: " + parentUri, gcsfs.exists(parentUri)); } } ghfsHelper.clearBucket(bucketName); // Reset for globStatus(path*) gcsfs.mkdir(leafUri); Assert.assertTrue("Expected to exist: " + leafUri, gcsfs.exists(leafUri)); if (inferImplicitDirectories) { Assert.assertTrue("Expected to exist: " + subdirUri, gcsfs.exists(subdirUri)); Assert.assertTrue("Expected to exist: " + parentUri, gcsfs.exists(parentUri)); } else { Assert.assertFalse("Expected to !exist: " + subdirUri, gcsfs.exists(subdirUri)); Assert.assertFalse("Expected to !exist: " + parentUri, gcsfs.exists(parentUri)); } // Globbing with a wildcard in the parentUri itself also only repairs one level, but for // a different reason than globbing with no wildcard. Globbing with no wildcard requires // catching 'null' in globStatus, whereas having the wildcard causes the repair to happen // when listing parentOf(parentUri). myghfs.globStatus(new Path(parentPath.toString() + "*")); Assert.assertTrue("Expected to exist: " + leafUri, gcsfs.exists(leafUri)); if (inferImplicitDirectories) { Assert.assertTrue("Expected to exist: " + subdirUri, gcsfs.exists(subdirUri)); } else { Assert.assertFalse("Expected to !exist: " + subdirUri, gcsfs.exists(subdirUri)); } if (versionInfo.isLessThan(2, 0) || versionInfo.isGreaterThan(2, 3)) { Assert.assertTrue("Expected to exist: " + parentUri, gcsfs.exists(parentUri)); } ghfsHelper.clearBucket(bucketName); }
From source file:com.google.cloud.hadoop.fs.gcs.GoogleHadoopFileSystemTestBase.java
License:Open Source License
/** * Validates makeQualified() when working directory is not root. *///from w w w. j ava 2 s. co m @Test public void testMakeQualifiedNotRoot() { GoogleHadoopFileSystemBase myGhfs = (GoogleHadoopFileSystemBase) ghfs; Path fsRootPath = myGhfs.getFileSystemRoot(); URI fsRootUri = fsRootPath.toUri(); String fsRoot = fsRootPath.toString(); String workingParent = fsRoot + "working/"; String workingDir = workingParent + "dir"; myGhfs.setWorkingDirectory(new Path(workingDir)); Map<String, String> qualifiedPaths = new HashMap<>(); qualifiedPaths.put("/", fsRoot); qualifiedPaths.put("/foo", fsRoot + "foo"); qualifiedPaths.put("/foo/bar", fsRoot + "foo/bar"); qualifiedPaths.put(".", workingDir); qualifiedPaths.put("foo", workingDir + "/foo"); qualifiedPaths.put("foo/bar", workingDir + "/foo/bar"); qualifiedPaths.put(fsRoot, fsRoot); qualifiedPaths.put(fsRoot + "foo", fsRoot + "foo"); qualifiedPaths.put(fsRoot + "foo/bar", fsRoot + "foo/bar"); qualifiedPaths.put("/foo/../foo", fsRoot + "foo"); qualifiedPaths.put("/foo/bar/../../foo/bar", fsRoot + "foo/bar"); qualifiedPaths.put("foo/../foo", workingDir + "/foo"); qualifiedPaths.put("foo/bar/../../foo/bar", workingDir + "/foo/bar"); qualifiedPaths.put(fsRoot + "foo/../foo", fsRoot + "foo"); qualifiedPaths.put(fsRoot + "foo/bar/../../foo/bar", fsRoot + "foo/bar"); qualifiedPaths.put("..", workingParent); qualifiedPaths.put("../..", fsRoot); qualifiedPaths.put("../foo", workingParent + "/foo"); qualifiedPaths.put("../foo/bar", workingParent + "/foo/bar"); qualifiedPaths.put("../foo/../foo", workingParent + "/foo"); qualifiedPaths.put("../foo/bar/../../foo/bar", workingParent + "/foo/bar"); qualifiedPaths.put(workingDir + "/../foo/../foo", workingParent + "/foo"); qualifiedPaths.put(workingDir + "/../foo/bar/../../foo/bar", workingParent + "/foo/bar"); qualifiedPaths.put(fsRoot + "..foo/bar", fsRoot + "..foo/bar"); qualifiedPaths.put("..foo/bar", workingDir + "/..foo/bar"); // GHFS specific behavior where root is it's own parent. 
qualifiedPaths.put("/..", fsRoot); qualifiedPaths.put("/../../..", fsRoot); qualifiedPaths.put("/../foo/", fsRoot + "foo"); qualifiedPaths.put("/../../../foo/bar", fsRoot + "foo/bar"); qualifiedPaths.put("../../..", fsRoot); qualifiedPaths.put(fsRoot + "..", fsRoot); qualifiedPaths.put(fsRoot + "../foo", fsRoot + "foo"); qualifiedPaths.put(fsRoot + "../foo/bar", fsRoot + "foo/bar"); qualifiedPaths.put("../../../foo/../foo", fsRoot + "foo"); qualifiedPaths.put("../../../foo/bar/../../foo/bar", fsRoot + "foo/bar"); // Skip for authority-less gsg paths. if (fsRootUri.getAuthority() != null) { // When the path to qualify is of the form gs://somebucket, we want to qualify // it as gs://someBucket/ qualifiedPaths.put(fsRoot.substring(0, fsRoot.length() - 1), fsRoot); } for (String unqualifiedString : qualifiedPaths.keySet()) { Path unqualifiedPath = new Path(unqualifiedString); Path qualifiedPath = new Path(qualifiedPaths.get(unqualifiedString)); Assert.assertEquals(qualifiedPath, myGhfs.makeQualified(unqualifiedPath)); } }