List of usage examples for org.apache.hadoop.fs FileStatus getPath
public Path getPath()
From source file:com.google.cloud.hadoop.fs.gcs.HadoopFileSystemIntegrationHelper.java
License:Open Source License
/** * Deletes all objects from the given bucket. *//*from w w w . j a v a2 s .com*/ @Override protected void clearBucket(String bucketName) throws IOException { Path hadoopPath = createSchemeCompatibleHadoopPath(bucketName, null); FileStatus[] statusList = null; try { // Hadoop1 returns null on listStatus FileNotFound, Hadoop2 throws: statusList = ghfs.listStatus(hadoopPath); } catch (IOException ioe) { // Ignored. } if (statusList != null) { for (FileStatus status : statusList) { if (!ghfs.delete(status.getPath(), true)) { System.err.println(String.format("Failed to delete path: '%s'", status.getPath())); } } } }
From source file:com.google.cloud.hadoop.fs.gcs.HadoopFileSystemTestBase.java
License:Open Source License
/** * Actual logic for validating the GoogleHadoopFileSystemBase-specific FileStatus returned * by getFileStatus() or listStatus().// w w w .ja v a 2 s . c om */ private void validateFileStatusInternal(String bucketName, String objectName, boolean expectedToExist, FileStatus fileStatus) throws IOException { Assert.assertEquals(String.format("Existence of bucketName '%s', objectName '%s'", bucketName, objectName), expectedToExist, fileStatus != null); if (fileStatus != null) { // File/dir exists, check its attributes. String message = (fileStatus.getPath()).toString(); long expectedSize = ghfsHelper.getExpectedObjectSize(objectName, expectedToExist); if (expectedSize != Long.MIN_VALUE) { Assert.assertEquals(message, expectedSize, fileStatus.getLen()); } boolean expectedToBeDir = Strings.isNullOrEmpty(objectName) || ghfsHelper.objectHasDirectoryPath(objectName); Assert.assertEquals(message, expectedToBeDir, fileStatus.isDir()); Instant currentTime = Instant.now(); Instant modificationTime = new Instant(fileStatus.getModificationTime()); // We must subtract 1000, because some FileSystems, like LocalFileSystem, have only // second granularity, so we might have something like testStartTime == 1234123 // and modificationTime == 1234000. Unfortunately, "Instant" doesn't support easy // conversions between units to clip to the "second" precision. // Alternatively, we should just use TimeUnit and formally convert "toSeconds". Assert.assertTrue( String.format("Stale file? testStartTime: %s modificationTime: %s bucket: '%s' object: '%s'", testStartTime.toString(), modificationTime.toString(), bucketName, objectName), modificationTime.isEqual(testStartTime.minus(1000)) || modificationTime.isAfter(testStartTime.minus(1000))); Assert.assertTrue( String.format("Clock skew? 
currentTime: %s modificationTime: %s bucket: '%s' object: '%s'", currentTime.toString(), modificationTime.toString(), bucketName, objectName), modificationTime.isEqual(currentTime) || modificationTime.isBefore(currentTime)); } }
From source file:com.google.cloud.hadoop.fs.gcs.HadoopFileSystemTestBase.java
License:Open Source License
/** * Validates FileStatus for the given item. * * See {@link GoogleCloudStorageIntegrationTest.listObjectNamesAndGetItemInfo()} for more info. */// w w w. ja v a 2 s .co m @Override protected void validateGetItemInfo(String bucketName, String objectName, boolean expectedToExist) throws IOException { URI path = ghfsHelper.getPath(bucketName, objectName, true); Path hadoopPath = ghfsHelper.castAsHadoopPath(path); FileStatus fileStatus = null; try { fileStatus = ghfs.getFileStatus(hadoopPath); } catch (FileNotFoundException e) { // Leaves fileStatus == null on FileNotFoundException. } if (fileStatus != null) { Assert.assertEquals("Hadoop paths for URI: " + path.toString(), hadoopPath, fileStatus.getPath()); } validateFileStatusInternal(bucketName, objectName, expectedToExist, fileStatus); }
From source file:com.google.cloud.hadoop.fs.gcs.HadoopFileSystemTestBase.java
License:Open Source License
/** * Validates FileInfo returned by listFileInfo(). * * See {@link GoogleCloudStorage.listObjectNamesAndGetItemInfo()} for more info. *//*from w w w. j a v a2 s . c o m*/ @Override protected void validateListNamesAndInfo(String bucketName, String objectNamePrefix, boolean pathExpectedToExist, String... expectedListedNames) throws IOException { boolean childPathsExpectedToExist = pathExpectedToExist && (expectedListedNames != null); boolean listRoot = bucketName == null; // Prepare list of expected paths. List<Path> expectedPaths = new ArrayList<>(); // Also maintain a backwards mapping to keep track of which of "expectedListedNames" and // "bucketName" is associated with each path, so that we can supply validateFileStatusInternal // with the objectName and thus enable it to lookup the internally stored expected size, // directory status, etc., of the associated FileStatus. Map<Path, String[]> pathToComponents = new HashMap<>(); if (childPathsExpectedToExist) { for (String expectedListedName : expectedListedNames) { String[] pathComponents = new String[2]; if (listRoot) { pathComponents[0] = expectedListedName; pathComponents[1] = null; } else { pathComponents[0] = bucketName; pathComponents[1] = expectedListedName; } Path expectedPath = ghfsHelper .castAsHadoopPath(ghfsHelper.getPath(pathComponents[0], pathComponents[1], true)); expectedPaths.add(expectedPath); pathToComponents.put(expectedPath, pathComponents); } } // Get list of actual paths. 
URI path = ghfsHelper.getPath(bucketName, objectNamePrefix, true); Path hadoopPath = ghfsHelper.castAsHadoopPath(path); FileStatus[] fileStatus = null; try { fileStatus = ghfsHelper.listStatus(hadoopPath); } catch (FileNotFoundException fnfe) { Assert.assertFalse(String.format("Hadoop path %s expected to exist", hadoopPath), pathExpectedToExist); } if (!ghfsFileSystemDescriptor.getScheme().equals("file")) { Assert.assertEquals(String.format("Hadoop path %s", hadoopPath.toString()), pathExpectedToExist, fileStatus != null); } else { // LocalFileSystem -> ChecksumFileSystem will return an empty array instead of null for // nonexistent paths. if (!pathExpectedToExist && fileStatus != null) { Assert.assertEquals(0, fileStatus.length); } } if (fileStatus != null) { Set<Path> actualPaths = new HashSet<>(); for (FileStatus status : fileStatus) { Path actualPath = status.getPath(); if (status.isDir()) { Assert.assertFalse(status.getPath().getName().isEmpty()); } actualPaths.add(actualPath); String[] uriComponents = pathToComponents.get(actualPath); if (uriComponents != null) { // Only do fine-grained validation for the explicitly expected paths. validateFileStatusInternal(uriComponents[0], uriComponents[1], true, status); } } if (listRoot) { for (Path expectedPath : expectedPaths) { Assert.assertTrue(String.format("expected: <%s> in: <%s>", expectedPath, actualPaths), actualPaths.contains(expectedPath)); } } else { // Used sorted arrays so that the test-failure output is makes it easy to match up each // expected element to each actual element. Path[] sortedExpectedPaths = new ArrayList<>(Sets.newTreeSet(expectedPaths)).toArray(new Path[0]); Path[] sortedActualPaths = new ArrayList<>(Sets.newTreeSet(actualPaths)).toArray(new Path[0]); String errorMessage = String.format("expected: %s, actual: %s", Arrays.toString(sortedExpectedPaths), Arrays.toString(sortedActualPaths)); Assert.assertArrayEquals(errorMessage, sortedExpectedPaths, sortedActualPaths); } } }
From source file:com.google.cloud.hadoop.fs.gcs.HadoopFileSystemTestBase.java
License:Open Source License
/** * Validates when paths already contain a pre-escaped substring, e.g. file:///foo%3Abar/baz, * that the FileSystem doesn't accidentally unescape it along the way, e.g. translating into * file:///foo:bar/baz./* ww w.j av a 2 s .c o m*/ */ @Test public void testPreemptivelyEscapedPaths() throws IOException { URI parentUri = GoogleCloudStorageFileSystemIntegrationTest.getTempFilePath(); Path parentPath = ghfsHelper.castAsHadoopPath(parentUri); Path escapedPath = new Path(parentPath, new Path("foo%3Abar")); ghfsHelper.writeFile(escapedPath, "foo", 1, true); Assert.assertTrue(ghfs.exists(escapedPath)); FileStatus status = ghfs.getFileStatus(escapedPath); Assert.assertEquals(escapedPath, status.getPath()); // Cleanup. Assert.assertTrue(ghfs.delete(parentPath, true)); }
From source file:com.google.GsHdfs.java
License:Open Source License
public void copyHdfsToGs(String hdfsFn, String gsFn) throws Exception { Path srcPath = new Path(hdfsFn); if (hdfs.isFile(srcPath)) { FSDataInputStream src = hdfs.open(srcPath); Process gsutil = Runtime.getRuntime().exec(new String[] { "gsutil", "cp", "-", gsFn }); OutputStream dst = gsutil.getOutputStream(); System.out.println(hdfsFn + " -> " + gsFn); doCopy(src, dst, hdfsFn);/* www. ja v a2 s .c o m*/ } else { // Recurse for (FileStatus file : hdfs.listStatus(srcPath)) { Path path = file.getPath(); copyHdfsToGs(path.toString(), gsFn + "/" + path.getName()); } } }
From source file:com.google.mr4c.sources.HadoopFileSource.java
License:Open Source License
private List<String> getAllFileNames(Path dir) throws IOException { List<String> names = new ArrayList<String>(); FileStatus[] files = m_fs.listStatus(dir); if (files == null) { throw new FileNotFoundException(String.format("[%s] is not an existing directory", dir)); }//from w w w . j a v a 2s . com for (FileStatus status : files) { if (status.isDirectory() && !m_flat) { names.addAll(getAllFileNames(status.getPath())); } else { String name = m_dir.toUri().relativize(status.getPath().toUri()).getPath(); names.add(name); } } return names; }
From source file:com.google.mr4c.sources.HadoopFileSource.java
License:Open Source License
/**
 * Deletes every entry listed directly under {@code m_dir}.
 *
 * <p>Deletion is recursive unless {@code m_flat} is set.
 *
 * @throws java.io.FileNotFoundException if listing {@code m_dir} yields null
 * @throws IOException if listing fails or any entry cannot be deleted
 */
public synchronized void clear() throws IOException {
  FileStatus[] entries = m_fs.listStatus(m_dir);
  if (entries == null) {
    // Some Hadoop versions return null instead of throwing for a missing path; fail with a
    // clear message instead of a NullPointerException from the loop below.
    throw new java.io.FileNotFoundException(
        String.format("[%s] is not an existing directory", m_dir));
  }

  for (FileStatus status : entries) {
    if (!m_fs.delete(status.getPath(), !m_flat)) {
      throw new IOException(String.format("Couldn't delete [%s]", status.getPath().toString()));
    }
  }
}
From source file:com.google.mr4c.sources.MapFileSource.java
License:Open Source License
/**
 * Lists the final path component of every entry under {@code m_metaPath}.
 *
 * @return the entry names, or an empty list when {@code m_metaPath} does not exist
 * @throws IOException if {@code ensureRead()}, the existence check or the listing fails
 */
public synchronized List<String> getAllMetadataFileNames() throws IOException {
  ensureRead();

  if (m_fs.exists(m_metaPath)) {
    List<String> metadataNames = new ArrayList<String>();
    FileStatus[] entries = m_fs.listStatus(m_metaPath);
    for (int i = 0; i < entries.length; i++) {
      metadataNames.add(entries[i].getPath().getName());
    }
    return metadataNames;
  }

  return Collections.emptyList();
}
From source file:com.grantingersoll.intell.clustering.KMeansClusteringEngine.java
License:Apache License
private static Map<Integer, Cluster> loadClusters(ClusterJob job) throws Exception { Map<Integer, Cluster> result = new HashMap<Integer, Cluster>(); try {/* w w w . j ava 2 s . c o m*/ FileSystem fs = job.output.getFileSystem(job.conf); for (FileStatus seqFile : fs.globStatus(new Path(job.output, "part-*"))) { Path path = seqFile.getPath(); //System.out.println("Input Path: " + path); doesn't this interfere with output? SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, job.conf); try { Writable key = reader.getKeyClass().asSubclass(Writable.class).newInstance(); Writable value = reader.getValueClass().asSubclass(Writable.class).newInstance(); while (reader.next(key, value)) { Cluster cluster = (Cluster) value; result.put(cluster.getId(), cluster); } } finally { reader.close(); } } } finally { } return result; }