Example usage for org.apache.hadoop.fs FileStatus getPath

Introduction

On this page you can find example usage of org.apache.hadoop.fs.FileStatus.getPath(), collected from open-source projects.

Prototype

public Path getPath() 
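
For orientation, a minimal sketch of the typical pattern, assuming a configured FileSystem and an existing directory (the path below is hypothetical):

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public static void printChildPaths() throws IOException {
    FileSystem fs = FileSystem.get(new Configuration());
    // Each FileStatus describes one child entry; getPath() returns its
    // fully-qualified Path.
    for (FileStatus status : fs.listStatus(new Path("/tmp"))) {
        System.out.println(status.getPath());
    }
}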

Usage

From source file:com.google.cloud.hadoop.fs.gcs.HadoopFileSystemIntegrationHelper.java

License:Open Source License

/**
 * Deletes all objects from the given bucket.
 */
@Override
protected void clearBucket(String bucketName) throws IOException {
    Path hadoopPath = createSchemeCompatibleHadoopPath(bucketName, null);
    FileStatus[] statusList = null;
    try {
        // Hadoop1 returns null on listStatus FileNotFound, Hadoop2 throws:
        statusList = ghfs.listStatus(hadoopPath);
    } catch (IOException ioe) {
        // Ignored.
    }

    if (statusList != null) {
        for (FileStatus status : statusList) {
            if (!ghfs.delete(status.getPath(), true)) {
                System.err.println(String.format("Failed to delete path: '%s'", status.getPath()));
            }
        }
    }
}
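
The broad IOException catch above works around the version difference called out in the comment: Hadoop 1 returns null from listStatus() on a missing path, while Hadoop 2 throws FileNotFoundException. A minimal sketch of a helper that normalizes both behaviors (the helper name is hypothetical, and it deliberately narrows the catch to FileNotFoundException):

// Hypothetical helper: normalize listStatus() across Hadoop 1 (null result)
// and Hadoop 2 (FileNotFoundException) so callers always get a non-null array.
private static FileStatus[] listStatusOrEmpty(FileSystem fs, Path path) throws IOException {
    try {
        FileStatus[] statuses = fs.listStatus(path);
        return statuses != null ? statuses : new FileStatus[0];
    } catch (FileNotFoundException e) {
        return new FileStatus[0];
    }
}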

From source file:com.google.cloud.hadoop.fs.gcs.HadoopFileSystemTestBase.java

License:Open Source License

/**
 * Actual logic for validating the GoogleHadoopFileSystemBase-specific FileStatus returned
 * by getFileStatus() or listStatus().
 */
private void validateFileStatusInternal(String bucketName, String objectName, boolean expectedToExist,
        FileStatus fileStatus) throws IOException {
    Assert.assertEquals(String.format("Existence of bucketName '%s', objectName '%s'", bucketName, objectName),
            expectedToExist, fileStatus != null);

    if (fileStatus != null) {
        // File/dir exists, check its attributes.
        String message = fileStatus.getPath().toString();

        long expectedSize = ghfsHelper.getExpectedObjectSize(objectName, expectedToExist);
        if (expectedSize != Long.MIN_VALUE) {
            Assert.assertEquals(message, expectedSize, fileStatus.getLen());
        }

        boolean expectedToBeDir = Strings.isNullOrEmpty(objectName)
                || ghfsHelper.objectHasDirectoryPath(objectName);
        Assert.assertEquals(message, expectedToBeDir, fileStatus.isDir());

        Instant currentTime = Instant.now();
        Instant modificationTime = new Instant(fileStatus.getModificationTime());
        // We must subtract 1000, because some FileSystems, like LocalFileSystem, have only
        // second granularity, so we might have something like testStartTime == 1234123
        // and modificationTime == 1234000. Unfortunately, "Instant" doesn't support easy
        // conversions between units to clip to the "second" precision.
        // Alternatively, we should just use TimeUnit and formally convert "toSeconds".
        Assert.assertTrue(
                String.format("Stale file? testStartTime: %s modificationTime: %s bucket: '%s' object: '%s'",
                        testStartTime.toString(), modificationTime.toString(), bucketName, objectName),
                modificationTime.isEqual(testStartTime.minus(1000))
                        || modificationTime.isAfter(testStartTime.minus(1000)));
        Assert.assertTrue(
                String.format("Clock skew? currentTime: %s modificationTime: %s bucket: '%s' object: '%s'",
                        currentTime.toString(), modificationTime.toString(), bucketName, objectName),
                modificationTime.isEqual(currentTime) || modificationTime.isBefore(currentTime));
    }
}
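
The granularity comment above already names the cleaner alternative: truncate both timestamps to whole seconds with TimeUnit instead of subtracting a hard-coded 1000 ms. A sketch of that check, in the context of the method above (uses java.util.concurrent.TimeUnit; testStartTime is the Joda-Time Instant from above):

// Sketch: compare at second granularity rather than subtracting 1000 ms.
long modSeconds = TimeUnit.MILLISECONDS.toSeconds(fileStatus.getModificationTime());
long startSeconds = TimeUnit.MILLISECONDS.toSeconds(testStartTime.getMillis());
Assert.assertTrue("Stale file?", modSeconds >= startSeconds);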

From source file:com.google.cloud.hadoop.fs.gcs.HadoopFileSystemTestBase.java

License:Open Source License

/**
 * Validates FileStatus for the given item.
 *
 * See {@link GoogleCloudStorageIntegrationTest#listObjectNamesAndGetItemInfo()} for more info.
 */
@Override
protected void validateGetItemInfo(String bucketName, String objectName, boolean expectedToExist)
        throws IOException {
    URI path = ghfsHelper.getPath(bucketName, objectName, true);
    Path hadoopPath = ghfsHelper.castAsHadoopPath(path);
    FileStatus fileStatus = null;

    try {
        fileStatus = ghfs.getFileStatus(hadoopPath);
    } catch (FileNotFoundException e) {
        // Leaves fileStatus == null on FileNotFoundException.
    }

    if (fileStatus != null) {
        Assert.assertEquals("Hadoop paths for URI: " + path.toString(), hadoopPath, fileStatus.getPath());
    }
    validateFileStatusInternal(bucketName, objectName, expectedToExist, fileStatus);
}

From source file:com.google.cloud.hadoop.fs.gcs.HadoopFileSystemTestBase.java

License:Open Source License

/**
 * Validates FileInfo returned by listFileInfo().
 *
 * See {@link GoogleCloudStorage#listObjectNamesAndGetItemInfo()} for more info.
 */
@Override
protected void validateListNamesAndInfo(String bucketName, String objectNamePrefix, boolean pathExpectedToExist,
        String... expectedListedNames) throws IOException {
    boolean childPathsExpectedToExist = pathExpectedToExist && (expectedListedNames != null);
    boolean listRoot = bucketName == null;

    // Prepare list of expected paths.
    List<Path> expectedPaths = new ArrayList<>();
    // Also maintain a backwards mapping to keep track of which of "expectedListedNames" and
    // "bucketName" is associated with each path, so that we can supply validateFileStatusInternal
    // with the objectName and thus enable it to lookup the internally stored expected size,
    // directory status, etc., of the associated FileStatus.
    Map<Path, String[]> pathToComponents = new HashMap<>();
    if (childPathsExpectedToExist) {
        for (String expectedListedName : expectedListedNames) {
            String[] pathComponents = new String[2];
            if (listRoot) {
                pathComponents[0] = expectedListedName;
                pathComponents[1] = null;
            } else {
                pathComponents[0] = bucketName;
                pathComponents[1] = expectedListedName;
            }
            Path expectedPath = ghfsHelper
                    .castAsHadoopPath(ghfsHelper.getPath(pathComponents[0], pathComponents[1], true));
            expectedPaths.add(expectedPath);
            pathToComponents.put(expectedPath, pathComponents);
        }
    }

    // Get list of actual paths.
    URI path = ghfsHelper.getPath(bucketName, objectNamePrefix, true);
    Path hadoopPath = ghfsHelper.castAsHadoopPath(path);
    FileStatus[] fileStatus = null;
    try {
        fileStatus = ghfsHelper.listStatus(hadoopPath);
    } catch (FileNotFoundException fnfe) {
        Assert.assertFalse(String.format("Hadoop path %s expected to exist", hadoopPath), pathExpectedToExist);
    }

    if (!ghfsFileSystemDescriptor.getScheme().equals("file")) {
        Assert.assertEquals(String.format("Hadoop path %s", hadoopPath.toString()), pathExpectedToExist,
                fileStatus != null);
    } else {
        // LocalFileSystem -> ChecksumFileSystem will return an empty array instead of null for
        // nonexistent paths.
        if (!pathExpectedToExist && fileStatus != null) {
            Assert.assertEquals(0, fileStatus.length);
        }
    }

    if (fileStatus != null) {
        Set<Path> actualPaths = new HashSet<>();
        for (FileStatus status : fileStatus) {
            Path actualPath = status.getPath();
            if (status.isDir()) {
                Assert.assertFalse(status.getPath().getName().isEmpty());
            }
            actualPaths.add(actualPath);
            String[] uriComponents = pathToComponents.get(actualPath);
            if (uriComponents != null) {
                // Only do fine-grained validation for the explicitly expected paths.
                validateFileStatusInternal(uriComponents[0], uriComponents[1], true, status);
            }
        }

        if (listRoot) {
            for (Path expectedPath : expectedPaths) {
                Assert.assertTrue(String.format("expected: <%s> in: <%s>", expectedPath, actualPaths),
                        actualPaths.contains(expectedPath));
            }
        } else {
            // Use sorted arrays so that the test-failure output makes it easy to match up each
            // expected element to each actual element.
            Path[] sortedExpectedPaths = new ArrayList<>(Sets.newTreeSet(expectedPaths)).toArray(new Path[0]);
            Path[] sortedActualPaths = new ArrayList<>(Sets.newTreeSet(actualPaths)).toArray(new Path[0]);
            String errorMessage = String.format("expected: %s, actual: %s",
                    Arrays.toString(sortedExpectedPaths), Arrays.toString(sortedActualPaths));
            Assert.assertArrayEquals(errorMessage, sortedExpectedPaths, sortedActualPaths);
        }
    }
}

From source file:com.google.cloud.hadoop.fs.gcs.HadoopFileSystemTestBase.java

License:Open Source License

/**
 * Validates when paths already contain a pre-escaped substring, e.g. file:///foo%3Abar/baz,
 * that the FileSystem doesn't accidentally unescape it along the way, e.g. translating into
 * file:///foo:bar/baz.
 */
@Test
public void testPreemptivelyEscapedPaths() throws IOException {
    URI parentUri = GoogleCloudStorageFileSystemIntegrationTest.getTempFilePath();
    Path parentPath = ghfsHelper.castAsHadoopPath(parentUri);
    Path escapedPath = new Path(parentPath, new Path("foo%3Abar"));

    ghfsHelper.writeFile(escapedPath, "foo", 1, true);
    Assert.assertTrue(ghfs.exists(escapedPath));

    FileStatus status = ghfs.getFileStatus(escapedPath);
    Assert.assertEquals(escapedPath, status.getPath());

    // Cleanup.
    Assert.assertTrue(ghfs.delete(parentPath, true));
}

From source file:com.google.GsHdfs.java

License:Open Source License

public void copyHdfsToGs(String hdfsFn, String gsFn) throws Exception {
    Path srcPath = new Path(hdfsFn);
    if (hdfs.isFile(srcPath)) {
        FSDataInputStream src = hdfs.open(srcPath);
        Process gsutil = Runtime.getRuntime().exec(new String[] { "gsutil", "cp", "-", gsFn });
        OutputStream dst = gsutil.getOutputStream();
        System.out.println(hdfsFn + " -> " + gsFn);
        doCopy(src, dst, hdfsFn);
    } else {
        // Recurse
        for (FileStatus file : hdfs.listStatus(srcPath)) {
            Path path = file.getPath();
            copyHdfsToGs(path.toString(), gsFn + "/" + path.getName());
        }
    }
}
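
On Hadoop 2, the manual isFile()/recursion split above can also be written with the recursive listFiles() iterator, which yields only files. A sketch of just the traversal, reusing the hdfs and srcPath names from the method above (copying elided):

// Sketch (Hadoop 2+): listFiles(path, true) walks the tree recursively and
// yields only files, replacing the explicit isFile()/recursion split.
RemoteIterator<LocatedFileStatus> files = hdfs.listFiles(srcPath, true);
while (files.hasNext()) {
    Path path = files.next().getPath();
    // copy 'path' to the corresponding gs:// destination here
}

Separately, note that the original never waits for the gsutil process or checks its exit status; production code would want Process.waitFor() after the copy.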

From source file:com.google.mr4c.sources.HadoopFileSource.java

License:Open Source License

private List<String> getAllFileNames(Path dir) throws IOException {
    List<String> names = new ArrayList<String>();
    FileStatus[] files = m_fs.listStatus(dir);
    if (files == null) {
        throw new FileNotFoundException(String.format("[%s] is not an existing directory", dir));
    }
    for (FileStatus status : files) {
        if (status.isDirectory() && !m_flat) {
            names.addAll(getAllFileNames(status.getPath()));
        } else {
            String name = m_dir.toUri().relativize(status.getPath().toUri()).getPath();
            names.add(name);
        }
    }
    return names;
}
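
The relativize() call is what turns each absolute child Path back into a name relative to the source root. A tiny sketch of the idiom with hypothetical paths:

// Hypothetical paths illustrating the relativize() idiom above.
Path root = new Path("hdfs://nn/data");
Path child = new Path("hdfs://nn/data/sub/file.txt");
String name = root.toUri().relativize(child.toUri()).getPath();
// name is now "sub/file.txt"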

From source file:com.google.mr4c.sources.HadoopFileSource.java

License:Open Source License

public synchronized void clear() throws IOException {
    for (FileStatus status : m_fs.listStatus(m_dir)) {
        if (!m_fs.delete(status.getPath(), !m_flat)) {
            throw new IOException(String.format("Couldn't delete [%s]", status.getPath().toString()));
        }
    }
}

From source file:com.google.mr4c.sources.MapFileSource.java

License:Open Source License

public synchronized List<String> getAllMetadataFileNames() throws IOException {
    ensureRead();
    if (!m_fs.exists(m_metaPath)) {
        return Collections.emptyList();
    }
    List<String> names = new ArrayList<String>();
    for (FileStatus status : m_fs.listStatus(m_metaPath)) {
        names.add(status.getPath().getName());
    }
    return names;
}

From source file:com.grantingersoll.intell.clustering.KMeansClusteringEngine.java

License:Apache License

private static Map<Integer, Cluster> loadClusters(ClusterJob job) throws Exception {
    Map<Integer, Cluster> result = new HashMap<Integer, Cluster>();
    try {

        FileSystem fs = job.output.getFileSystem(job.conf);
        for (FileStatus seqFile : fs.globStatus(new Path(job.output, "part-*"))) {
            Path path = seqFile.getPath();
            //System.out.println("Input Path: " + path); doesn't this interfere with output?
            SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, job.conf);
            try {
                Writable key = reader.getKeyClass().asSubclass(Writable.class).newInstance();
                Writable value = reader.getValueClass().asSubclass(Writable.class).newInstance();
                while (reader.next(key, value)) {
                    Cluster cluster = (Cluster) value;
                    result.put(cluster.getId(), cluster);
                }
            } finally {
                reader.close();
            }
        }
    } finally {

    }
    return result;
}
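
The Reader(fs, path, conf) constructor used above is deprecated in Hadoop 2, and the empty outer finally block is a no-op. A sketch of the same read loop using the option-based API and ReflectionUtils (uses org.apache.hadoop.util.ReflectionUtils; job.conf, path, and result as in the method above; not the original author's code):

// Sketch (Hadoop 2+): option-based reader plus ReflectionUtils instead of
// the deprecated Reader(fs, path, conf) constructor and raw newInstance().
SequenceFile.Reader reader = new SequenceFile.Reader(job.conf, SequenceFile.Reader.file(path));
try {
    Writable key = (Writable) ReflectionUtils.newInstance(reader.getKeyClass(), job.conf);
    Writable value = (Writable) ReflectionUtils.newInstance(reader.getValueClass(), job.conf);
    while (reader.next(key, value)) {
        Cluster cluster = (Cluster) value;
        result.put(cluster.getId(), cluster);
    }
} finally {
    reader.close();
}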