Example usage for org.apache.hadoop.fs Path toString

List of usage examples for org.apache.hadoop.fs Path toString

Introduction

On this page you can find usage examples for org.apache.hadoop.fs.Path.toString().

Prototype

@Override
public String toString()
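
A minimal sketch of the method's behavior (not taken from any of the source files below; the paths and scheme are illustrative only): Path.toString() returns the string form of the path, including any scheme and authority the Path was constructed with.

import org.apache.hadoop.fs.Path;

public class PathToStringDemo {
    public static void main(String[] args) {
        // A Path built from a full URI keeps its scheme and authority in toString().
        Path withScheme = new Path("hdfs://namenode:8020/user/data/input.txt");
        // A relative Path keeps only the path portion it was given.
        Path relative = new Path("logs/2020/01");

        System.out.println(withScheme.toString()); // hdfs://namenode:8020/user/data/input.txt
        System.out.println(relative.toString());   // logs/2020/01
    }
}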

Usage

From source file:com.google.cloud.hadoop.fs.gcs.GoogleHadoopFileSystemTestBase.java

License:Open Source License

/**
 * Validates makeQualified() when working directory is root.
 */
@Test
public void testMakeQualifiedRoot() {
    GoogleHadoopFileSystemBase myGhfs = (GoogleHadoopFileSystemBase) ghfs;
    myGhfs.setWorkingDirectory(myGhfs.getFileSystemRoot());
    Path fsRootPath = myGhfs.getFileSystemRoot();
    URI fsRootUri = fsRootPath.toUri();
    String fsRoot = fsRootPath.toString();
    Map<String, String> qualifiedPaths = new HashMap<>();
    qualifiedPaths.put("/", fsRoot);
    qualifiedPaths.put("/foo", fsRoot + "foo");
    qualifiedPaths.put("/foo/bar", fsRoot + "foo/bar");
    qualifiedPaths.put(".", fsRoot);
    qualifiedPaths.put("foo", fsRoot + "foo");
    qualifiedPaths.put("foo/bar", fsRoot + "foo/bar");
    qualifiedPaths.put(fsRoot, fsRoot);
    qualifiedPaths.put(fsRoot + "foo", fsRoot + "foo");
    qualifiedPaths.put(fsRoot + "foo/bar", fsRoot + "foo/bar");
    qualifiedPaths.put("/foo/../foo", fsRoot + "foo");
    qualifiedPaths.put("/foo/bar/../../foo/bar", fsRoot + "foo/bar");
    qualifiedPaths.put("foo/../foo", fsRoot + "foo");
    qualifiedPaths.put("foo/bar/../../foo/bar", fsRoot + "foo/bar");
    qualifiedPaths.put(fsRoot + "foo/../foo", fsRoot + "foo");
    qualifiedPaths.put(fsRoot + "foo/bar/../../foo/bar", fsRoot + "foo/bar");
    qualifiedPaths.put(fsRoot + "..foo/bar", fsRoot + "..foo/bar");
    qualifiedPaths.put("..foo/bar", fsRoot + "..foo/bar");

    // GHFS-specific behavior where root is its own parent.
    qualifiedPaths.put("/..", fsRoot);
    qualifiedPaths.put("/../../..", fsRoot);
    qualifiedPaths.put("/../foo/", fsRoot + "foo");
    qualifiedPaths.put("/../../../foo/bar", fsRoot + "foo/bar");
    qualifiedPaths.put("..", fsRoot);
    qualifiedPaths.put("../..", fsRoot);
    qualifiedPaths.put("../foo", fsRoot + "foo");
    qualifiedPaths.put("../foo/bar", fsRoot + "foo/bar");
    qualifiedPaths.put(fsRoot + "..", fsRoot);
    qualifiedPaths.put(fsRoot + "../foo", fsRoot + "foo");
    qualifiedPaths.put(fsRoot + "../foo/bar", fsRoot + "foo/bar");
    qualifiedPaths.put("../foo/../foo", fsRoot + "foo");
    qualifiedPaths.put("../../../foo/bar/../../foo/bar", fsRoot + "foo/bar");
    qualifiedPaths.put("../foo/../foo", fsRoot + "foo");
    qualifiedPaths.put("../foo/bar/../../foo/bar", fsRoot + "foo/bar");
    qualifiedPaths.put(fsRoot + "../foo/../foo", fsRoot + "foo");
    qualifiedPaths.put(fsRoot + "../foo/bar/../../foo/bar", fsRoot + "foo/bar");
    qualifiedPaths.put(fsRoot + "foo/../../../../foo", fsRoot + "foo");
    qualifiedPaths.put(fsRoot + "foo/bar/../../../../../foo/bar", fsRoot + "foo/bar");

    // Skip for authority-less gsg paths.
    if (fsRootUri.getAuthority() != null) {
        // When the path to qualify is of the form gs://somebucket, we want to qualify
        // it as gs://somebucket/
        qualifiedPaths.put(fsRoot.substring(0, fsRoot.length() - 1), fsRoot);
    }

    for (String unqualifiedString : qualifiedPaths.keySet()) {
        Path unqualifiedPath = new Path(unqualifiedString);
        Path qualifiedPath = new Path(qualifiedPaths.get(unqualifiedString));
        Assert.assertEquals(qualifiedPath, myGhfs.makeQualified(unqualifiedPath));
    }
}

From source file:com.google.cloud.hadoop.fs.gcs.GoogleHadoopFileSystemTestBase.java

License:Open Source License

/**
 * We override certain methods in FileSystem simply to provide debug tracing. (Search for
 * "Overridden functions for debug tracing" in GoogleHadoopFileSystemBase.java).
 * We do not add or update any functionality for such methods. The following
 * tests simply exercise that path to ensure coverage. Consequently, they do not
 * really test any functionality.
 *
 * Having coverage for these methods lets us easily determine the amount of
 * coverage that is missing in the rest of the code.
 */
@Test
public void provideCoverageForUnmodifiedMethods() throws IOException {
    // -------------------------------------------------------
    // Create test data.

    // Temporary file in GHFS.
    URI tempFileUri = GoogleCloudStorageFileSystemIntegrationTest.getTempFilePath();
    Path tempFilePath = ghfsHelper.castAsHadoopPath(tempFileUri);
    Path tempDirPath = tempFilePath.getParent();
    String text = "Hello World!";
    ghfsHelper.writeFile(tempFilePath, text, 1, false);

    // Another temporary file in GHFS.
    URI tempFileUri2 = GoogleCloudStorageFileSystemIntegrationTest.getTempFilePath();
    Path tempFilePath2 = ghfsHelper.castAsHadoopPath(tempFileUri2);

    // Temporary file in local FS.
    File localTempFile = File.createTempFile("ghfs-test-", null);
    Path localTempFilePath = new Path(localTempFile.getPath());
    Path localTempDirPath = localTempFilePath.getParent();

    // -------------------------------------------------------
    // Call the methods we want coverage for. Note that we do not attempt to
    // test their functionality, as we are not testing the Hadoop engine here.
    try {
        ghfs.deleteOnExit(tempFilePath);
        ghfs.getContentSummary(tempFilePath);
        ghfs.getDelegationToken("foo");
        ghfs.copyFromLocalFile(false, true, localTempFilePath, tempDirPath);
        ghfs.copyFromLocalFile(false, true, new Path[] { localTempFilePath }, tempDirPath);
        localTempFile.delete();
        ghfs.copyToLocalFile(true, tempFilePath, localTempDirPath);
        File localCopiedFile = new File(localTempDirPath.toString(), tempFilePath.getName());
        localCopiedFile.delete();
        Path localOutputPath = ghfs.startLocalOutput(tempFilePath2, localTempFilePath);
        FileWriter writer = new FileWriter(localOutputPath.toString());
        writer.write(text);
        writer.close();
        ghfs.completeLocalOutput(tempFilePath2, localOutputPath);
        ghfs.getUsed();
        ghfs.setVerifyChecksum(false);
        ghfs.getFileChecksum(tempFilePath2);
        ghfs.setPermission(tempFilePath2, FsPermission.getDefault());
        try {
            ghfs.setOwner(tempFilePath2, "foo-user", "foo-group");
        } catch (IOException ioe) {
            // Some filesystems (like the LocalFileSystem) are strict about existence of owners.
            // TODO(user): Abstract out the behaviors around owners/permissions and properly test
            // the different behaviors between different filesystems.
        }
        ghfs.setTimes(tempFilePath2, 0, 0);
    } finally {
        // We do not need to separately delete the temp files created in GHFS because
        // we delete all test buckets recursively at the end of the tests.
        if (localTempFile.exists()) {
            localTempFile.delete();
        }
    }
}

From source file:com.google.cloud.hadoop.fs.gcs.GoogleHadoopGlobalRootedFileSystemIntegrationTest.java

License:Open Source License

/**
 * Validates initialize() with configuration key fs.gs.working.dir set.
 */
@Test
@Override
public void testInitializeWithWorkingDirectory() throws IOException, URISyntaxException {
    // We can just test by calling initialize multiple times (for each test condition) because
    // there is nothing in initialize() which must be run only once. If this changes, this test
    // method will need to resort to using a new GoogleHadoopGlobalRootedFileSystem() for each item
    // in the for-loop.
    List<WorkingDirData> wddList = setUpWorkingDirectoryTest();
    Configuration config = new Configuration();
    config.set(GoogleHadoopFileSystemBase.GCS_SYSTEM_BUCKET_KEY, bucketName);
    URI gsUri = new URI("gsg://foobar/");
    for (WorkingDirData wdd : wddList) {
        Path path = wdd.path;
        Path expectedWorkingDir = wdd.expectedPath;
        Path currentWorkingDir = ghfs.getWorkingDirectory();
        config.set(GoogleHadoopFileSystemBase.GCS_WORKING_DIRECTORY_KEY, path.toString());
        ghfs.initialize(gsUri, config);
        Path newWorkingDir = ghfs.getWorkingDirectory();
        if (expectedWorkingDir != null) {
            Assert.assertEquals(expectedWorkingDir, newWorkingDir);
        } else {
            Assert.assertEquals(currentWorkingDir, newWorkingDir);
        }
    }
    Assert.assertTrue(ghfs.getHomeDirectory().toString().startsWith("gsg:/" + bucketName));
}

From source file:com.google.cloud.hadoop.fs.gcs.HadoopFileSystemTestBase.java

License:Open Source License

/**
 * Validates FileInfo returned by listFileInfo().
 *
 * See {@link GoogleCloudStorage#listObjectNamesAndGetItemInfo()} for more info.
 */
@Override
protected void validateListNamesAndInfo(String bucketName, String objectNamePrefix, boolean pathExpectedToExist,
        String... expectedListedNames) throws IOException {
    boolean childPathsExpectedToExist = pathExpectedToExist && (expectedListedNames != null);
    boolean listRoot = bucketName == null;

    // Prepare list of expected paths.
    List<Path> expectedPaths = new ArrayList<>();
    // Also maintain a backwards mapping to keep track of which of "expectedListedNames" and
    // "bucketName" is associated with each path, so that we can supply validateFileStatusInternal
    // with the objectName and thus enable it to look up the internally stored expected size,
    // directory status, etc., of the associated FileStatus.
    Map<Path, String[]> pathToComponents = new HashMap<>();
    if (childPathsExpectedToExist) {
        for (String expectedListedName : expectedListedNames) {
            String[] pathComponents = new String[2];
            if (listRoot) {
                pathComponents[0] = expectedListedName;
                pathComponents[1] = null;
            } else {
                pathComponents[0] = bucketName;
                pathComponents[1] = expectedListedName;
            }
            Path expectedPath = ghfsHelper
                    .castAsHadoopPath(ghfsHelper.getPath(pathComponents[0], pathComponents[1], true));
            expectedPaths.add(expectedPath);
            pathToComponents.put(expectedPath, pathComponents);
        }
    }

    // Get list of actual paths.
    URI path = ghfsHelper.getPath(bucketName, objectNamePrefix, true);
    Path hadoopPath = ghfsHelper.castAsHadoopPath(path);
    FileStatus[] fileStatus = null;
    try {
        fileStatus = ghfsHelper.listStatus(hadoopPath);
    } catch (FileNotFoundException fnfe) {
        Assert.assertFalse(String.format("Hadoop path %s expected to exist", hadoopPath), pathExpectedToExist);
    }

    if (!ghfsFileSystemDescriptor.getScheme().equals("file")) {
        Assert.assertEquals(String.format("Hadoop path %s", hadoopPath.toString()), pathExpectedToExist,
                fileStatus != null);
    } else {
        // LocalFileSystem -> ChecksumFileSystem will return an empty array instead of null for
        // nonexistent paths.
        if (!pathExpectedToExist && fileStatus != null) {
            Assert.assertEquals(0, fileStatus.length);
        }
    }

    if (fileStatus != null) {
        Set<Path> actualPaths = new HashSet<>();
        for (FileStatus status : fileStatus) {
            Path actualPath = status.getPath();
            if (status.isDir()) {
                Assert.assertFalse(status.getPath().getName().isEmpty());
            }
            actualPaths.add(actualPath);
            String[] uriComponents = pathToComponents.get(actualPath);
            if (uriComponents != null) {
                // Only do fine-grained validation for the explicitly expected paths.
                validateFileStatusInternal(uriComponents[0], uriComponents[1], true, status);
            }
        }

        if (listRoot) {
            for (Path expectedPath : expectedPaths) {
                Assert.assertTrue(String.format("expected: <%s> in: <%s>", expectedPath, actualPaths),
                        actualPaths.contains(expectedPath));
            }
        } else {
            // Use sorted arrays so that the test-failure output makes it easy to match up each
            // expected element to each actual element.
            Path[] sortedExpectedPaths = new ArrayList<>(Sets.newTreeSet(expectedPaths)).toArray(new Path[0]);
            Path[] sortedActualPaths = new ArrayList<>(Sets.newTreeSet(actualPaths)).toArray(new Path[0]);
            String errorMessage = String.format("expected: %s, actual: %s",
                    Arrays.toString(sortedExpectedPaths), Arrays.toString(sortedActualPaths));
            Assert.assertArrayEquals(errorMessage, sortedExpectedPaths, sortedActualPaths);
        }
    }
}

From source file:com.google.GsHdfs.java

License:Open Source License

public void copyHdfsToGs(String hdfsFn, String gsFn) throws Exception {
    Path srcPath = new Path(hdfsFn);
    if (hdfs.isFile(srcPath)) {
        FSDataInputStream src = hdfs.open(srcPath);
        Process gsutil = Runtime.getRuntime().exec(new String[] { "gsutil", "cp", "-", gsFn });
        OutputStream dst = gsutil.getOutputStream();
        System.out.println(hdfsFn + " -> " + gsFn);
        doCopy(src, dst, hdfsFn);
    } else {
        // Recurse
        for (FileStatus file : hdfs.listStatus(srcPath)) {
            Path path = file.getPath();
            copyHdfsToGs(path.toString(), gsFn + "/" + path.getName());
        }
    }
}
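
A hedged usage note, not present in the original file: given an already-constructed GsHdfs instance (construction details are not shown in the excerpt above), the recursive copy can be started with a single call; the paths below are placeholders.

// 'copier' is assumed to be a GsHdfs instance wired to an HDFS FileSystem.
// Directories are walked recursively; each file's Path.toString() becomes the source name.
copier.copyHdfsToGs("/user/hive/warehouse/events", "gs://example-bucket/events");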

From source file:com.gsinnovations.howdah.AbstractJob.java

License:Apache License

protected Job prepareJob(Path inputPath, Path outputPath, Class<? extends InputFormat> inputFormat,
        Class<? extends Mapper> mapper, Class<? extends Writable> mapperKey,
        Class<? extends Writable> mapperValue, Class<? extends Reducer> reducer,
        Class<? extends Writable> reducerKey, Class<? extends Writable> reducerValue,
        Class<? extends OutputFormat> outputFormat) throws IOException {

    Job job = new Job(new Configuration(getConf()));
    Configuration jobConf = job.getConfiguration();

    if (reducer.equals(Reducer.class)) {
        if (mapper.equals(Mapper.class)) {
            throw new IllegalStateException("Can't figure out the user class jar file from mapper/reducer");
        }
        job.setJarByClass(mapper);
    } else {
        job.setJarByClass(reducer);
    }

    job.setInputFormatClass(inputFormat);
    jobConf.set("mapred.input.dir", inputPath.toString());

    job.setMapperClass(mapper);
    job.setMapOutputKeyClass(mapperKey);
    job.setMapOutputValueClass(mapperValue);

    jobConf.setBoolean("mapred.compress.map.output", true);

    job.setReducerClass(reducer);
    job.setOutputKeyClass(reducerKey);
    job.setOutputValueClass(reducerValue);

    job.setJobName(getCustomJobName(job, mapper, reducer));

    job.setOutputFormatClass(outputFormat);
    jobConf.set("mapred.output.dir", outputPath.toString());

    return job;
}
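
A hedged usage sketch, not from the original project: assuming a subclass of AbstractJob, prepareJob() might be invoked roughly as follows. The mapper, reducer, and path names are placeholders; note that the input and output paths end up in the job configuration via Path.toString().

// Inside a hypothetical subclass of AbstractJob; MyMapper and MyReducer are placeholder
// classes standing in for real Mapper/Reducer implementations.
Path inputPath = new Path("/data/input");
Path outputPath = new Path("/data/output");
Job job = prepareJob(inputPath, outputPath,
        TextInputFormat.class,
        MyMapper.class, Text.class, LongWritable.class,
        MyReducer.class, Text.class, LongWritable.class,
        TextOutputFormat.class);
boolean succeeded = job.waitForCompletion(true);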

From source file:com.hadoop.compression.lzo.LzoIndexer.java

License:Open Source License

/**
 * LZO-index a given path, recursing into directories when encountered.
 * Files are only indexed if they end in .lzo and have no existing .lzo.index file.
 *
 * @param lzoPath The base path to index.
 * @param nestingLevel For pretty printing, the nesting level.
 * @throws IOException
 */
private void indexInternal(Path lzoPath, int nestingLevel) throws IOException {
    FileSystem fs = FileSystem.get(URI.create(lzoPath.toString()), conf_);
    FileStatus fileStatus = fs.getFileStatus(lzoPath);

    // Recursively walk
    if (fileStatus.isDir()) {
        LOG.info(getNesting(nestingLevel) + "LZO Indexing directory " + lzoPath + "...");
        FileStatus[] statuses = fs.listStatus(lzoPath);
        for (FileStatus childStatus : statuses) {
            indexInternal(childStatus.getPath(), nestingLevel + 1);
        }
    } else if (lzoPath.toString().endsWith(LZO_EXTENSION)) {
        Path lzoIndexPath = new Path(lzoPath.toString() + LzoIndex.LZO_INDEX_SUFFIX);
        if (fs.exists(lzoIndexPath)) {
            LOG.info(getNesting(nestingLevel) + "[SKIP] LZO index file already exists for " + lzoPath + "\n");
        } else {
            long startTime = System.currentTimeMillis();
            long fileSize = fileStatus.getLen();

            LOG.info(getNesting(nestingLevel) + "[INDEX] LZO Indexing file " + lzoPath + ", size "
                    + df_.format(fileSize / (1024.0 * 1024.0 * 1024.0)) + " GB...");
            if (indexSingleFile(fs, lzoPath)) {
                long indexSize = fs.getFileStatus(lzoIndexPath).getLen();
                double elapsed = (System.currentTimeMillis() - startTime) / 1000.0;
                LOG.info(getNesting(nestingLevel) + "Completed LZO Indexing in " + df_.format(elapsed)
                        + " seconds (" + df_.format(fileSize / (1024.0 * 1024.0 * elapsed))
                        + " MB/s).  Index size is " + df_.format(indexSize / 1024.0) + " KB.\n");
            }
        }
    }
}

From source file:com.hadoop.mapred.DeprecatedLzoTextInputFormat.java

License:Open Source License

@Override
protected FileStatus[] listStatus(JobConf conf) throws IOException {
    List<FileStatus> files = new ArrayList<FileStatus>(Arrays.asList(super.listStatus(conf)));

    boolean ignoreNonLzo = LzoInputFormatCommon.getIgnoreNonLzoProperty(conf);

    Iterator<FileStatus> it = files.iterator();
    while (it.hasNext()) {
        FileStatus fileStatus = it.next();
        Path file = fileStatus.getPath();

        if (!LzoInputFormatCommon.isLzoFile(file.toString())) {
            // Get rid of non-LZO files, unless the conf explicitly tells us to
            // keep them.
            // However, always skip over files that end with ".lzo.index", since
            // they are not part of the input.
            if (ignoreNonLzo || LzoInputFormatCommon.isLzoIndexFile(file.toString())) {
                it.remove();
            }
        } else {
            FileSystem fs = file.getFileSystem(conf);
            LzoIndex index = LzoIndex.readIndex(fs, file);
            indexes.put(file, index);
        }
    }

    return files.toArray(new FileStatus[] {});
}

From source file:com.hadoop.mapred.DeprecatedLzoTextInputFormat.java

License:Open Source License

@Override
protected boolean isSplitable(FileSystem fs, Path filename) {
    if (LzoInputFormatCommon.isLzoFile(filename.toString())) {
        LzoIndex index = indexes.get(filename);
        return !index.isEmpty();
    } else {
        // Delegate non-LZO files to the TextInputFormat base class.
        return super.isSplitable(fs, filename);
    }
}

From source file:com.hadoop.mapred.DeprecatedLzoTextInputFormat.java

License:Open Source License

@Override
public InputSplit[] getSplits(JobConf conf, int numSplits) throws IOException {
    FileSplit[] splits = (FileSplit[]) super.getSplits(conf, numSplits);
    // Find new starts/ends of the filesplit that align with the LZO blocks.

    List<FileSplit> result = new ArrayList<FileSplit>();

    for (FileSplit fileSplit : splits) {
        Path file = fileSplit.getPath();
        FileSystem fs = file.getFileSystem(conf);

        if (!LzoInputFormatCommon.isLzoFile(file.toString())) {
            // non-LZO file, keep the input split as is.
            result.add(fileSplit);
            continue;
        }

        // LZO file, try to split if the .index file was found
        LzoIndex index = indexes.get(file);
        if (index == null) {
            throw new IOException("Index not found for " + file);
        }
        if (index.isEmpty()) {
            // Empty index, keep it as is.
            result.add(fileSplit);
            continue;
        }

        long start = fileSplit.getStart();
        long end = start + fileSplit.getLength();

        long lzoStart = index.alignSliceStartToIndex(start, end);
        long lzoEnd = index.alignSliceEndToIndex(end, fs.getFileStatus(file).getLen());

        if (lzoStart != LzoIndex.NOT_FOUND && lzoEnd != LzoIndex.NOT_FOUND) {
            result.add(new FileSplit(file, lzoStart, lzoEnd - lzoStart, fileSplit.getLocations()));
        }
    }

    return result.toArray(new FileSplit[result.size()]);
}