List of usage examples for org.apache.hadoop.fs.Path#toString()
@Override
public String toString()
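Path.toString() returns the string form of the path, including the scheme and authority when the path is fully qualified. As a quick orientation before the examples below, here is a minimal, self-contained sketch (not drawn from the source files that follow; the hdfs:// URI and file names are illustrative):

import org.apache.hadoop.fs.Path;

public class PathToStringExample {
    public static void main(String[] args) {
        // Fully qualified path: toString() preserves scheme and authority.
        Path qualified = new Path("hdfs://namenode:8020/user/data/file.txt");
        System.out.println(qualified.toString()); // hdfs://namenode:8020/user/data/file.txt

        // Relative path: toString() returns the path text without a scheme.
        Path relative = new Path("data", "file.txt");
        System.out.println(relative.toString()); // data/file.txt

        // The string form round-trips through the Path constructor.
        Path copy = new Path(qualified.toString());
        System.out.println(copy.equals(qualified)); // true
    }
}

The examples that follow show this same round-trip pattern in practice: toString() output is passed to configuration values, URI.create(), suffix checks, and back into new Path(...).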
From source file:com.google.cloud.hadoop.fs.gcs.GoogleHadoopFileSystemTestBase.java
License:Open Source License
/** Validates makeQualified() when the working directory is root. */
@Test
public void testMakeQualifiedRoot() {
    GoogleHadoopFileSystemBase myGhfs = (GoogleHadoopFileSystemBase) ghfs;
    myGhfs.setWorkingDirectory(myGhfs.getFileSystemRoot());
    Path fsRootPath = myGhfs.getFileSystemRoot();
    URI fsRootUri = fsRootPath.toUri();
    String fsRoot = fsRootPath.toString();
    Map<String, String> qualifiedPaths = new HashMap<>();
    qualifiedPaths.put("/", fsRoot);
    qualifiedPaths.put("/foo", fsRoot + "foo");
    qualifiedPaths.put("/foo/bar", fsRoot + "foo/bar");
    qualifiedPaths.put(".", fsRoot);
    qualifiedPaths.put("foo", fsRoot + "foo");
    qualifiedPaths.put("foo/bar", fsRoot + "foo/bar");
    qualifiedPaths.put(fsRoot, fsRoot);
    qualifiedPaths.put(fsRoot + "foo", fsRoot + "foo");
    qualifiedPaths.put(fsRoot + "foo/bar", fsRoot + "foo/bar");
    qualifiedPaths.put("/foo/../foo", fsRoot + "foo");
    qualifiedPaths.put("/foo/bar/../../foo/bar", fsRoot + "foo/bar");
    qualifiedPaths.put("foo/../foo", fsRoot + "foo");
    qualifiedPaths.put("foo/bar/../../foo/bar", fsRoot + "foo/bar");
    qualifiedPaths.put(fsRoot + "foo/../foo", fsRoot + "foo");
    qualifiedPaths.put(fsRoot + "foo/bar/../../foo/bar", fsRoot + "foo/bar");
    qualifiedPaths.put(fsRoot + "..foo/bar", fsRoot + "..foo/bar");
    qualifiedPaths.put("..foo/bar", fsRoot + "..foo/bar");

    // GHFS-specific behavior where root is its own parent.
    qualifiedPaths.put("/..", fsRoot);
    qualifiedPaths.put("/../../..", fsRoot);
    qualifiedPaths.put("/../foo/", fsRoot + "foo");
    qualifiedPaths.put("/../../../foo/bar", fsRoot + "foo/bar");
    qualifiedPaths.put("..", fsRoot);
    qualifiedPaths.put("../..", fsRoot);
    qualifiedPaths.put("../foo", fsRoot + "foo");
    qualifiedPaths.put("../foo/bar", fsRoot + "foo/bar");
    qualifiedPaths.put(fsRoot + "..", fsRoot);
    qualifiedPaths.put(fsRoot + "../foo", fsRoot + "foo");
    qualifiedPaths.put(fsRoot + "../foo/bar", fsRoot + "foo/bar");
    qualifiedPaths.put("../foo/../foo", fsRoot + "foo");
    qualifiedPaths.put("../../../foo/bar/../../foo/bar", fsRoot + "foo/bar");
    qualifiedPaths.put("../foo/../foo", fsRoot + "foo");
    qualifiedPaths.put("../foo/bar/../../foo/bar", fsRoot + "foo/bar");
    qualifiedPaths.put(fsRoot + "../foo/../foo", fsRoot + "foo");
    qualifiedPaths.put(fsRoot + "../foo/bar/../../foo/bar", fsRoot + "foo/bar");
    qualifiedPaths.put(fsRoot + "foo/../../../../foo", fsRoot + "foo");
    qualifiedPaths.put(fsRoot + "foo/bar/../../../../../foo/bar", fsRoot + "foo/bar");

    // Skip for authority-less gsg paths.
    if (fsRootUri.getAuthority() != null) {
        // When the path to qualify is of the form gs://somebucket, we want to qualify
        // it as gs://someBucket/
        qualifiedPaths.put(fsRoot.substring(0, fsRoot.length() - 1), fsRoot);
    }

    for (String unqualifiedString : qualifiedPaths.keySet()) {
        Path unqualifiedPath = new Path(unqualifiedString);
        Path qualifiedPath = new Path(qualifiedPaths.get(unqualifiedString));
        Assert.assertEquals(qualifiedPath, myGhfs.makeQualified(unqualifiedPath));
    }
}
From source file:com.google.cloud.hadoop.fs.gcs.GoogleHadoopFileSystemTestBase.java
License:Open Source License
/**
 * We override certain methods in FileSystem simply to provide debug tracing. (Search for
 * "Overridden functions for debug tracing" in GoogleHadoopFileSystemBase.java.)
 * We do not add or update any functionality for such methods. The following
 * tests simply exercise that path to ensure coverage. Consequently, they do not
 * really test any functionality.
 *
 * Having coverage for these methods lets us easily determine the amount of
 * coverage that is missing in the rest of the code.
 */
@Test
public void provideCoverageForUnmodifiedMethods() throws IOException {
    // -------------------------------------------------------
    // Create test data.

    // Temporary file in GHFS.
    URI tempFileUri = GoogleCloudStorageFileSystemIntegrationTest.getTempFilePath();
    Path tempFilePath = ghfsHelper.castAsHadoopPath(tempFileUri);
    Path tempDirPath = tempFilePath.getParent();
    String text = "Hello World!";
    ghfsHelper.writeFile(tempFilePath, text, 1, false);

    // Another temporary file in GHFS.
    URI tempFileUri2 = GoogleCloudStorageFileSystemIntegrationTest.getTempFilePath();
    Path tempFilePath2 = ghfsHelper.castAsHadoopPath(tempFileUri2);

    // Temporary file in local FS.
    File localTempFile = File.createTempFile("ghfs-test-", null);
    Path localTempFilePath = new Path(localTempFile.getPath());
    Path localTempDirPath = localTempFilePath.getParent();

    // -------------------------------------------------------
    // Call methods to provide coverage for. Note that we do not attempt to
    // test their functionality as we are not testing Hadoop engine here.
    try {
        ghfs.deleteOnExit(tempFilePath);
        ghfs.getContentSummary(tempFilePath);
        ghfs.getDelegationToken("foo");
        ghfs.copyFromLocalFile(false, true, localTempFilePath, tempDirPath);
        ghfs.copyFromLocalFile(false, true, new Path[] { localTempFilePath }, tempDirPath);
        localTempFile.delete();

        ghfs.copyToLocalFile(true, tempFilePath, localTempDirPath);
        File localCopiedFile = new File(localTempDirPath.toString(), tempFilePath.getName());
        localCopiedFile.delete();

        Path localOutputPath = ghfs.startLocalOutput(tempFilePath2, localTempFilePath);
        FileWriter writer = new FileWriter(localOutputPath.toString());
        writer.write(text);
        writer.close();
        ghfs.completeLocalOutput(tempFilePath2, localOutputPath);

        ghfs.getUsed();
        ghfs.setVerifyChecksum(false);
        ghfs.getFileChecksum(tempFilePath2);
        ghfs.setPermission(tempFilePath2, FsPermission.getDefault());
        try {
            ghfs.setOwner(tempFilePath2, "foo-user", "foo-group");
        } catch (IOException ioe) {
            // Some filesystems (like the LocalFileSystem) are strict about existence of owners.
            // TODO(user): Abstract out the behaviors around owners/permissions and properly test
            // the different behaviors between different filesystems.
        }
        ghfs.setTimes(tempFilePath2, 0, 0);
    } finally {
        // We do not need to separately delete the temp files created in GHFS because
        // we delete all test buckets recursively at the end of the tests.
        if (localTempFile.exists()) {
            localTempFile.delete();
        }
    }
}
From source file:com.google.cloud.hadoop.fs.gcs.GoogleHadoopGlobalRootedFileSystemIntegrationTest.java
License:Open Source License
/** Validates initialize() with the configuration key fs.gs.working.dir set. */
@Test
@Override
public void testInitializeWithWorkingDirectory() throws IOException, URISyntaxException {
    // We can just test by calling initialize multiple times (for each test condition) because
    // there is nothing in initialize() which must be run only once. If this changes, this test
    // method will need to resort to using a new GoogleHadoopGlobalRootedFileSystem() for each
    // item in the for-loop.
    List<WorkingDirData> wddList = setUpWorkingDirectoryTest();
    Configuration config = new Configuration();
    config.set(GoogleHadoopFileSystemBase.GCS_SYSTEM_BUCKET_KEY, bucketName);
    URI gsUri = new URI("gsg://foobar/");
    for (WorkingDirData wdd : wddList) {
        Path path = wdd.path;
        Path expectedWorkingDir = wdd.expectedPath;
        Path currentWorkingDir = ghfs.getWorkingDirectory();
        config.set(GoogleHadoopFileSystemBase.GCS_WORKING_DIRECTORY_KEY, path.toString());
        ghfs.initialize(gsUri, config);
        Path newWorkingDir = ghfs.getWorkingDirectory();
        if (expectedWorkingDir != null) {
            Assert.assertEquals(expectedWorkingDir, newWorkingDir);
        } else {
            Assert.assertEquals(currentWorkingDir, newWorkingDir);
        }
    }
    Assert.assertTrue(ghfs.getHomeDirectory().toString().startsWith("gsg:/" + bucketName));
}
From source file:com.google.cloud.hadoop.fs.gcs.HadoopFileSystemTestBase.java
License:Open Source License
/**
 * Validates FileInfo returned by listFileInfo().
 *
 * See {@link GoogleCloudStorage#listObjectNamesAndGetItemInfo()} for more info.
 */
@Override
protected void validateListNamesAndInfo(String bucketName, String objectNamePrefix,
        boolean pathExpectedToExist, String... expectedListedNames) throws IOException {

    boolean childPathsExpectedToExist = pathExpectedToExist && (expectedListedNames != null);
    boolean listRoot = bucketName == null;

    // Prepare list of expected paths.
    List<Path> expectedPaths = new ArrayList<>();
    // Also maintain a backwards mapping to keep track of which of "expectedListedNames" and
    // "bucketName" is associated with each path, so that we can supply validateFileStatusInternal
    // with the objectName and thus enable it to look up the internally stored expected size,
    // directory status, etc., of the associated FileStatus.
    Map<Path, String[]> pathToComponents = new HashMap<>();
    if (childPathsExpectedToExist) {
        for (String expectedListedName : expectedListedNames) {
            String[] pathComponents = new String[2];
            if (listRoot) {
                pathComponents[0] = expectedListedName;
                pathComponents[1] = null;
            } else {
                pathComponents[0] = bucketName;
                pathComponents[1] = expectedListedName;
            }
            Path expectedPath = ghfsHelper
                    .castAsHadoopPath(ghfsHelper.getPath(pathComponents[0], pathComponents[1], true));
            expectedPaths.add(expectedPath);
            pathToComponents.put(expectedPath, pathComponents);
        }
    }

    // Get list of actual paths.
    URI path = ghfsHelper.getPath(bucketName, objectNamePrefix, true);
    Path hadoopPath = ghfsHelper.castAsHadoopPath(path);
    FileStatus[] fileStatus = null;
    try {
        fileStatus = ghfsHelper.listStatus(hadoopPath);
    } catch (FileNotFoundException fnfe) {
        Assert.assertFalse(String.format("Hadoop path %s expected to exist", hadoopPath),
                pathExpectedToExist);
    }

    if (!ghfsFileSystemDescriptor.getScheme().equals("file")) {
        Assert.assertEquals(String.format("Hadoop path %s", hadoopPath.toString()),
                pathExpectedToExist, fileStatus != null);
    } else {
        // LocalFileSystem -> ChecksumFileSystem will return an empty array instead of null for
        // nonexistent paths.
        if (!pathExpectedToExist && fileStatus != null) {
            Assert.assertEquals(0, fileStatus.length);
        }
    }

    if (fileStatus != null) {
        Set<Path> actualPaths = new HashSet<>();
        for (FileStatus status : fileStatus) {
            Path actualPath = status.getPath();
            if (status.isDir()) {
                Assert.assertFalse(status.getPath().getName().isEmpty());
            }
            actualPaths.add(actualPath);
            String[] uriComponents = pathToComponents.get(actualPath);
            if (uriComponents != null) {
                // Only do fine-grained validation for the explicitly expected paths.
                validateFileStatusInternal(uriComponents[0], uriComponents[1], true, status);
            }
        }

        if (listRoot) {
            for (Path expectedPath : expectedPaths) {
                Assert.assertTrue(String.format("expected: <%s> in: <%s>", expectedPath, actualPaths),
                        actualPaths.contains(expectedPath));
            }
        } else {
            // Use sorted arrays so that the test-failure output makes it easy to match up each
            // expected element to each actual element.
            Path[] sortedExpectedPaths =
                    new ArrayList<>(Sets.newTreeSet(expectedPaths)).toArray(new Path[0]);
            Path[] sortedActualPaths =
                    new ArrayList<>(Sets.newTreeSet(actualPaths)).toArray(new Path[0]);
            String errorMessage = String.format("expected: %s, actual: %s",
                    Arrays.toString(sortedExpectedPaths), Arrays.toString(sortedActualPaths));
            Assert.assertArrayEquals(errorMessage, sortedExpectedPaths, sortedActualPaths);
        }
    }
}
From source file:com.google.GsHdfs.java
License:Open Source License
public void copyHdfsToGs(String hdfsFn, String gsFn) throws Exception {
    Path srcPath = new Path(hdfsFn);
    if (hdfs.isFile(srcPath)) {
        FSDataInputStream src = hdfs.open(srcPath);
        Process gsutil = Runtime.getRuntime().exec(new String[] { "gsutil", "cp", "-", gsFn });
        OutputStream dst = gsutil.getOutputStream();
        System.out.println(hdfsFn + " -> " + gsFn);
        doCopy(src, dst, hdfsFn);
    } else {
        // Recurse
        for (FileStatus file : hdfs.listStatus(srcPath)) {
            Path path = file.getPath();
            copyHdfsToGs(path.toString(), gsFn + "/" + path.getName());
        }
    }
}
From source file:com.gsinnovations.howdah.AbstractJob.java
License:Apache License
protected Job prepareJob(Path inputPath, Path outputPath, Class<? extends InputFormat> inputFormat,
        Class<? extends Mapper> mapper, Class<? extends Writable> mapperKey,
        Class<? extends Writable> mapperValue, Class<? extends Reducer> reducer,
        Class<? extends Writable> reducerKey, Class<? extends Writable> reducerValue,
        Class<? extends OutputFormat> outputFormat) throws IOException {

    Job job = new Job(new Configuration(getConf()));
    Configuration jobConf = job.getConfiguration();

    if (reducer.equals(Reducer.class)) {
        if (mapper.equals(Mapper.class)) {
            throw new IllegalStateException("Can't figure out the user class jar file from mapper/reducer");
        }
        job.setJarByClass(mapper);
    } else {
        job.setJarByClass(reducer);
    }

    job.setInputFormatClass(inputFormat);
    jobConf.set("mapred.input.dir", inputPath.toString());

    job.setMapperClass(mapper);
    job.setMapOutputKeyClass(mapperKey);
    job.setMapOutputValueClass(mapperValue);

    jobConf.setBoolean("mapred.compress.map.output", true);

    job.setReducerClass(reducer);
    job.setOutputKeyClass(reducerKey);
    job.setOutputValueClass(reducerValue);

    job.setJobName(getCustomJobName(job, mapper, reducer));

    job.setOutputFormatClass(outputFormat);
    jobConf.set("mapred.output.dir", outputPath.toString());

    return job;
}
From source file:com.hadoop.compression.lzo.LzoIndexer.java
License:Open Source License
/**
 * Lzo index a given path, calling recursively to index directories when encountered.
 * Files are only indexed if they end in .lzo and have no existing .lzo.index file.
 *
 * @param lzoPath The base path to index.
 * @param nestingLevel For pretty printing, the nesting level.
 * @throws IOException
 */
private void indexInternal(Path lzoPath, int nestingLevel) throws IOException {
    FileSystem fs = FileSystem.get(URI.create(lzoPath.toString()), conf_);
    FileStatus fileStatus = fs.getFileStatus(lzoPath);

    // Recursively walk
    if (fileStatus.isDir()) {
        LOG.info(getNesting(nestingLevel) + "LZO Indexing directory " + lzoPath + "...");
        FileStatus[] statuses = fs.listStatus(lzoPath);
        for (FileStatus childStatus : statuses) {
            indexInternal(childStatus.getPath(), nestingLevel + 1);
        }
    } else if (lzoPath.toString().endsWith(LZO_EXTENSION)) {
        Path lzoIndexPath = new Path(lzoPath.toString() + LzoIndex.LZO_INDEX_SUFFIX);
        if (fs.exists(lzoIndexPath)) {
            LOG.info(getNesting(nestingLevel) + "[SKIP] LZO index file already exists for " + lzoPath + "\n");
        } else {
            long startTime = System.currentTimeMillis();
            long fileSize = fileStatus.getLen();

            LOG.info(getNesting(nestingLevel) + "[INDEX] LZO Indexing file " + lzoPath + ", size "
                    + df_.format(fileSize / (1024.0 * 1024.0 * 1024.0)) + " GB...");
            if (indexSingleFile(fs, lzoPath)) {
                long indexSize = fs.getFileStatus(lzoIndexPath).getLen();
                double elapsed = (System.currentTimeMillis() - startTime) / 1000.0;
                LOG.info(getNesting(nestingLevel) + "Completed LZO Indexing in " + df_.format(elapsed)
                        + " seconds (" + df_.format(fileSize / (1024.0 * 1024.0 * elapsed))
                        + " MB/s). Index size is " + df_.format(indexSize / 1024.0) + " KB.\n");
            }
        }
    }
}
From source file:com.hadoop.mapred.DeprecatedLzoTextInputFormat.java
License:Open Source License
@Override
protected FileStatus[] listStatus(JobConf conf) throws IOException {
    List<FileStatus> files = new ArrayList<FileStatus>(Arrays.asList(super.listStatus(conf)));

    boolean ignoreNonLzo = LzoInputFormatCommon.getIgnoreNonLzoProperty(conf);

    Iterator<FileStatus> it = files.iterator();
    while (it.hasNext()) {
        FileStatus fileStatus = it.next();
        Path file = fileStatus.getPath();

        if (!LzoInputFormatCommon.isLzoFile(file.toString())) {
            // Get rid of non-LZO files, unless the conf explicitly tells us to
            // keep them.
            // However, always skip over files that end with ".lzo.index", since
            // they are not part of the input.
            if (ignoreNonLzo || LzoInputFormatCommon.isLzoIndexFile(file.toString())) {
                it.remove();
            }
        } else {
            FileSystem fs = file.getFileSystem(conf);
            LzoIndex index = LzoIndex.readIndex(fs, file);
            indexes.put(file, index);
        }
    }

    return files.toArray(new FileStatus[] {});
}
From source file:com.hadoop.mapred.DeprecatedLzoTextInputFormat.java
License:Open Source License
@Override
protected boolean isSplitable(FileSystem fs, Path filename) {
    if (LzoInputFormatCommon.isLzoFile(filename.toString())) {
        LzoIndex index = indexes.get(filename);
        return !index.isEmpty();
    } else {
        // Delegate non-LZO files to the TextInputFormat base class.
        return super.isSplitable(fs, filename);
    }
}
From source file:com.hadoop.mapred.DeprecatedLzoTextInputFormat.java
License:Open Source License
@Override
public InputSplit[] getSplits(JobConf conf, int numSplits) throws IOException {
    FileSplit[] splits = (FileSplit[]) super.getSplits(conf, numSplits);
    // Find new starts/ends of the filesplit that align with the LZO blocks.
    List<FileSplit> result = new ArrayList<FileSplit>();

    for (FileSplit fileSplit : splits) {
        Path file = fileSplit.getPath();
        FileSystem fs = file.getFileSystem(conf);

        if (!LzoInputFormatCommon.isLzoFile(file.toString())) {
            // Non-LZO file, keep the input split as is.
            result.add(fileSplit);
            continue;
        }

        // LZO file, try to split if the .index file was found.
        LzoIndex index = indexes.get(file);
        if (index == null) {
            throw new IOException("Index not found for " + file);
        }
        if (index.isEmpty()) {
            // Empty index, keep it as is.
            result.add(fileSplit);
            continue;
        }

        long start = fileSplit.getStart();
        long end = start + fileSplit.getLength();

        long lzoStart = index.alignSliceStartToIndex(start, end);
        long lzoEnd = index.alignSliceEndToIndex(end, fs.getFileStatus(file).getLen());

        if (lzoStart != LzoIndex.NOT_FOUND && lzoEnd != LzoIndex.NOT_FOUND) {
            result.add(new FileSplit(file, lzoStart, lzoEnd - lzoStart, fileSplit.getLocations()));
        }
    }

    return result.toArray(new FileSplit[result.size()]);
}