List of usage examples for org.apache.hadoop.fs.Path.getName()
public String getName()
From source file:com.cloudera.cdk.data.filesystem.ParquetFileSystemDatasetWriter.java
License:Apache License
public ParquetFileSystemDatasetWriter(FileSystem fileSystem, Path path, Schema schema, boolean enableCompression) { this.fileSystem = fileSystem; this.path = path; this.pathTmp = new Path(path.getParent(), "." + path.getName() + ".tmp"); this.schema = schema; this.enableCompression = enableCompression; this.state = ReaderWriterState.NEW; }
From source file:com.cloudera.cdk.data.filesystem.PathConversion.java
License:Apache License
public StorageKey toKey(Path fromPath, StorageKey storage) { final List<FieldPartitioner> partitioners = storage.getPartitionStrategy().getFieldPartitioners(); final List<Object> values = Lists.newArrayList(new Object[partitioners.size()]); Path currentPath = fromPath; int index = partitioners.size() - 1; while (currentPath != null && index >= 0) { values.set(index,//ww w . ja va2s . c o m valueForDirname((FieldPartitioner<?, ?>) partitioners.get(index), currentPath.getName())); // update currentPath = currentPath.getParent(); index -= 1; } storage.replaceValues(values); return storage; }
From source file:com.cloudera.cdk.data.filesystem.PathFilters.java
License:Apache License
/**
 * Returns a filter that rejects paths whose final name component starts with {@code "."} or
 * {@code "_"} and accepts every other path.
 *
 * @return a new {@link PathFilter} instance on every call
 */
public static PathFilter notHidden() {
  return new PathFilter() {
    @Override
    public boolean accept(Path path) {
      final String name = path.getName();
      return !name.startsWith(".") && !name.startsWith("_");
    }
  };
}
From source file:com.cloudera.cdk.morphline.hadoop.core.DownloadHdfsFileTest.java
License:Apache License
@Test public void testBasic() throws IOException { String msg = "hello world"; // setup: copy a file to HDFS to prepare inputFile Path inputFile = fileSystem.makeQualified(new Path(testDirectory, fileName)); FSDataOutputStream out = fileSystem.create(inputFile); IOUtils.copyBytes(new ByteArrayInputStream(msg.getBytes(Charsets.UTF_8)), out, fileSystem.getConf()); out.close();/*from w ww . j a v a 2 s. c o m*/ File cwd = Files.createTempDir().getAbsoluteFile(); if (isDir) { dst = new File(cwd, testDirectory.getName() + "/" + inputFile.getName()); inputFile = inputFile.getParent(); } else { dst = new File(cwd, inputFile.getName()); } Assert.assertFalse(dst.exists()); new File(cwd, fileName).mkdirs(); // will be auto deleted! Files.write("wrong msg", new File(new File(cwd, fileName), fileName), Charsets.UTF_8); // will be auto deleted! Command morphline = createMorphline("test-morphlines/testDownloadHdfsFile", inputFile, cwd); Assert.assertTrue(morphline.process(new Record())); Assert.assertEquals(msg, Files.toString(dst, Charsets.UTF_8)); if (isDir) { FileUtil.fullyDelete(dst.getParentFile()); } else { FileUtil.fullyDelete(dst); } Assert.assertTrue(fileSystem.exists(inputFile)); Assert.assertTrue(FileUtil.fullyDelete(cwd)); // verify that subsequent calls with same inputFile won't copy the file again (to prevent races) morphline = createMorphline("test-morphlines/downloadHdfsFile", inputFile, cwd); Assert.assertTrue(morphline.process(new Record())); Assert.assertFalse(dst.exists()); Assert.assertTrue(morphline.process(new Record())); Assert.assertFalse(dst.exists()); Assert.assertFalse(cwd.exists()); Assert.assertTrue(fileSystem.delete(inputFile, true)); try { morphline = createMorphline("test-morphlines/downloadHdfsFile", new Path("nonExistingInputFile"), cwd); Assert.fail("failed to detect non-existing input file"); } catch (MorphlineCompilationException e) { Assert.assertTrue(e.getCause() instanceof FileNotFoundException); } Assert.assertFalse(dst.exists()); }
From source file:com.cloudera.cdk.morphline.hadoop.rcfile.SingleStreamFileSystem.java
License:Apache License
@Override public FSDataInputStream open(final Path f, final int bufferSize) throws IOException { if (f.equals(path)) { return inputStream; }//from ww w.j a v a 2 s .c o m throw new UnsupportedOperationException("Path " + f.getName() + " is not found"); }
From source file:com.cloudera.cdk.morphline.hadoop.rcfile.SingleStreamFileSystem.java
License:Apache License
@Override public FileStatus getFileStatus(final Path f) throws IOException { if (path.equals(f)) { return fileStatus; }//from ww w.j a va2 s. c o m throw new UnsupportedOperationException("Path " + f.getName() + " is not found"); }
From source file:com.cloudera.cdk.tools.JobClasspathHelper.java
License:Apache License
/** * @param fs/*from w w w. j a va 2 s .c om*/ * File system where to upload the jar. * @param localJarPath * The local path where we find the jar. * @param md5sum * The MD5 sum of the local jar. * @param remoteJarPath * The remote path where to upload the jar. * @param remoteMd5Path * The remote path where to create the MD5 file. * * @throws IOException */ private void copyJarToHDFS(FileSystem fs, Path localJarPath, String md5sum, Path remoteJarPath, Path remoteMd5Path) throws IOException { logger.info("Copying {} to {}", localJarPath.toUri().toASCIIString(), remoteJarPath.toUri().toASCIIString()); fs.copyFromLocalFile(localJarPath, remoteJarPath); // create the MD5 file for this jar. createMd5SumFile(fs, md5sum, remoteMd5Path); // we need to clean the tmp files that are are created by JarFinder after the JVM exits. if (remoteJarPath.getName().startsWith(JarFinder.TMP_HADOOP)) { fs.deleteOnExit(remoteJarPath); } // same for the MD5 file. if (remoteMd5Path.getName().startsWith(JarFinder.TMP_HADOOP)) { fs.deleteOnExit(remoteMd5Path); } }
From source file:com.cloudera.crunch.impl.mr.exec.CrunchJob.java
License:Open Source License
/**
 * Builds the destination path for an output file inside {@code dir}.
 *
 * <p>The file name is {@code part-<m|r>-<index>}, where the letter is {@code m} when
 * {@code mapOnlyJob} is set and {@code r} otherwise, and the index is zero-padded to five
 * digits. {@code ".avro"} is appended when the source name ends with
 * {@code AvroOutputFormat.EXT}.
 *
 * @param src source file; only its name is inspected
 * @param dir directory that will contain the destination file
 * @param index numeric index used in the file name
 * @return the destination path under {@code dir}
 */
private Path getDestFile(Path src, Path dir, int index) {
  final boolean avroSource =
      src.getName().endsWith(org.apache.avro.mapred.AvroOutputFormat.EXT);
  final String pattern = avroSource ? "part-%s-%05d" + ".avro" : "part-%s-%05d";
  final String taskKind = mapOnlyJob ? "m" : "r";
  return new Path(dir, String.format(pattern, taskKind, index));
}
From source file:com.cloudera.data.filesystem.FileSystemDataset.java
License:Apache License
private PartitionKey fromDirectoryName(Path dir) { List<Object> values = Lists.newArrayList(); if (partitionKey != null) { values.addAll(partitionKey.getValues()); }//from ww w. j a va2 s .com values.add(Splitter.on('=').split(dir.getName())); return Accessor.getDefault().newPartitionKey(values.toArray()); }
From source file:com.cloudera.data.filesystem.ParquetFileSystemDatasetWriter.java
License:Apache License
public ParquetFileSystemDatasetWriter(FileSystem fileSystem, Path path, Schema schema) { this.fileSystem = fileSystem; this.path = path; this.pathTmp = new Path(path.getParent(), "." + path.getName() + ".tmp"); this.schema = schema; this.state = ReaderWriterState.NEW; }