Usage examples for the exists method of org.apache.hadoop.fs.FileSystem
public boolean exists(Path f) throws IOException
From source file:com.cloudera.cdk.data.filesystem.FileSystemDatasetRepository.java
License:Apache License
/** * Creates, if necessary, the given the location for {@code descriptor}. * * @param conf A Configuration/*from ww w . ja va2s .c om*/ * @param descriptor A DatasetDescriptor */ static void ensureExists(DatasetDescriptor descriptor, Configuration conf) { Preconditions.checkArgument(descriptor.getLocation() != null, "Cannot get FileSystem for a descriptor with no location"); final Path dataPath = new Path(descriptor.getLocation()); final FileSystem fs = fsForPath(dataPath, conf); try { if (!fs.exists(dataPath)) { fs.mkdirs(dataPath); } } catch (IOException ex) { throw new DatasetRepositoryException("Cannot access data location", ex); } }
From source file:com.cloudera.cdk.data.filesystem.FileSystemMetadataProvider.java
License:Apache License
/**
 * Precondition-style static check that the descriptor location of a dataset
 * is present.
 *
 * @param fs A FileSystem where the metadata should be stored
 * @param location The Path where the metadata should be stored
 * @throws com.cloudera.cdk.data.NoSuchDatasetException if the descriptor location is missing
 * @throws MetadataProviderException if any IOException is thrown
 */
@SuppressWarnings("deprecation")
private static void checkExists(FileSystem fs, Path location) {
    final boolean present;
    try {
        present = fs.exists(location);
    } catch (IOException ex) {
        throw new MetadataProviderException("Cannot access descriptor location", ex);
    }
    if (!present) {
        throw new com.cloudera.cdk.data.NoSuchDatasetException(
                "Descriptor location is missing: " + location);
    }
}
From source file:com.cloudera.cdk.data.filesystem.TestFileSystemView.java
License:Apache License
/**
 * Asserts that each of the given paths exists on {@code fs} and is a directory.
 *
 * @param fs the FileSystem to query
 * @param dirs the paths expected to be existing directories
 * @throws IOException if the FileSystem cannot be queried
 */
public static void assertDirectoriesExist(FileSystem fs, Path... dirs) throws IOException {
    for (int i = 0; i < dirs.length; i++) {
        final Path dir = dirs[i];
        final String message = "Directory should exist: " + dir;
        final boolean existingDirectory = fs.exists(dir) && fs.isDirectory(dir);
        Assert.assertTrue(message, existingDirectory);
    }
}
From source file:com.cloudera.cdk.data.filesystem.TestFileSystemView.java
License:Apache License
public static void assertDirectoriesDoNotExist(FileSystem fs, Path... dirs) throws IOException { for (Path path : dirs) { Assert.assertTrue("Directory should not exist: " + path, !fs.exists(path)); }//from w ww.j a va2 s . c o m }
From source file:com.cloudera.cdk.maven.plugins.DeployAppMojo.java
License:Apache License
public void execute() throws MojoExecutionException, MojoFailureException { try {//from w ww.j a v a 2s . c o m Configuration conf = new Configuration(); Path appPath = getAppPath(); getLog().info("Deploying " + localApplicationFile + " to " + appPath); FileSystem destFileSystem = FileSystem.get(new URI(deployFileSystem), conf); if (destFileSystem.exists(appPath)) { if (!updateApplication) { throw new MojoExecutionException("Application already exists at " + appPath + ". Use 'updateApplication' option to force deployment."); } boolean success = destFileSystem.delete(appPath, true); if (!success) { throw new MojoExecutionException("Error deleting existing application at " + appPath); } } boolean success = FileUtil.copy(localApplicationFile, destFileSystem, appPath, false, conf); if (!success) { throw new MojoExecutionException( "Error creating parent directories " + "for deploying Oozie application"); } } catch (URISyntaxException e) { throw new MojoExecutionException("Syntax error in 'deployFileSystem': " + deployFileSystem, e); } catch (IOException e) { throw new MojoExecutionException("Error deploying application", e); } }
From source file:com.cloudera.cdk.tools.JobClasspathHelper.java
License:Apache License
/** * //from w ww . ja v a 2 s.c om * @param conf * Configuration object for the Job. Used to get the FileSystem associated with it. * @param libDir * Destination directory in the FileSystem (Usually HDFS) where to upload and look for the libs. * @param classesToInclude * Classes that are needed by the job. JarFinder will look for the jar containing these classes. * @throws Exception */ public void prepareClasspath(final Configuration conf, final Path libDir, Class<?>... classesToInclude) throws Exception { FileSystem fs = null; List<Class<?>> classList = new ArrayList<Class<?>>(Arrays.asList(classesToInclude)); fs = FileSystem.get(conf); Map<String, String> jarMd5Map = new TreeMap<String, String>(); // for each classes we use JarFinder to locate the jar in the local classpath. for (Class<?> clz : classList) { if (clz != null) { String localJarPath = JarFinder.getJar(clz); // we don't want to upload the same jar twice if (!jarMd5Map.containsKey(localJarPath)) { // We should not push core Hadoop classes with this tool. // Should it be the responsibility of the developer or we let // this fence here? if (!clz.getName().startsWith("org.apache.hadoop.")) { // we compute the MD5 sum of the local jar InputStream in = new FileInputStream(localJarPath); boolean threw = true; try { String md5sum = DigestUtils.md5Hex(in); jarMd5Map.put(localJarPath, md5sum); threw = false; } finally { Closeables.close(in, threw); } } else { logger.info("Ignoring {}, since it looks like it's from Hadoop's core libs", localJarPath); } } } } for (Entry<String, String> entry : jarMd5Map.entrySet()) { Path localJarPath = new Path(entry.getKey()); String jarFilename = localJarPath.getName(); String localMd5sum = entry.getValue(); logger.info("Jar {}. 
MD5 : [{}]", localJarPath, localMd5sum); Path remoteJarPath = new Path(libDir, jarFilename); Path remoteMd5Path = new Path(libDir, jarFilename + ".md5"); // If the jar file does not exist in HDFS or if the MD5 file does not exist in HDFS, // we force the upload of the jar. if (!fs.exists(remoteJarPath) || !fs.exists(remoteMd5Path)) { copyJarToHDFS(fs, localJarPath, localMd5sum, remoteJarPath, remoteMd5Path); } else { // If the jar exist,we validate the MD5 file. // If the MD5 sum is different, we upload the jar FSDataInputStream md5FileStream = null; String remoteMd5sum = ""; try { md5FileStream = fs.open(remoteMd5Path); byte[] md5bytes = new byte[32]; if (32 == md5FileStream.read(md5bytes)) { remoteMd5sum = new String(md5bytes, Charsets.UTF_8); } } finally { if (md5FileStream != null) { md5FileStream.close(); } } if (localMd5sum.equals(remoteMd5sum)) { logger.info("Jar {} already exists [{}] and md5sum are equals", jarFilename, remoteJarPath.toUri().toASCIIString()); } else { logger.info("Jar {} already exists [{}] and md5sum are different!", jarFilename, remoteJarPath.toUri().toASCIIString()); copyJarToHDFS(fs, localJarPath, localMd5sum, remoteJarPath, remoteMd5Path); } } // In all case we want to add the jar to the DistributedCache's classpath DistributedCache.addFileToClassPath(remoteJarPath, conf, fs); } // and we create the symlink (was necessary in earlier versions of Hadoop) DistributedCache.createSymlink(conf); }
From source file:com.cloudera.circus.test.TestXTest.java
License:Open Source License
/**
 * Checks that the Hadoop test configuration yields an {@code hdfs} FileSystem
 * on which the Hadoop test directory exists, and that a JobClient can be
 * created from the same configuration.
 */
@Test
@TestHadoop
public void testHadoopMinicluster() throws Exception {
    JobConf conf = getHadoopConf();
    Assert.assertNotNull(conf);

    FileSystem fs = FileSystem.get(conf);
    Assert.assertNotNull(fs);
    try {
        Assert.assertEquals(fs.getUri().getScheme(), "hdfs");
        Assert.assertTrue(fs.exists(getHadoopTestDir()));
    } finally {
        // Close the FileSystem even when an assertion above fails.
        fs.close();
    }

    JobClient jobClient = new JobClient(conf);
    Assert.assertNotNull(jobClient);
    jobClient.close();
}
From source file:com.cloudera.circus.test.TestXTest.java
License:Open Source License
@Test @TestHadoop//www . j a va 2s .c o m public void testHadoopMapReduce() throws Exception { JobConf conf = getHadoopConf(); FileSystem fs = FileSystem.get(conf); JobClient jobClient = new JobClient(conf); try { Path inputDir = new Path(getHadoopTestDir(), "input"); Path outputDir = new Path(getHadoopTestDir(), "output"); fs.mkdirs(inputDir); Writer writer = new OutputStreamWriter(fs.create(new Path(inputDir, "data.txt"))); writer.write("a\n"); writer.write("b\n"); writer.write("c\n"); writer.close(); JobConf jobConf = getHadoopConf(); jobConf.setInt("mapred.map.tasks", 1); jobConf.setInt("mapred.map.max.attempts", 1); jobConf.setInt("mapred.reduce.max.attempts", 1); jobConf.set("mapred.input.dir", inputDir.toString()); jobConf.set("mapred.output.dir", outputDir.toString()); final RunningJob runningJob = jobClient.submitJob(jobConf); waitFor(60 * 1000, true, new Predicate() { @Override public boolean evaluate() throws Exception { return runningJob.isComplete(); } }); Assert.assertTrue(runningJob.isSuccessful()); Assert.assertTrue(fs.exists(new Path(outputDir, "part-00000"))); BufferedReader reader = new BufferedReader( new InputStreamReader(fs.open(new Path(outputDir, "part-00000")))); Assert.assertTrue(reader.readLine().trim().endsWith("a")); Assert.assertTrue(reader.readLine().trim().endsWith("b")); Assert.assertTrue(reader.readLine().trim().endsWith("c")); Assert.assertNull(reader.readLine()); reader.close(); } finally { fs.close(); jobClient.close(); } }
From source file:com.cloudera.crunch.impl.mr.exec.CrunchJob.java
License:Open Source License
private void handleMultiPaths() throws IOException { if (!multiPaths.isEmpty()) { // Need to handle moving the data from the output directory of the // job to the output locations specified in the paths. FileSystem fs = FileSystem.get(job.getConfiguration()); for (int i = 0; i < multiPaths.size(); i++) { Path src = new Path(workingPath, PlanningParameters.MULTI_OUTPUT_PREFIX + i + "-*"); Path[] srcs = FileUtil.stat2Paths(fs.globStatus(src), src); Path dst = multiPaths.get(i); if (!fs.exists(dst)) { fs.mkdirs(dst);/*from ww w. j av a 2 s .com*/ } int minPartIndex = getMinPartIndex(dst, fs); for (Path s : srcs) { fs.rename(s, getDestFile(s, dst, minPartIndex++)); } } } }
From source file:com.cloudera.crunch.impl.mr.MRPipeline.java
License:Open Source License
private void cleanup() { if (!outputTargets.isEmpty()) { LOG.warn("Not running cleanup while output targets remain"); return;//from ww w.j a v a 2s . co m } try { FileSystem fs = FileSystem.get(conf); if (fs.exists(tempDirectory)) { fs.delete(tempDirectory, true); } } catch (IOException e) { LOG.info("Exception during cleanup", e); } }