Example usage for org.apache.hadoop.fs FileSystem exists

Introduction

This page collects example usages of org.apache.hadoop.fs.FileSystem#exists.

Prototype

public boolean exists(Path f) throws IOException 

Document

Check if a path exists.
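
Before the project examples below, here is a minimal, self-contained sketch of the call. The path /tmp/example and the default Configuration are placeholders: exists returns false for a missing path and throws IOException only on an actual I/O failure, not on absence.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class FileSystemExistsExample {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        // Resolves the FileSystem named by fs.defaultFS (HDFS, local, etc.).
        FileSystem fs = FileSystem.get(conf);
        Path path = new Path("/tmp/example"); // placeholder path
        if (fs.exists(path)) {
            System.out.println(path + " exists");
        } else {
            System.out.println(path + " does not exist");
        }
    }
}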

Usage

From source file: com.cloudera.cdk.data.filesystem.FileSystemDatasetRepository.java

License: Apache License

/**
 * Creates, if necessary, the given location for {@code descriptor}.
 *
 * @param conf A Configuration
 * @param descriptor A DatasetDescriptor
 */
static void ensureExists(DatasetDescriptor descriptor, Configuration conf) {
    Preconditions.checkArgument(descriptor.getLocation() != null,
            "Cannot get FileSystem for a descriptor with no location");
    final Path dataPath = new Path(descriptor.getLocation());

    final FileSystem fs = fsForPath(dataPath, conf);

    try {
        if (!fs.exists(dataPath)) {
            fs.mkdirs(dataPath);
        }
    } catch (IOException ex) {
        throw new DatasetRepositoryException("Cannot access data location", ex);
    }
}
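
A note on the pattern above: the exists-then-mkdirs sequence is not atomic, but that is harmless here. FileSystem#mkdirs has mkdir -p semantics and, per its Hadoop javadoc, succeeds when the directory already exists, so the exists check only avoids a redundant call rather than guarding correctness.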

From source file: com.cloudera.cdk.data.filesystem.FileSystemMetadataProvider.java

License: Apache License

/**
 * Precondition-style static validation that a dataset exists
 *
 * @param fs        A FileSystem where the metadata should be stored
 * @param location  The Path where the metadata should be stored
 * @throws com.cloudera.cdk.data.NoSuchDatasetException if the descriptor location is missing
 * @throws MetadataProviderException  if any IOException is thrown
 */
@SuppressWarnings("deprecation")
private static void checkExists(FileSystem fs, Path location) {
    try {
        if (!fs.exists(location)) {
            throw new com.cloudera.cdk.data.NoSuchDatasetException(
                    "Descriptor location is missing: " + location);
        }
    } catch (IOException ex) {
        throw new MetadataProviderException("Cannot access descriptor location", ex);
    }
}

From source file: com.cloudera.cdk.data.filesystem.TestFileSystemView.java

License: Apache License

public static void assertDirectoriesExist(FileSystem fs, Path... dirs) throws IOException {
    for (Path path : dirs) {
        Assert.assertTrue("Directory should exist: " + path, fs.exists(path) && fs.isDirectory(path));
    }
}

From source file: com.cloudera.cdk.data.filesystem.TestFileSystemView.java

License: Apache License

public static void assertDirectoriesDoNotExist(FileSystem fs, Path... dirs) throws IOException {
    for (Path path : dirs) {
        Assert.assertTrue("Directory should not exist: " + path, !fs.exists(path));
    }
}

From source file: com.cloudera.cdk.maven.plugins.DeployAppMojo.java

License: Apache License

public void execute() throws MojoExecutionException, MojoFailureException {
    try {
        Configuration conf = new Configuration();
        Path appPath = getAppPath();
        getLog().info("Deploying " + localApplicationFile + " to " + appPath);

        FileSystem destFileSystem = FileSystem.get(new URI(deployFileSystem), conf);
        if (destFileSystem.exists(appPath)) {
            if (!updateApplication) {
                throw new MojoExecutionException("Application already exists at " + appPath
                        + ". Use 'updateApplication' option to force deployment.");
            }
            boolean success = destFileSystem.delete(appPath, true);
            if (!success) {
                throw new MojoExecutionException("Error deleting existing application at " + appPath);
            }
        }
        boolean success = FileUtil.copy(localApplicationFile, destFileSystem, appPath, false, conf);
        if (!success) {
            throw new MojoExecutionException(
                    "Error creating parent directories " + "for deploying Oozie application");
        }
    } catch (URISyntaxException e) {
        throw new MojoExecutionException("Syntax error in 'deployFileSystem': " + deployFileSystem, e);
    } catch (IOException e) {
        throw new MojoExecutionException("Error deploying application", e);
    }
}

From source file: com.cloudera.cdk.tools.JobClasspathHelper.java

License: Apache License

/**
 * Uploads the jars containing the given classes to {@code libDir} and adds them
 * to the DistributedCache classpath, skipping core Hadoop jars and jars whose
 * MD5 sum already matches the remote copy.
 *
 * @param conf
 *            Configuration object for the Job. Used to get the FileSystem associated with it.
 * @param libDir
 *            Destination directory in the FileSystem (Usually HDFS) where to upload and look for the libs.
 * @param classesToInclude
 *            Classes that are needed by the job. JarFinder will look for the jar containing these classes.
 * @throws Exception
 */
public void prepareClasspath(final Configuration conf, final Path libDir, Class<?>... classesToInclude)
        throws Exception {
    FileSystem fs = null;
    List<Class<?>> classList = new ArrayList<Class<?>>(Arrays.asList(classesToInclude));
    fs = FileSystem.get(conf);
    Map<String, String> jarMd5Map = new TreeMap<String, String>();
    // For each class, use JarFinder to locate its jar on the local classpath.
    for (Class<?> clz : classList) {
        if (clz != null) {
            String localJarPath = JarFinder.getJar(clz);
            // we don't want to upload the same jar twice
            if (!jarMd5Map.containsKey(localJarPath)) {
                // We should not push core Hadoop classes with this tool.
                // Should this be the developer's responsibility, or should we
                // keep this fence here?
                if (!clz.getName().startsWith("org.apache.hadoop.")) {
                    // we compute the MD5 sum of the local jar
                    InputStream in = new FileInputStream(localJarPath);
                    boolean threw = true;
                    try {
                        String md5sum = DigestUtils.md5Hex(in);
                        jarMd5Map.put(localJarPath, md5sum);
                        threw = false;
                    } finally {
                        Closeables.close(in, threw);
                    }
                } else {
                    logger.info("Ignoring {}, since it looks like it's from Hadoop's core libs", localJarPath);
                }
            }
        }
    }

    for (Entry<String, String> entry : jarMd5Map.entrySet()) {
        Path localJarPath = new Path(entry.getKey());
        String jarFilename = localJarPath.getName();
        String localMd5sum = entry.getValue();
        logger.info("Jar {}. MD5 : [{}]", localJarPath, localMd5sum);

        Path remoteJarPath = new Path(libDir, jarFilename);
        Path remoteMd5Path = new Path(libDir, jarFilename + ".md5");

        // If the jar file does not exist in HDFS or if the MD5 file does not exist in HDFS,
        // we force the upload of the jar.
        if (!fs.exists(remoteJarPath) || !fs.exists(remoteMd5Path)) {
            copyJarToHDFS(fs, localJarPath, localMd5sum, remoteJarPath, remoteMd5Path);
        } else {
            // If the jar exists, we validate the MD5 file.
            // If the MD5 sum is different, we upload the jar.
            FSDataInputStream md5FileStream = null;

            String remoteMd5sum = "";
            try {
                md5FileStream = fs.open(remoteMd5Path);
                byte[] md5bytes = new byte[32];
                if (32 == md5FileStream.read(md5bytes)) {
                    remoteMd5sum = new String(md5bytes, Charsets.UTF_8);
                }
            } finally {
                if (md5FileStream != null) {
                    md5FileStream.close();
                }
            }

            if (localMd5sum.equals(remoteMd5sum)) {
                logger.info("Jar {} already exists [{}] and md5sum are equals", jarFilename,
                        remoteJarPath.toUri().toASCIIString());
            } else {
                logger.info("Jar {} already exists [{}] and md5sum are different!", jarFilename,
                        remoteJarPath.toUri().toASCIIString());
                copyJarToHDFS(fs, localJarPath, localMd5sum, remoteJarPath, remoteMd5Path);
            }

        }
        // In all cases we want to add the jar to the DistributedCache's classpath.
        DistributedCache.addFileToClassPath(remoteJarPath, conf, fs);
    }
    // And we create the symlink (this was necessary in earlier versions of Hadoop).
    DistributedCache.createSymlink(conf);
}

From source file: com.cloudera.circus.test.TestXTest.java

License: Open Source License

@Test
@TestHadoop
public void testHadoopMinicluster() throws Exception {
    JobConf conf = getHadoopConf();
    Assert.assertNotNull(conf);
    FileSystem fs = FileSystem.get(conf);
    Assert.assertNotNull(fs);
    Assert.assertEquals(fs.getUri().getScheme(), "hdfs");
    Assert.assertTrue(fs.exists(getHadoopTestDir()));
    fs.close();
    JobClient jobClient = new JobClient(conf);
    Assert.assertNotNull(jobClient);
    jobClient.close();
}

From source file: com.cloudera.circus.test.TestXTest.java

License: Open Source License

@Test
@TestHadoop
public void testHadoopMapReduce() throws Exception {
    JobConf conf = getHadoopConf();
    FileSystem fs = FileSystem.get(conf);
    JobClient jobClient = new JobClient(conf);
    try {
        Path inputDir = new Path(getHadoopTestDir(), "input");
        Path outputDir = new Path(getHadoopTestDir(), "output");

        fs.mkdirs(inputDir);
        Writer writer = new OutputStreamWriter(fs.create(new Path(inputDir, "data.txt")));
        writer.write("a\n");
        writer.write("b\n");
        writer.write("c\n");
        writer.close();

        JobConf jobConf = getHadoopConf();
        jobConf.setInt("mapred.map.tasks", 1);
        jobConf.setInt("mapred.map.max.attempts", 1);
        jobConf.setInt("mapred.reduce.max.attempts", 1);
        jobConf.set("mapred.input.dir", inputDir.toString());
        jobConf.set("mapred.output.dir", outputDir.toString());
        final RunningJob runningJob = jobClient.submitJob(jobConf);
        waitFor(60 * 1000, true, new Predicate() {
            @Override
            public boolean evaluate() throws Exception {
                return runningJob.isComplete();
            }
        });
        Assert.assertTrue(runningJob.isSuccessful());
        Assert.assertTrue(fs.exists(new Path(outputDir, "part-00000")));
        BufferedReader reader = new BufferedReader(
                new InputStreamReader(fs.open(new Path(outputDir, "part-00000"))));
        Assert.assertTrue(reader.readLine().trim().endsWith("a"));
        Assert.assertTrue(reader.readLine().trim().endsWith("b"));
        Assert.assertTrue(reader.readLine().trim().endsWith("c"));
        Assert.assertNull(reader.readLine());
        reader.close();
    } finally {
        fs.close();
        jobClient.close();
    }
}

From source file: com.cloudera.crunch.impl.mr.exec.CrunchJob.java

License: Open Source License

private void handleMultiPaths() throws IOException {
    if (!multiPaths.isEmpty()) {
        // Need to handle moving the data from the output directory of the
        // job to the output locations specified in the paths.
        FileSystem fs = FileSystem.get(job.getConfiguration());
        for (int i = 0; i < multiPaths.size(); i++) {
            Path src = new Path(workingPath, PlanningParameters.MULTI_OUTPUT_PREFIX + i + "-*");
            Path[] srcs = FileUtil.stat2Paths(fs.globStatus(src), src);
            Path dst = multiPaths.get(i);
            if (!fs.exists(dst)) {
                fs.mkdirs(dst);
            }
            int minPartIndex = getMinPartIndex(dst, fs);
            for (Path s : srcs) {
                fs.rename(s, getDestFile(s, dst, minPartIndex++));
            }
        }
    }
}

From source file: com.cloudera.crunch.impl.mr.MRPipeline.java

License: Open Source License

private void cleanup() {
    if (!outputTargets.isEmpty()) {
        LOG.warn("Not running cleanup while output targets remain");
        return;
    }
    try {
        FileSystem fs = FileSystem.get(conf);
        if (fs.exists(tempDirectory)) {
            fs.delete(tempDirectory, true);
        }
    } catch (IOException e) {
        LOG.info("Exception during cleanup", e);
    }
}