Example usage for org.apache.hadoop.fs FileSystem exists

List of usage examples for org.apache.hadoop.fs FileSystem exists


On this page you can find example usages of org.apache.hadoop.fs.FileSystem.exists.


public boolean exists(Path f) throws IOException 

Source Link


Check if a path exists.


From source file:com.alexholmes.hadooputils.sort.SortInputSampler.java

License:Apache License

/**
 * Samples the job's input with {@code sampler}, sorts the samples with the job's output key
 * comparator and appends selected samples (with a NullWritable value) to the
 * TotalOrderPartitioner partition file, deleting any existing file first.
 *
 * NOTE(review): this excerpt is truncated -- several closing braces, the tail of the
 * createWriter(...) call and the body of the while loop are not visible here.
 *
 * @param job     job configuration; supplies the input format, reducer count and comparator
 * @param sampler sampler used to draw keys from the job input
 * @throws IOException declared by the signature
 */
public static <K, V> void writePartitionFile(JobConf job, Sampler<K, V> sampler) throws IOException {
    Configuration conf = job;
    // Use the input format defined in the job. NOT, the one provided by
    // the parent class's writePartitionFile() method, which will be a plain
    // TextInputFormat, by default
    final InputFormat inf = job.getInputFormat();
    int numPartitions = job.getNumReduceTasks();
    K[] samples = (K[]) sampler.getSample(inf, job);
    RawComparator<K> comparator = (RawComparator<K>) job.getOutputKeyComparator();
    Arrays.sort(samples, comparator);
    Path dst = new Path(TotalOrderPartitioner.getPartitionFile(job));
    FileSystem fs = dst.getFileSystem(conf);
    // remove a stale partition file (non-recursive delete) before writing a new one
    if (fs.exists(dst)) {
        fs.delete(dst, false);
    SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, dst, job.getMapOutputKeyClass(),
    NullWritable nullValue = NullWritable.get();
    // distance, in sample indices, between consecutive split points
    float stepSize = samples.length / (float) numPartitions;
    // index of the most recently written sample; -1 means none written yet
    int last = -1;
    // one split point per partition boundary, i.e. numPartitions - 1 entries
    for (int i = 1; i < numPartitions; ++i) {
        int k = Math.round(stepSize * i);
        // loops while k has not moved past 'last' and both samples compare equal;
        // presumably the (missing) body advances k -- TODO confirm against the full source
        while (last >= k && comparator.compare(samples[last], samples[k]) == 0) {
        writer.append(samples[k], nullValue);
        last = k;

From source file:com.alexholmes.hdfsslurper.Configurator.java

License:Apache License

/**
 * Verifies that {@code p} is usable as a directory: fails if it exists but is not a
 * directory, and attempts to create it if it does not exist.
 *
 * NOTE(review): the excerpt ends before the closing braces of this method.
 *
 * @param p    path to check or create
 * @param conf configuration used to resolve the file system of {@code p}
 * @throws IOException            declared by the signature
 * @throws ConfigSettingException if {@code p} exists and is not a directory
 * @throws FileSystemMkdirFailed  if {@code mkdirs} returns false
 */
public static void testCreateDir(Path p, Configuration conf)
        throws IOException, ConfigSettingException, FileSystemMkdirFailed {
    FileSystem fs = p.getFileSystem(conf);
    // an existing non-directory at this path is treated as a configuration error
    if (fs.exists(p) && !fs.getFileStatus(p).isDir()) {
        throw new ConfigSettingException("Directory appears to be a file: '" + p + "'");
    }

    // path is absent: try to create it (mkdirs signals failure by returning false)
    if (!fs.exists(p)) {
        log.info("Attempting creation of directory: " + p);
        if (!fs.mkdirs(p)) {
            throw new FileSystemMkdirFailed("Failed to create directory: '" + p + "'");

From source file:com.alexholmes.hdfsslurper.WorkerThread.java

License:Apache License

private void process(FileStatus srcFileStatus) throws IOException, InterruptedException {

    Path stagingFile = null;//from  w  w w. j  av a  2s .  c  o  m
    FileSystem destFs = null;
    String filenameBatchidDelimiter = config.getFileNameBatchIdDelimiter();

    try {
        FileSystem srcFs = srcFileStatus.getPath().getFileSystem(config.getConfig());

        // run a script which can change the name of the file as well as
        // write out a new version of the file
        if (config.getWorkScript() != null) {
            Path newSrcFile = stageSource(srcFileStatus);
            srcFileStatus = srcFileStatus.getPath().getFileSystem(config.getConfig()).getFileStatus(newSrcFile);

        Path srcFile = srcFileStatus.getPath();

        // get the target HDFS file
        Path destFile = getHdfsTargetPath(srcFileStatus);

        if (config.getCodec() != null) {
            String ext = config.getCodec().getDefaultExtension();
            if (!destFile.getName().endsWith(ext)) {
                destFile = new Path(destFile.toString() + ext);

        destFs = destFile.getFileSystem(config.getConfig());

        // get the staging HDFS file
        stagingFile = fileSystemManager.getStagingFile(srcFileStatus, destFile);
        String batchId = srcFile.toString().substring(
                srcFile.toString().lastIndexOf(filenameBatchidDelimiter) + 1, srcFile.toString().length());

        log.info("event#Copying source file '" + srcFile + "' to staging destination '" + stagingFile + "'"
                + "$batchId#" + batchId);

        // if the directory of the target file doesn't exist, attempt to
        // create it
        Path destParentDir = destFile.getParent();
        if (!destFs.exists(destParentDir)) {
            log.info("event#Attempting creation of target directory: " + destParentDir.toUri());
            if (!destFs.mkdirs(destParentDir)) {
                throw new IOException("event#Failed to create target directory: " + destParentDir.toUri());

        // if the staging directory doesn't exist, attempt to create it
        Path destStagingParentDir = stagingFile.getParent();
        if (!destFs.exists(destStagingParentDir)) {
            log.info("event#Attempting creation of staging directory: " + destStagingParentDir.toUri());
            if (!destFs.mkdirs(destStagingParentDir)) {
                throw new IOException("event#Failed to create staging directory: " + destParentDir.toUri());

        // copy the file
        InputStream is = null;
        OutputStream os = null;
        CRC32 crc = new CRC32();
        try {
            is = new BufferedInputStream(srcFs.open(srcFile));
            if (config.isVerify()) {
                is = new CheckedInputStream(is, crc);
            os = destFs.create(stagingFile);

            if (config.getCodec() != null) {
                os = config.getCodec().createOutputStream(os);

            IOUtils.copyBytes(is, os, 4096, false);
        } finally {

        long srcFileSize = srcFs.getFileStatus(srcFile).getLen();
        long destFileSize = destFs.getFileStatus(stagingFile).getLen();
        if (config.getCodec() == null && srcFileSize != destFileSize) {
            throw new IOException(
                    "event#File sizes don't match, source = " + srcFileSize + ", dest = " + destFileSize);

        log.info("event#Local file size = " + srcFileSize + ", HDFS file size = " + destFileSize + "$batchId#"
                + batchId);

        if (config.isVerify()) {
            verify(stagingFile, crc.getValue());

        if (destFs.exists(destFile)) {
            destFs.delete(destFile, false);

        log.info("event#Moving staging file '" + stagingFile + "' to destination '" + destFile + "'"
                + "$batchId#" + batchId);
        if (!destFs.rename(stagingFile, destFile)) {
            throw new IOException("event#Failed to rename file");

        if (config.isCreateLzopIndex() && destFile.getName().endsWith(lzopExt)) {
            Path lzoIndexPath = new Path(destFile.toString() + LzoIndex.LZO_INDEX_SUFFIX);
            if (destFs.exists(lzoIndexPath)) {
                log.info("event#Deleting index file as it already exists");
                destFs.delete(lzoIndexPath, false);


    } catch (Throwable t) {
        log.error("event#Caught exception working on file " + srcFileStatus.getPath(), t);

        // delete the staging file if it still exists
        try {
            if (destFs != null && destFs.exists(stagingFile)) {
                destFs.delete(stagingFile, false);
        } catch (Throwable t2) {
            log.error("event#Failed to delete staging file " + stagingFile, t2);



From source file:com.alexholmes.json.mapreduce.ExampleJob.java

License:Apache License

/**
 * Writes the contents of {@link #JSON} into a file in the job input directory in HDFS.
 * @param conf     the Hadoop config
 * @param inputDir the HDFS input directory where we'll write a file
 * @throws IOException if something goes wrong
 */
// Refuses to run when inputDir already exists; otherwise opens a writer on "input.txt"
// inside inputDir. NOTE(review): the excerpt ends right after the writer is created, so
// the write and close steps are not visible here.
public static void writeInput(Configuration conf, Path inputDir) throws IOException {
    // uses the file system obtained from conf, not one resolved from inputDir itself
    FileSystem fs = FileSystem.get(conf);

    if (fs.exists(inputDir)) {
        throw new IOException(
                String.format("Input directory '%s' exists - please remove and rerun this example", inputDir));

    // no charset is passed to OutputStreamWriter here
    OutputStreamWriter writer = new OutputStreamWriter(fs.create(new Path(inputDir, "input.txt")));

From source file:com.alibaba.jstorm.hdfs.spout.HdfsSpout.java

License:Apache License

/**
 * Ensures that {@code dir} is a directory: if it exists it must be a directory, otherwise
 * it is created. Every failure is logged and rethrown as a RuntimeException.
 *
 * NOTE(review): closing braces are missing from this excerpt.
 *
 * @param fs             file system that holds {@code dir}
 * @param dir            directory to validate or create
 * @param dirDescription human-readable label used in log and exception messages
 */
private static void validateOrMakeDir(FileSystem fs, Path dir, String dirDescription) {
    try {
        if (fs.exists(dir)) {
            // exists but is not a directory
            if (!fs.isDirectory(dir)) {
                LOG.error(dirDescription + " directory is a file, not a dir. " + dir);
                throw new RuntimeException(dirDescription + " directory is a file, not a dir. " + dir);
        // does not exist: mkdirs returning false is treated as a failure
        } else if (!fs.mkdirs(dir)) {
            LOG.error("Unable to create " + dirDescription + " directory " + dir);
            throw new RuntimeException("Unable to create " + dirDescription + " directory " + dir);
    } catch (IOException e) {
        // the same "Unable to create" message is used for any IOException raised above
        LOG.error("Unable to create " + dirDescription + " directory " + dir, e);
        throw new RuntimeException("Unable to create " + dirDescription + " directory " + dir, e);

From source file:com.anhth12.lambda.ml.MLUpdate.java

/**
 * Builds candidate models from the new and past data, moves the best candidate into a
 * timestamp-named directory under the model directory, and publishes the resulting model
 * on the update topic.
 *
 * NOTE(review): this excerpt is truncated -- closing braces, the tails of two method calls
 * and the bodies of several {@code if} statements are not visible here.
 *
 * @param sparkContext       Spark context; also supplies the Hadoop configuration
 * @param timestamp          not referenced in the visible part of this method
 * @param newKeyMessageData  new data; only its values are used
 * @param pastKeyMessageData past data, may be null; only its values are used
 * @param modelDirString     directory under which models are stored
 * @param modelUpdateTopic   topic the serialized model is sent to
 * @throws IOException          declared by the signature
 * @throws InterruptedException declared by the signature
 */
public void runUpdate(JavaSparkContext sparkContext, long timestamp, JavaPairRDD<String, M> newKeyMessageData,
        JavaPairRDD<String, M> pastKeyMessageData, String modelDirString,
        TopicProducer<String, String> modelUpdateTopic) throws IOException, InterruptedException {


    JavaRDD<M> newData = newKeyMessageData.values();
    JavaRDD<M> pastData = pastKeyMessageData == null ? null : pastKeyMessageData.values();

    if (newData != null) {
        newData.cache();
    if (pastData != null) {

    List<HyperParamValues<?>> hyperParamValues = getHyperParamValues();

    int valuesPerHyperParam = HyperParams.chooseValuesPerHyperParam(hyperParamValues.size(), candidates);

    List<List<?>> hyperParameterCombos = HyperParams.chooseHyperParameterCombos(hyperParamValues, candidates,

    FileSystem fs = FileSystem.get(sparkContext.hadoopConfiguration());

    // candidates are built under <modelDir>/.temporary/<current time millis>
    Path modelDir = new Path(modelDirString);
    Path tempModelPath = new Path(modelDir, ".temporary");
    Path candiatesPath = new Path(tempModelPath, Long.toString(System.currentTimeMillis()));

    Path bestCandidatePath = findBestCandidatePath(sparkContext, newData, pastData, hyperParameterCombos,

    // the winning candidate directory becomes <modelDir>/<current time millis>
    Path finalPath = new Path(modelDir, Long.toString(System.currentTimeMillis()));
    if (bestCandidatePath == null) {
        log.info("Unable to build any model");
    } else {
        fs.rename(bestCandidatePath, finalPath);

    // recursive delete of the candidates directory
    fs.delete(candiatesPath, true);

    Path bestModelPath = new Path(finalPath, MODEL_FILE_NAME);

    if (fs.exists(bestModelPath)) {
        PMML bestModel;
        // read the model file whose existence was just checked; finalPath is the directory
        // that contains it
        try (InputStream in = new GZIPInputStream(fs.open(bestModelPath), 1 << 16)) {
            bestModel = PMMLUtils.read(in);

        modelUpdateTopic.send("MODEL", PMMLUtils.toString(bestModel));
        // NOTE(review): candiatesPath was deleted above; confirm this is the intended argument
        publishAdditionalModelData(sparkContext, bestModel, newData, pastData, candiatesPath, modelUpdateTopic);

    if (newData != null) {

    if (pastData != null) {


From source file:com.anhth12.lambda.ml.MLUpdate.java

private Path findBestCandidatePath(JavaSparkContext sparkContext, JavaRDD<M> newData, JavaRDD<M> pastData,
        List<List<?>> hyperParameterCombos, Path candiatesPath) throws InterruptedException, IOException {

    Map<Path, Double> pathToEval = new HashMap<>(candidates);
    if (evalParallelism > 1) {
        Collection<Future<Tuple2<Path, Double>>> futures = new ArrayList<>(candidates);
        ExecutorService executor = Executors.newFixedThreadPool(evalParallelism);

        try {/*from  w  ww .j a  va2  s . co m*/
            for (int i = 0; i < candidates; i++) {
                futures.add(executor.submit(new BuildAndEvalWorker(i, hyperParameterCombos, sparkContext,
                        newData, pastData, candiatesPath)));
        } finally {

        for (Future<Tuple2<Path, Double>> future : futures) {
            Tuple2<Path, Double> pathEval;
            try {
                pathEval = future.get();
            } catch (ExecutionException ex) {
                throw new IllegalStateException(ex);
            pathToEval.put(pathEval._1, pathEval._2);
    } else {
        for (int i = 0; i < candidates; i++) {
            Tuple2<Path, Double> pathEval = new BuildAndEvalWorker(i, hyperParameterCombos, sparkContext,
                    newData, pastData, candiatesPath).call();
            pathToEval.put(pathEval._1, pathEval._2);

    FileSystem fs = FileSystem.get(sparkContext.hadoopConfiguration());

    Path bestCandidatePath = null;

    double bestEval = Double.NEGATIVE_INFINITY;

    for (Map.Entry<Path, Double> pathEval : pathToEval.entrySet()) {
        Path path = pathEval.getKey();
        Double eval = pathEval.getValue();

        if ((bestCandidatePath == null) || (eval != null && eval > bestEval) && fs.exists(path)) {
            log.info("Best eval / path is now {} / {}", eval, path);
            if (eval != null) {
                bestEval = eval;
            bestCandidatePath = path;

    return bestCandidatePath;

From source file:com.architecting.ch07.MapReduceIndexerTool.java

License:Apache License

/**
 * API for Java clients; visible for testing; may become a public API eventually.
 *
 * Configures and runs a MapReduce job that reads an HBase table and writes Solr output
 * under {@code options.outputDir}, then renames the output directories and optionally
 * calls GoLive.
 *
 * NOTE(review): this excerpt is truncated -- several closing braces and the tail of the
 * initTableMapperJob(...) call are not visible here.
 *
 * @param options parsed tool options (output dir, input table, ZooKeeper host, collection, shards, ...)
 * @return 0 on success, -1 when any step fails
 * @throws Exception declared by the signature
 */
int run(Options options) throws Exception {
    if (getConf().getBoolean("isMR1", false) && "local".equals(getConf().get("mapred.job.tracker"))) {
        throw new IllegalStateException(
                "Running with LocalJobRunner (i.e. all of Hadoop inside a single JVM) is not supported "
                        + "because LocalJobRunner does not (yet) implement the Hadoop Distributed Cache feature, "
                        + "which is required for passing files via --files and --libjars");
    }

    long programStartTime = System.nanoTime();
    getConf().setInt(SolrOutputFormat.SOLR_RECORD_WRITER_MAX_SEGMENTS, options.maxSegments);

    // switch off a false warning about allegedly not implementing Tool
    // also see http://hadoop.6.n7.nabble.com/GenericOptionsParser-warning-td8103.html
    // also see https://issues.apache.org/jira/browse/HADOOP-8183
    getConf().setBoolean("mapred.used.genericoptionsparser", true);

    if (options.log4jConfigFile != null) {
        Utils.setLogConfigFile(options.log4jConfigFile, getConf());
        addDistributedCacheFile(options.log4jConfigFile, getConf());

    // NOTE(review): the job is built from a fresh HBaseConfiguration, not from getConf(),
    // so the settings applied to getConf() above may not reach the job -- confirm
    Configuration config = HBaseConfiguration.create();
    Job job = Job.getInstance(config);

    // To be able to run this example from eclipse, we need to make sure
    // the built jar is distributed to the map-reduce tasks from the
    // local file system.
    job.addCacheArchive(new URI("file:///home/cloudera/ahae/target/ahae.jar"));

    // clear a pre-existing output directory; give up if it cannot be deleted
    FileSystem fs = options.outputDir.getFileSystem(job.getConfiguration());
    if (fs.exists(options.outputDir) && !delete(options.outputDir, true, fs)) {
        return -1;
    Path outputResultsDir = new Path(options.outputDir, RESULTS_DIR);
    Path outputReduceDir = new Path(options.outputDir, "reducers");

    int reducers = 1;

    Scan scan = new Scan();
    // tag::SETUP[]
    scan.setCaching(500); // <1>
    scan.setCacheBlocks(false); // <2>

    TableMapReduceUtil.initTableMapperJob( // <3>
            options.inputTable, // Input HBase table name
            scan, // Scan instance to control what to index
            HBaseAvroToSOLRMapper.class, // Mapper to parse cells content.
            Text.class, // Mapper output key
            SolrInputDocumentWritable.class, // Mapper output value

    FileOutputFormat.setOutputPath(job, outputReduceDir);

    job.setJobName(getClass().getName() + "/" + Utils.getShortClassName(HBaseAvroToSOLRMapper.class));
    job.setReducerClass(SolrReducer.class); // <4>
    job.setPartitionerClass(SolrCloudPartitioner.class); // <5>
    job.getConfiguration().set(SolrCloudPartitioner.ZKHOST, options.zkHost);
    job.getConfiguration().set(SolrCloudPartitioner.COLLECTION, options.collection);
    job.getConfiguration().setInt(SolrCloudPartitioner.SHARDS, options.shards);

    SolrOutputFormat.setupSolrHomeCache(options.solrHomeDir, job);

    // end::SETUP[]
    // NOTE(review): 'reducers' is the constant 1 assigned above, not derived from options.shards
    job.setNumReduceTasks(reducers); // Set the number of reducers based on the number of shards we have.
    if (!waitForCompletion(job, true)) {
        return -1;// job failed

    // -------------------------------------------------------------------------------------------------------------------------------------

    // only checked when assertions are enabled; holds only if options.shards == 1
    assert reducers == options.shards;

    // normalize output shard dir prefix, i.e.
    // rename part-r-00000 to part-00000 (stems from zero tree merge iterations)
    // rename part-m-00000 to part-00000 (stems from > 0 tree merge iterations)
    for (FileStatus stats : fs.listStatus(outputReduceDir)) {
        String dirPrefix = SolrOutputFormat.getOutputName(job);
        Path srcPath = stats.getPath();
        if (stats.isDirectory() && srcPath.getName().startsWith(dirPrefix)) {
            // drop the two characters that follow the prefix ("-m" or "-r")
            String dstName = dirPrefix + srcPath.getName().substring(dirPrefix.length() + "-m".length());
            Path dstPath = new Path(srcPath.getParent(), dstName);
            if (!rename(srcPath, dstPath, fs)) {
                return -1;

    // publish results dir
    if (!rename(outputReduceDir, outputResultsDir, fs)) {
        return -1;

    if (options.goLive && !new GoLive().goLive(options, listSortedOutputShardDirs(job, outputResultsDir, fs))) {
        return -1;

    goodbye(job, programStartTime);
    return 0;

From source file:com.asakusafw.cleaner.main.HDFSCleaner.java

License:Apache License

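/**
 * Cleans the given HDFS directory.
 * @param fs HDFS file system
 * @param cleanPath HDFS path to clean
 * @param isSetExecutionId whether directory names are checked as execution IDs
 * @param pattern pattern that file paths are matched against before deletion
 * @param keepDate retention period used for the expiry check
 * @param now the date used as "now" for the expiry check
 * @param recursive whether sub-directories are cleaned as well
 * @return the cleaning result ({@code false} on failure)
 * @throws CleanerSystemException
 */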
// Deletes expired files under cleanPath that match the pattern and, when 'recursive' is
// set, descends into sub-directories and deletes those that are empty and expired.
// Returns false when cleanPath is missing or not a directory, when a delete or a recursive
// clean fails, or on IOException.
// NOTE(review): the original comments in this method were lost to a character-encoding
// problem and have been rewritten from the code; the excerpt is also truncated (closing
// braces and the tails of a few Log.log(...) calls are not visible here).
private boolean cleanDir(FileSystem fs, Path cleanPath, boolean isSetExecutionId, String pattern, int keepDate,
        Date now, boolean recursive) throws CleanerSystemException {
    try {
        if (!fs.exists(cleanPath)) {
            // target path does not exist: log and report failure
            Log.log(CLASS, MessageIdConst.HCLN_CLEN_DIR_ERROR,
                    "??????", cleanPath.toString());
            return false;
        if (!fs.getFileStatus(cleanPath).isDir()) {
            // target path is not a directory: log and report failure
            Log.log(CLASS, MessageIdConst.HCLN_CLEN_DIR_ERROR,
                    "??????", cleanPath.toString());
            return false;

        // log the directory that is about to be processed
        Log.log(CLASS, MessageIdConst.HCLN_FILE_DELETE, cleanPath.toString());
        int cleanFileCount = 0;
        int cleanDirCount = 0;
        boolean result = true;
        FileStatus[] dirStatus = getListStatus(fs, cleanPath);
        Path[] listedPaths = FileUtil.stat2Paths(dirStatus);
        for (Path path : listedPaths) {
            FileStatus status = fs.getFileStatus(path);
            long lastModifiedTime = status.getModificationTime();
            if (status.isDir() && recursive) {
                // entry is a sub-directory and recursion was requested
                if (isSetExecutionId) {
                    // the directory name is treated as an execution ID
                    String executionId = path.getName();
                    if (isRunningJobFlow(executionId)) {
                        // job flow with this ID is running; presumably the directory is then
                        // skipped -- the rest of this branch is not visible here
                        Log.log(CLASS, MessageIdConst.HCLN_CLEN_DIR_EXEC, path.toString());
                FileStatus[] childdirStatus = getListStatus(fs, path);
                if (childdirStatus.length == 0) {
                    // sub-directory is empty: delete it if expired
                    if (isExpired(lastModifiedTime, keepDate, now)) {
                        if (!fs.delete(path, false)) {
                            Log.log(CLASS, MessageIdConst.HCLN_CLEN_FAIL, "",
                            result = false;
                        } else {
                            Log.log(CLASS, MessageIdConst.HCLN_DIR_DELETE, path.toString());
                } else {
                    // sub-directory is not empty: clean its contents first
                    if (cleanDir(fs, path, false, pattern, keepDate, now, recursive)) {
                        // re-list to see whether the recursive clean emptied it
                        childdirStatus = getListStatus(fs, path);
                        if (childdirStatus.length == 0) {
                            // expiry uses the modification time read before the recursive clean
                            if (isExpired(lastModifiedTime, keepDate, now)) {
                                if (!fs.delete(path, false)) {
                                    Log.log(CLASS, MessageIdConst.HCLN_CLEN_FAIL, "",
                                    result = false;
                                } else {
                                    Log.log(CLASS, MessageIdConst.HCLN_DIR_DELETE, path.toString());
                    } else {
                        Log.log(CLASS, MessageIdConst.HCLN_CLEN_FAIL, "", path.toString());
                        result = false;
            } else if (!status.isDir()) {
                // plain file: delete it if expired and its path matches the pattern
                if (isExpired(lastModifiedTime, keepDate, now) && isMatchPattern(path, pattern)) {
                    if (!fs.delete(path, false)) {
                        Log.log(CLASS, MessageIdConst.HCLN_CLEN_FAIL, "", path.toString());
                        result = false;
                    } else {
                        Log.log(CLASS, MessageIdConst.HCLN_DELETE_FILE, path.toString());

        Log.log(CLASS, MessageIdConst.HCLN_FILE_DELETE_SUCCESS, cleanPath.toString(), cleanDirCount,

        return result;
    } catch (IOException e) {
        // I/O failures are logged and reported as a failed clean instead of being rethrown
        Log.log(e, CLASS, MessageIdConst.HCLN_CLEN_DIR_EXCEPTION, cleanPath.getName());
        return false;

From source file:com.asakusafw.compiler.util.tester.HadoopDriver.java

License:Apache License

/**
 * Cleans up the temporary working area.
 * @throws IOException if failed to clean up
 */
// Recursively deletes the directory returned by toPath() if it exists.
// NOTE(review): closing braces are missing from this excerpt.
public void clean() throws IOException {
    logger.info("clean user directory");
    // toPath().toPath('/') renders the location as a '/'-separated string
    Path path = new Path(toPath().toPath('/'));
    FileSystem fs = path.getFileSystem(configuration);
    try {
        if (fs.exists(path)) {
            // 'true' = recursive delete
            fs.delete(path, true);
    } catch (IOException e) {
        // a failure in exists/delete is only logged at info level, not rethrown
        logger.info(MessageFormat.format("Failed to fs -rmr {0}", toPath()), e);