Example usage for org.apache.hadoop.fs Path makeQualified

List of usage examples for org.apache.hadoop.fs Path makeQualified

Introduction

On this page you can find example usage for org.apache.hadoop.fs Path makeQualified.

Prototype

@Deprecated
public Path makeQualified(FileSystem fs) 

Document

Returns a qualified path object, resolved against the given FileSystem's URI and working directory.
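A minimal, self-contained sketch of how this overload is typically called. The path and configuration below are hypothetical placeholders; the non-deprecated FileSystem.makeQualified(Path) call is shown alongside it for comparison.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class MakeQualifiedExample {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        // A relative path; "data/input" is a hypothetical example.
        Path relative = new Path("data/input");
        FileSystem fs = relative.getFileSystem(conf);

        // Deprecated overload documented on this page: qualifies the path against
        // the FileSystem's URI and working directory.
        Path qualified = relative.makeQualified(fs);
        System.out.println(qualified);

        // Non-deprecated equivalent on FileSystem.
        Path alsoQualified = fs.makeQualified(relative);
        System.out.println(alsoQualified);
    }
}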

Usage

From source file:org.commoncrawl.mapred.ec2.parser.OutputCommitter.java

License:Open Source License

Path getTempTaskOutputPath(TaskAttemptContext taskContext) {
    JobConf conf = taskContext.getJobConf();
    Path outputPath = FileOutputFormat.getOutputPath(conf);
    if (outputPath != null) {
        Path p = new Path(outputPath, (FileOutputCommitter.TEMP_DIR_NAME + Path.SEPARATOR + "_"
                + taskContext.getTaskAttemptID().toString()));
        try {
            FileSystem fs = p.getFileSystem(conf);
            return p.makeQualified(fs);
        } catch (IOException ie) {
            LOG.warn(StringUtils.stringifyException(ie));
            return p;
        }
    }
    return null;
}

From source file:org.elasticsearch.repositories.hdfs.HdfsSnapshotRestoreTest.java

License:Apache License

/**
 * Deletes the content of the repository files in the bucket.
 */
public void cleanRepositoryFiles(String basePath) throws IOException {
    LocalFileSystem fs = FileSystem.getLocal(new Configuration());
    Path p = new Path(basePath);
    fs.delete(p.makeQualified(fs), true);
}

From source file:org.kiji.mapreduce.kvstore.FileKeyValueArrayStore.java

License:Apache License

/** {@inheritDoc} */
@Override
public void storeToConf(KeyValueStoreConfiguration conf) throws IOException {
    if (mInputPaths.isEmpty()) {
        throw new IOException("Required attribute not set: input path");
    }

    conf.setLong(CONF_MAX_VALUES, mMaxValues);

    if (mUseDCache && !"local".equals(conf.getDelegate().get("mapred.job.tracker", ""))) {
        // If we're scheduled to use the distributed cache, and we're not in the LocalJobRunner,
        // add these files to the DistributedCache.

        // TODO(WIBI-1653): This does not handle any sort of MapperTester, etc.
        // We need a separate flag that tells this to ignore mUseDCache if we're in a test
        // environment, and just use the original input file specs.
        final String dCachePrefix = getCachePrefix();

        // Associate this randomly chosen prefix id with this KVStore implementation.
        conf.set(CONF_DCACHE_PREFIX, dCachePrefix);

        // Add the input paths to the DistributedCache and translate path names.
        int uniqueId = 0;
        for (Path inputPath : getExpandedInputPaths()) {
            FileSystem fs = inputPath.getFileSystem(conf.getDelegate());
            Path absolutePath = inputPath.makeQualified(fs);

            String uriStr = absolutePath.toString() + "#" + dCachePrefix + "-" + uniqueId;
            LOG.debug("Adding to DistributedCache: " + uriStr);
            uniqueId++;
            try {
                DistributedCache.addCacheFile(new URI(uriStr), conf.getDelegate());
            } catch (URISyntaxException use) {
                throw new IOException("Could not construct URI for file: " + uriStr, use);
            }
        }

        // Ensure that symlinks are created for cached files.
        DistributedCache.createSymlink(conf.getDelegate());

        // Now save the cache prefix into the local state.  We couldn't set this earlier,
        // because we wanted getExpandedInputPaths() to actually unglob things. That
        // function will behave differently if mDCachePrefix is already initialized.
        mDCachePrefix = dCachePrefix;
    } else {
        // Just put the regular HDFS paths in the Configuration.
        conf.setStrings(CONF_PATHS,
                Lists.toArray(Lists.map(mInputPaths, new Lists.ToStringFn<Path>()), String.class));
    }
}

From source file:org.kiji.mapreduce.kvstore.lib.FileStoreHelper.java

License:Apache License

/**
 * Serializes file- and DistributedCache-specific properties associated
 * with the KeyValueStore that owns this FileStoreHelper to the specified configuration.
 *
 * @param conf the configuration to populate.
 * @throws IOException if there's an error serializing the state.
 */
public void storeToConf(KeyValueStoreConfiguration conf) throws IOException {
    if (mInputPaths.isEmpty()) {
        throw new IOException("Required attribute not set: input path");
    }

    if (mUseDCache && !"local".equals(conf.get("mapred.job.tracker", ""))) {
        // If we're scheduled to use the distributed cache, and we're not in the LocalJobRunner,
        // add these files to the DistributedCache.

        // TODO(aaron): This does not handle any sort of MapperTester, etc.
        // We need a separate flag that tells this to ignore mUseDCache if we're in a test
        // environment, and just use the original input file specs.
        final String dCachePrefix = getCachePrefix();

        // Associate this randomly chosen prefix id with this KVStore implementation.
        conf.set(CONF_DCACHE_PREFIX_KEY, dCachePrefix);

        // Add the input paths to the DistributedCache and translate path names.
        int uniqueId = 0;
        for (Path inputPath : getExpandedInputPaths()) {
            FileSystem fs = inputPath.getFileSystem(conf.getDelegate());
            Path absolutePath = inputPath.makeQualified(fs);
            String uriStr = absolutePath.toString() + "#" + dCachePrefix + "-" + uniqueId;
            LOG.debug("Adding to DistributedCache: " + uriStr);
            uniqueId++;
            try {
                DistributedCache.addCacheFile(new URI(uriStr), conf.getDelegate());
            } catch (URISyntaxException use) {
                throw new IOException("Could not construct URI for file: " + uriStr, use);
            }
        }

        // Ensure that symlinks are created for cached files.
        DistributedCache.createSymlink(conf.getDelegate());

        // Now save the cache prefix into the local state.  We couldn't set this earlier,
        // because we wanted getExpandedInputPaths() to actually unglob things. That
        // function will behave differently if mDCachePrefix is already initialized.
        mDCachePrefix = dCachePrefix;
    } else {
        // Just put the regular HDFS paths in the Configuration.
        conf.setStrings(CONF_PATHS_KEY,
                Lists.toArray(Lists.map(mInputPaths, new Lists.ToStringFn<Path>()), String.class));
    }
}

From source file:org.kiji.mapreduce.output.HFileMapReduceJobOutput.java

License:Apache License

/**
 * Configures the partitioner for generating HFiles.
 *
 * <p>Each generated HFile should fit within a region of the target table.
 * Additionally, it's optimal to have only one HFile to load into each region, since a
 * read from that region will require reading from each HFile under management (until
 * compaction happens and merges them all back into one HFile).</p>
 *
 * <p>To achieve this, we configure a TotalOrderPartitioner that will partition the
 * records output from the Mapper based on their rank in a total ordering of the
 * keys.  The <code>startKeys</code> argument should contain a list of the first key in
 * each of those partitions.</p>
 *
 * @param job The job to configure.
 * @param startKeys A list of keys that will mark the boundaries between the partitions
 *     for the sorted map output records.
 * @throws IOException If there is an error.
 */
private static void configurePartitioner(Job job, List<HFileKeyValue> startKeys) throws IOException {
    job.setPartitionerClass(TotalOrderPartitioner.class);

    LOG.info("Configuring " + startKeys.size() + " reduce partitions.");
    job.setNumReduceTasks(startKeys.size());

    // Write the file that the TotalOrderPartitioner reads to determine where to partition records.
    Path partitionFilePath = new Path(job.getWorkingDirectory(), "partitions_" + System.currentTimeMillis());
    LOG.info("Writing partition information to " + partitionFilePath);

    final FileSystem fs = partitionFilePath.getFileSystem(job.getConfiguration());
    partitionFilePath = partitionFilePath.makeQualified(fs);
    writePartitionFile(job.getConfiguration(), partitionFilePath, startKeys);

    // Add it to the distributed cache.
    try {
        final URI cacheUri = new URI(partitionFilePath.toString() + "#" + TotalOrderPartitioner.DEFAULT_PATH);
        DistributedCache.addCacheFile(cacheUri, job.getConfiguration());
    } catch (URISyntaxException e) {
        throw new IOException(e);
    }
    DistributedCache.createSymlink(job.getConfiguration());
}

From source file:org.mitre.ccv.mapred.CompleteCompositionVectors.java

License:Open Source License

/**
 *
 * The JSON data will be the same as {@link org.mitre.ccv.CompleteMatrix#jsonCompleteMatrix}, but the features
 * will be in a different order. This version, by default, sorts only by entropy values, whereas the
 * ccv in-memory version sorts by the k-mers' natural order (i.e., lexicographic).
 * @param argv
 * @return
 * @throws java.lang.Exception
 */
@Override
@SuppressWarnings("static-access") // For OptionBuilder
public int run(String[] argv) throws Exception {
    JobConf conf = new JobConf(getConf());
    String cli_title = "CompleteCompositionVectorHadoop";

    int start = CalculateKmerCounts.DEFAULT_START;
    int end = CalculateKmerCounts.DEFAULT_END;
    int topkmers = 0;

    String input = null;
    String output = null;
    String vectorJsonOutput = null;
    //String kmerJsonOutput = null;

    boolean cleanLogs = false;

    /** create the Options */
    Options options = new Options();

    /** Hadoop Options */
    options.addOption(
            OptionBuilder.withArgName("number").hasArg(true).withDescription("number of maps").create("m"));
    options.addOption(
            OptionBuilder.withArgName("number").hasArg(true).withDescription("number of reducers").create("r"));

    // org.apache.hadoop.util.GenericOptionsParser should capture this, but it doesn't
    options.addOption(OptionBuilder.withArgName("property=value").hasArg(true).withValueSeparator()
            .withDescription("use value for given property").create("D"));

    /** CompleteCompositionVector Options */
    options.addOption(OptionBuilder.withArgName("number").hasArg(true)
            .withDescription("number of top k-mers to use in calculations").create("topKmers"));
    options.addOption(OptionBuilder.withArgName("start").hasArg(true).withDescription("starting length of tile")
            .create("start"));
    options.addOption(OptionBuilder.withArgName("end").hasArg(true).withDescription("ending length of title")
            .create("end"));
    options.addOption(OptionBuilder.hasArg(true).withArgName("file")
            .withDescription("JSON file to write out k-mers to").create("kmersfile"));

    options.addOption(OptionBuilder.hasArg(true).withArgName("file")
            .withDescription("JSON file to write out feature vectors to "
                    + "(Overrides kmersout, only one file will be written).")
            .create("vectorsfile"));

    options.addOption(OptionBuilder.withArgName("number").hasArg(true)
            .withDescription("What preference to use: 0-min 1-median 2-avg(min,med): default is median")
            .create("prefval"));

    options.addOption(OptionBuilder.withArgName("help").hasArg(false).withDescription("print this message")
            .create("help"));

    // automatically generate the help statement
    HelpFormatter formatter = new HelpFormatter();

    //GenericOptionsParser gop = new GenericOptionsParser(conf, options, argv);
    GenericOptionsParser gop = new GenericOptionsParser(conf, argv);

    String[] remaining_args = gop.getRemainingArgs();

    // create the parser
    CommandLineParser parser = new GnuParser();
    //CommandLine line = gop.getCommandLine();
    String[] other_args = new String[] {};

    try {
        CommandLine line = parser.parse(options, remaining_args);
        other_args = line.getArgs();

        // Make sure there is a parameter left.
        if (other_args.length == 0) {
            System.out.println(cli_title);
            System.out.println("Missing input path!");
            formatter.printHelp("hccv [options] <input> [<output>] ", options);
            GenericOptionsParser.printGenericCommandUsage(System.out);
            return -1;
        }

        Option[] opts = line.getOptions();
        if (line.hasOption("help")) {
            System.out.println(cli_title);
            formatter.printHelp("hccv [options] <input> [<output>] ", options);
            GenericOptionsParser.printGenericCommandUsage(System.out);
            return -1;
        }

        // could also use line.iterator()
        for (Option opt : opts) {
            if (opt.getOpt().equals("m")) {
                conf.setNumMapTasks(Integer.parseInt(opt.getValue()));
            }
            if (opt.getOpt().equals("r")) {
                conf.setNumReduceTasks(Integer.parseInt(opt.getValue()));
            }
            if (opt.getOpt().equals("D")) {
                // We can have multiple properties we want to set
                String[] properties = opt.getValues();
                for (String property : properties) {
                    String[] keyval = property.split("=");
                    conf.set(keyval[0], keyval[1]);
                }
            }
            if (opt.getOpt().equals("start")) {
                start = Integer.parseInt(opt.getValue());
            }
            if (opt.getOpt().equals("end")) {
                end = Integer.parseInt(opt.getValue());
            }
            if (opt.getOpt().equals("topKmers")) {
                topkmers = Integer.parseInt(opt.getValue());
            }
            if (opt.getOpt().equals("vectorsfile")) {
                vectorJsonOutput = opt.getValue();
            }
        }
    } catch (ParseException e) {
        LOG.warn("options parsing faild: " + e.getMessage());
        System.out.println(cli_title);
        formatter.printHelp("hccv [options] <input> [<output>] ", options);
        GenericOptionsParser.printGenericCommandUsage(System.out);
    }
    if (start <= 2) {
        throw new IllegalArgumentException("Value of 'start' argument must be larger than 2");
    }

    input = other_args[0];
    if (other_args.length < 2) {
        output = input + "_" + FileUtils.getSimpleDate();
    } else {
        output = other_args[1];
    }

    /**
     * Check output path. Either needs to exist as a directory or not exist
     */
    Path outputPath = new Path(output);
    FileSystem fs = outputPath.getFileSystem(conf);
    if (!fs.exists(outputPath)) {
        fs.mkdirs(outputPath);
    } else if (fs.exists(outputPath) || !fs.getFileStatus(outputPath).isDir()) {
        LOG.fatal(String.format("Output directory %s already exists", outputPath.makeQualified(fs)));
        throw new FileAlreadyExistsException(
                String.format("Output directory %s already exists", outputPath.makeQualified(fs)));
    }

    String outputDir = output + Path.SEPARATOR;

    int res;
    /**
     * Zero, CalculateCompositionVectors
     */
    LOG.info("Starting CalculateCompositionVectors Map-Reduce job");
    CalculateCompositionVectors cv = new CalculateCompositionVectors();
    res = cv.initJob(conf, start, end, input, outputDir + COMPOSITION_VECTORS, cleanLogs);
    if (res != 0) {
        LOG.info("CalculateCompositionVectors returned non-zero result!");
        return res;
    }
    // We can stop now or continue to reduce dimensionality using RRE or other means

    /**
     * First, CalculateKmerCounts
     */
    LOG.info("Starting CalculateKmerCounts Map-Reduce job");
    // FastMap option for CalculateKmers!?!
    CalculateKmerCounts ckc = new CalculateKmerCounts();
    res = ckc.initJob(conf, start, end, input, outputDir + KMER_COUNTS);
    if (res != 0) {
        LOG.fatal("CalculateKmerCounts returned non-zero result!");
        return res;
    }

    /**
     * Second, TotalSequenceLength
     */
    LOG.info("Starting TotalSequenceLength Map-Reduce job");
    TotalSequenceLength tsl = new TotalSequenceLength();
    res = tsl.initJob(conf, input, outputDir + TOTAL_LENGTH, cleanLogs);
    if (res != 0) {
        LOG.fatal("TotalSequenceLength returned non-zero result!");
        return res;
    }
    int length = tsl.getCount(conf, outputDir + TOTAL_LENGTH);

    if (length < 3) {
        LOG.fatal("TotalSequenceLength returned a total sequence length of less than 3.");
        return -1;
    } else {
        LOG.info(String.format("TotalSequenceLength returned a total sequence length of %d.", length));
    }

    /**
     * Third, CalculateKmerProbabilities
     */
    LOG.info("Starting CalculateKmerProbabilities Map-Reduce job");
    CalculateKmerProbabilities ckp = new CalculateKmerProbabilities();
    res = ckp.initJob(conf, start, end, length, outputDir + KMER_COUNTS, outputDir + KMER_PROBABILITIES,
            cleanLogs);
    if (res != 0) {
        LOG.fatal("CalculateKmerProbabilities returned non-zero result!");
        return res;
    }

    /**
     * Fourth, InvertKmerProbabilities
     */
    LOG.info("Starting InvertKmerProbabilities Map-Reduce job");
    InvertKmerProbabilities ikp = new InvertKmerProbabilities();
    res = ikp.initJob(conf, outputDir + KMER_PROBABILITIES, outputDir + INVERTED_KMER_PROBABILITIES, cleanLogs);
    if (res != 0) {
        LOG.fatal("InvertKmerProbabilities returned non-zero result!");
        return res;
    }

    /**
     * Fifth, CalculateKmerPiValues
     */
    LOG.info("Starting CalculateKmerPiValues Map-Reduce job");
    CalculateKmerPiValues kpv = new CalculateKmerPiValues();
    res = kpv.initJob(conf, start, end, outputDir + INVERTED_KMER_PROBABILITIES, outputDir + KMER_PI_VALUES,
            cleanLogs);
    if (res != 0) {
        LOG.fatal("CalculateKmerPiValues returned non-zero result!");
        return res;
    }

    /**
     * Sixth, CalculateKmerRevisedRelativeEntropy
     */
    LOG.info("Starting CalculateKmerRevisedRelativeEntropy Map-Reduce job");
    CalculateKmerRevisedRelativeEntropy krre = new CalculateKmerRevisedRelativeEntropy();
    res = krre.initJob(conf, outputDir + KMER_PI_VALUES, outputDir + COMPOSITION_VECTORS,
            outputDir + ENTROPY_VALUES, cleanLogs);
    if (res != 0) {
        LOG.fatal("CalculateKmerRevisedRelativeEntropy returned non-zero result!");
        return res;
    }

    /**
     * Seventh, SortKmerRevisedRelativeEntropies
     */
    SortKmerRevisedRelativeEntropies srre = new SortKmerRevisedRelativeEntropies();
    res = srre.initJob(conf, outputDir + ENTROPY_VALUES, outputDir + SORTED_ENTROPY_VALUES, cleanLogs);
    if (res != 0) {
        LOG.fatal("SortKmerRevisedRelativeEntropies returned non-zero result!");
        return res;
    }

    /**
     * Eighth, GenerateFeatureVectors
     *
     * Generate a flattened list to add to the cache to be distributed to the map-tasks.
     */
    Path listOutputPath = new Path(outputDir + Integer.toString(topkmers) + KMER_ENTROPY_SET);
    LOG.info(String.format("Loading %d sorted k-mers from %s to %s", topkmers,
            outputDir + SORTED_ENTROPY_VALUES, listOutputPath.toString()));
    int num = CompleteCompositionVectorUtils.flattenKmerEntropySequenceFile(conf, topkmers,
            outputDir + SORTED_ENTROPY_VALUES, listOutputPath.toString(), cleanLogs);

    if (num != topkmers) {
        LOG.fatal(String.format("Requested %d k-mers, but got %d. Using %d", topkmers, num, num));
        topkmers = num;
    }
    GenerateFeatureVectors fv = new GenerateFeatureVectors();
    res = fv.initJob(conf, listOutputPath.toString(), topkmers, outputDir + COMPOSITION_VECTORS,
            outputDir + FEATURE_VECTORS, cleanLogs);
    if (res != 0) {
        LOG.fatal("GenerateFeatureVectors returned non-zero result!");
        return res;
    }

    /**
     * Save feature vectors, features (k-mers), and properties to a JSON file.
     *
     * The data will be the same as {@link org.mitre.ccv.CompleteMatrix#jsonCompleteMatrix}, but the features
     * will be in a different order. This version, by default, sorts only by entropy values, whereas the
     * ccv in-memory version sorts by the k-mers' natural order (i.e., lexicographic).
     */
    if (vectorJsonOutput != null && vectorJsonOutput.length() > 0) {
        LOG.info("Writing features out to " + vectorJsonOutput);
        CompleteCompositionVectorUtils.featureVectors2Json(conf, start, end, topkmers,
                outputDir + SORTED_ENTROPY_VALUES, outputDir + FEATURE_VECTORS, vectorJsonOutput);
    }

    LOG.info("All done generating complete composition vectors and feature vectors.");
    return res;
}

From source file:org.mitre.mapred.fs.FileUtils.java

License:Open Source License

/**
 * Takes input as a comma-separated list of files
 * and verifies that they exist. It defaults to file:///
 * if the files specified do not have a scheme.
 * It returns the paths converted to URIs, defaulting to file:///.
 * So an input of /home/user/file1,/home/user/file2 would return
 * file:///home/user/file1,file:///home/user/file2
 *
 * @see org.apache.hadoop.util.GenericOptionsParser#validateFiles(java.lang.String, org.apache.hadoop.conf.Configuration)
 * @param files the comma-separated list of files to validate
 * @return the paths converted to URIs
 */
public static String validateFiles(String files, Configuration conf) throws IOException {
    if (files == null) {
        return null;
    }
    String[] fileArr = files.split(",");
    String[] finalArr = new String[fileArr.length];
    for (int i = 0; i < fileArr.length; i++) {
        String tmp = fileArr[i];
        String finalPath;
        Path path = new Path(tmp);
        URI pathURI = path.toUri();
        FileSystem localFs = FileSystem.getLocal(conf);
        if (pathURI.getScheme() == null) {
            //default to the local file system
            //check if the file exists or not first
            if (!localFs.exists(path)) {
                throw new FileNotFoundException("File " + tmp + " does not exist.");
            }
            finalPath = path.makeQualified(localFs).toString();
        } else {
            // check if the file exists in this file system
            // we need to recreate this filesystem object to copy
            // these files to the file system jobtracker is running
            // on.
            FileSystem fs = path.getFileSystem(conf);
            if (!fs.exists(path)) {
                throw new FileNotFoundException("File " + tmp + " does not exist.");
            }
            finalPath = path.makeQualified(fs).toString();
            try {
                fs.close();
            } catch (IOException e) {
                // Ignore failures while closing the FileSystem; the path has already been validated.
            }
        }
        finalArr[i] = finalPath;
    }
    return StringUtils.arrayToString(finalArr);
}
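Below is a hedged usage sketch for the validateFiles helper above; the file names are hypothetical and must exist locally for validation to succeed.

import org.apache.hadoop.conf.Configuration;
import org.mitre.mapred.fs.FileUtils;

public class ValidateFilesExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Hypothetical local files; each entry that exists is qualified to a file:/// URI.
        String qualified = FileUtils.validateFiles("/home/user/file1,/home/user/file2", conf);
        // Expected form: file:///home/user/file1,file:///home/user/file2
        System.out.println(qualified);
    }
}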

From source file:org.mrgeo.cmd.ingestvector.IngestVector.java

License:Apache License

List<String> getInputs(String arg, boolean recurse) throws IOException {
    GeotoolsVectorReader reader = null;

    List<String> inputs = new LinkedList<String>();

    File f = new File(arg);
    URI uri = f.toURI();
    // recurse through directories
    if (f.isDirectory()) {
        File[] dir = f.listFiles();

        for (File s : dir) {
            try {
                if (s.isFile() || (s.isDirectory() && recurse)) {
                    inputs.addAll(getInputs(s.getCanonicalPath(), recurse));
                }
            } catch (IOException e) {
                // Skip entries whose canonical path cannot be resolved.
            }
        }
    } else if (f.isFile()) {
        // is this a valid file?
        System.out.print("*** checking " + f.getCanonicalPath());
        try {
            reader = GeotoolsVectorUtils.open(uri);

            if (reader != null) {
                System.out.println(" accepted ***");
                inputs.add(uri.toString());
            } else {
                System.out.println(" can't load ***");
            }
        } catch (IOException e) {
            System.out.println(" can't load ***");
        }
    } else {
        Path p = new Path(arg);
        FileSystem fs = HadoopFileUtils.getFileSystem(config, p);
        if (fs.exists(p)) {
            FileStatus status = fs.getFileStatus(p);

            if (status.isDir() && recurse) {
                FileStatus[] files = fs.listStatus(p);
                for (FileStatus file : files) {
                    inputs.addAll(getInputs(file.getPath().toString(), recurse));
                }
            } else {
                // is this a valid file?
                System.out.print("*** checking " + p.toString());
                try {
                    reader = GeotoolsVectorUtils.open(p.makeQualified(fs).toUri());
                    if (reader != null) {
                        System.out.println(" accepted ***");
                        inputs.add(p.toString());
                    } else {
                        System.out.println(" can't load ***");
                    }
                } catch (IOException e) {
                    System.out.println(" can't load ***");
                }
            }
        }
    }

    return inputs;
}

From source file:org.mrgeo.format.DirectoryInputFormat.java

License:Apache License

public static void setParentDirectory(Job job, Path parent) throws IOException {
    Configuration conf = job.getConfiguration();
    FileSystem fs = FileSystem.get(conf);
    Path path = parent.makeQualified(fs);
    conf.set("mapred.input.dir", StringUtils.escapeString(path.toString()));
}
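A brief usage sketch, assuming the DirectoryInputFormat helper above is on the classpath; the job and directory below are hypothetical.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;
import org.mrgeo.format.DirectoryInputFormat;

public class DirectoryInputFormatUsage {
    public static void main(String[] args) throws Exception {
        Job job = new Job(new Configuration()); // older constructor, matching the APIs used on this page
        // Qualifies /data/tiles against the default FileSystem and stores it under mapred.input.dir.
        DirectoryInputFormat.setParentDirectory(job, new Path("/data/tiles"));
        System.out.println(job.getConfiguration().get("mapred.input.dir"));
    }
}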

From source file:org.mrgeo.hdfs.utils.HadoopFileUtils.java

License:Apache License

/**
 * Deletes the specified path. If the scheme is s3 or s3n, it will wait
 * until the path is gone before returning, or else throw an IOException indicating
 * that the path still exists. This is because S3 operates under eventual
 * consistency, so deletes are not guaranteed to happen right away.
 *
 * @param conf
 * @param path
 * @throws IOException
 */
public static void delete(final Configuration conf, final Path path) throws IOException {
    final FileSystem fs = getFileSystem(conf, path);
    if (fs.exists(path)) {
        log.info("Deleting path " + path.toString());
        if (fs.delete(path, true) == false) {
            throw new IOException("Error deleting directory " + path.toString());
        }
        Path qualifiedPath = path.makeQualified(fs);
        URI pathUri = qualifiedPath.toUri();
        String scheme = pathUri.getScheme().toLowerCase();
        if ("s3".equals(scheme) || "s3n".equals(scheme)) {
            boolean stillExists = fs.exists(path);
            int sleepIndex = 0;
            // Wait for S3 to finish the deletion in phases - initially checking
            // more frequently and then less frequently as time goes by.
            int[][] waitPhases = { { 60, 1 }, { 120, 2 }, { 60, 15 } };
            while (sleepIndex < waitPhases.length) {
                int waitCount = 0;
                log.info("Sleep index " + sleepIndex);
                while (stillExists && waitCount < waitPhases[sleepIndex][0]) {
                    waitCount++;
                    log.info("Waiting " + waitPhases[sleepIndex][1] + " seconds " + path.toString()
                            + " to be deleted");
                    try {
                        Thread.sleep(waitPhases[sleepIndex][1] * 1000L);
                    } catch (InterruptedException e) {
                        log.warn("While waiting for " + path.toString() + " to be deleted", e);
                    }
                    stillExists = fs.exists(path);
                    log.info("After waiting exists = " + stillExists);
                }
                sleepIndex++;
            }
            if (stillExists) {
                throw new IOException(path.toString() + " was not deleted within the waiting period");
            }
        }
    } else {
        log.info("Path already does not exist " + path.toString());
    }
}
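As a hedged usage sketch (the bucket and key below are hypothetical), the delete helper above can be called directly; for s3/s3n paths it blocks until the eventually consistent store reports the path gone.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.mrgeo.hdfs.utils.HadoopFileUtils;

public class DeleteUsage {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Hypothetical S3 path; delete() waits for the object store to report it gone.
        HadoopFileUtils.delete(conf, new Path("s3n://my-bucket/output/tiles"));
    }
}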