Example usage for org.apache.hadoop.fs FileSystem exists

List of usage examples for org.apache.hadoop.fs.FileSystem.exists

Introduction

On this page you can find example usages of org.apache.hadoop.fs.FileSystem.exists.

Prototype

public boolean exists(Path f) throws IOException 

Source Link

Document

Check if a path exists.

Usage

From source file:cascading.tap.hadoop.Hadoop18TapUtil.java

License:Open Source License

/**
 * copies all files from the taskoutputpath to the outputpath
 *
 * @param conf/*  w  w w .  j  ava  2  s .co  m*/
 */
public static void commitTask(JobConf conf) throws IOException {
    Path taskOutputPath = new Path(conf.get("mapred.work.output.dir"));

    FileSystem fs = getFSSafe(conf, taskOutputPath);

    if (fs == null)
        return;

    AtomicInteger integer = pathCounts.get(taskOutputPath.toString());

    if (integer.decrementAndGet() != 0)
        return;

    String taskId = conf.get("mapred.task.id");

    LOG.info("committing task: '" + taskId + "' - " + taskOutputPath);

    if (taskOutputPath != null) {
        if (writeDirectlyToWorkingPath(conf, taskOutputPath))
            return;

        if (fs.exists(taskOutputPath)) {
            Path jobOutputPath = taskOutputPath.getParent().getParent();
            // Move the task outputs to their final place
            moveTaskOutputs(conf, fs, jobOutputPath, taskOutputPath);

            // Delete the temporary task-specific output directory
            if (!fs.delete(taskOutputPath, true))
                LOG.info("failed to delete the temporary output directory of task: '" + taskId + "' - "
                        + taskOutputPath);

            LOG.info("saved output of task '" + taskId + "' to " + jobOutputPath);
        }
    }
}

From source file:cascading.tap.hadoop.Hadoop18TapUtil.java

License:Open Source License

/**
 * Deletes the {@code TEMPORARY_PATH} directory located beneath the given output path.
 * <p>
 * Returns without doing anything when {@code outputPath} is {@code null}, when
 * {@code getFSSafe} yields no file system, or when the output path itself does not exist.
 *
 * @param conf       the job configuration used to resolve the file system
 * @param outputPath the job output path that may contain the temporary directory
 * @throws IOException propagated from the underlying file system calls
 */
private static synchronized void cleanTempPath(JobConf conf, Path outputPath) throws IOException {
    if (outputPath == null) {
        return;
    }

    FileSystem outputFs = getFSSafe(conf, outputPath);

    if (outputFs == null || !outputFs.exists(outputPath)) {
        return;
    }

    Path tempPath = new Path(outputPath, TEMPORARY_PATH);

    // The message is logged before the existence check, so it appears even when
    // there turns out to be nothing to delete.
    LOG.info("deleting temp path " + tempPath);

    if (!outputFs.exists(tempPath)) {
        return;
    }

    outputFs.delete(tempPath, true);
}

From source file:cascading.tap.hadoop.Hadoop18TapUtil.java

License:Open Source License

/**
 * Creates the job-specific {@code TEMPORARY_PATH} directory beneath the job output
 * path, if an output path is configured and the directory is not already present.
 * A failed {@code mkdirs} is logged as an error rather than thrown.
 *
 * @param conf the job configuration holding the output path
 * @throws IOException propagated from the underlying file system calls
 */
public static void makeTempPath(JobConf conf) throws IOException {
    Path jobOutput = FileOutputFormat.getOutputPath(conf);

    if (jobOutput == null) {
        return;
    }

    Path tempPath = new Path(jobOutput, TEMPORARY_PATH);
    FileSystem tempFs = tempPath.getFileSystem(conf);

    if (tempFs.exists(tempPath)) {
        return;
    }

    boolean created = tempFs.mkdirs(tempPath);

    if (!created) {
        LOG.error("mkdirs failed to create " + tempPath.toString());
    }
}

From source file:cascading.tap.hadoop.util.Hadoop18TapUtil.java

License:Open Source License

/**
 * Reports whether the current task has output that needs to be committed.
 *
 * @param conf the configuration supplying {@code mapred.work.output.dir}
 * @return {@code true} only when {@code mapred.work.output.dir} is set,
 *         {@code getFSSafe} yields a file system for it, and the path exists
 * @throws IOException propagated from the underlying file system calls
 */
public static boolean needsTaskCommit(Configuration conf) throws IOException {
    String workOutputDir = conf.get("mapred.work.output.dir");

    if (workOutputDir == null) {
        return false;
    }

    Path taskOutput = new Path(workOutputDir);
    FileSystem taskFs = getFSSafe(conf, taskOutput);

    return taskFs != null && taskFs.exists(taskOutput);
}

From source file:cascading.tap.hadoop.util.Hadoop18TapUtil.java

License:Open Source License

/**
 * copies all files from the taskoutputpath to the outputpath
 *
 * @param conf/*from w  w  w  .  ja  va 2s  . c  om*/
 */
public static void commitTask(Configuration conf) throws IOException {
    Path taskOutputPath = new Path(conf.get("mapred.work.output.dir"));

    FileSystem fs = getFSSafe(conf, taskOutputPath);

    if (fs == null)
        return;

    AtomicInteger integer = pathCounts.get(taskOutputPath.toString());

    if (integer.decrementAndGet() != 0)
        return;

    String taskId = conf.get("mapred.task.id", conf.get("mapreduce.task.id"));

    LOG.info("committing task: '{}' - {}", taskId, taskOutputPath);

    if (taskOutputPath != null) {
        if (writeDirectlyToWorkingPath(conf, taskOutputPath))
            return;

        if (fs.exists(taskOutputPath)) {
            Path jobOutputPath = taskOutputPath.getParent().getParent();
            // Move the task outputs to their final place
            moveTaskOutputs(conf, fs, jobOutputPath, taskOutputPath);

            // Delete the temporary task-specific output directory
            if (!fs.delete(taskOutputPath, true))
                LOG.info("failed to delete the temporary output directory of task: '{}' - {}", taskId,
                        taskOutputPath);

            LOG.info("saved output of task '{}' to {}", taskId, jobOutputPath);
        }
    }
}

From source file:cascading.tap.hadoop.util.Hadoop18TapUtil.java

License:Open Source License

/**
 * Deletes the {@code TEMPORARY_PATH} directory located beneath the given output path.
 * <p>
 * Returns without doing anything when {@code outputPath} is {@code null}, when
 * {@code getFSSafe} yields no file system, or when the output path itself does not exist.
 *
 * @param conf       the configuration used to resolve the file system
 * @param outputPath the job output path that may contain the temporary directory
 * @throws IOException propagated from the underlying file system calls
 */
private static synchronized void cleanTempPath(Configuration conf, Path outputPath) throws IOException {
    if (outputPath == null) {
        return;
    }

    FileSystem outputFs = getFSSafe(conf, outputPath);

    if (outputFs == null || !outputFs.exists(outputPath)) {
        return;
    }

    Path tempPath = new Path(outputPath, TEMPORARY_PATH);

    // The message is logged before the existence check, so it appears even when
    // there turns out to be nothing to delete.
    LOG.info("deleting temp path {}", tempPath);

    if (!outputFs.exists(tempPath)) {
        return;
    }

    outputFs.delete(tempPath, true);
}

From source file:cascading.tap.hadoop.util.Hadoop18TapUtil.java

License:Open Source License

/**
 * Creates the job-specific {@code TEMPORARY_PATH} directory beneath the job output
 * path, if an output path is configured and the directory is not already present.
 * A failed {@code mkdirs} is logged as an error rather than thrown.
 *
 * @param conf the configuration holding the output path
 * @throws IOException propagated from the underlying file system calls
 */
public static void makeTempPath(Configuration conf) throws IOException {
    Path jobOutput = FileOutputFormat.getOutputPath(asJobConfInstance(conf));

    if (jobOutput == null) {
        return;
    }

    Path tempPath = new Path(jobOutput, TEMPORARY_PATH);
    FileSystem tempFs = tempPath.getFileSystem(conf);

    if (tempFs.exists(tempPath)) {
        return;
    }

    boolean created = tempFs.mkdirs(tempPath);

    if (!created) {
        LOG.error("mkdirs failed to create {}", tempPath);
    }
}

From source file:cascading.tap.hadoop.ZipInputFormat.java

License:Open Source License

/**
 * Splits files returned by {@link #listPathsInternal(JobConf)}. Each file is
 * expected to be in zip format and each split corresponds to
 * {@link ZipEntry}./*from ww w .  j a v a2 s .c om*/
 *
 * @param job       the JobConf data structure, see {@link JobConf}
 * @param numSplits the number of splits required. Ignored here
 * @throws IOException if input files are not in zip format
 */
public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException {
    if (LOG.isDebugEnabled())
        LOG.debug("start splitting input ZIP files");

    Path[] files = listPathsInternal(job);

    for (int i = 0; i < files.length; i++) { // check we have valid files
        Path file = files[i];
        FileSystem fs = file.getFileSystem(job);

        if (!fs.isFile(file) || !fs.exists(file))
            throw new IOException("not a file: " + files[i]);
    }

    // generate splits
    ArrayList<ZipSplit> splits = new ArrayList<ZipSplit>(numSplits);

    for (int i = 0; i < files.length; i++) {
        Path file = files[i];
        FileSystem fs = file.getFileSystem(job);

        if (LOG.isDebugEnabled())
            LOG.debug("opening zip file: " + file.toString());

        if (isAllowSplits(fs))
            makeSplits(job, splits, fs, file);
        else
            makeSplit(job, splits, file);
    }

    if (LOG.isDebugEnabled())
        LOG.debug("end splitting input ZIP files");

    return splits.toArray(new ZipSplit[splits.size()]);
}

From source file:cc.slda.AnnotateDocuments.java

License:Apache License

/**
 * Runs this tool.
 * <p>
 * Parses the command line, verifies that the term and title index files exist
 * under the index directory, registers both with the distributed cache,
 * configures a map-only-style job over sequence files, deletes any existing
 * output directory and then runs the job to completion.
 *
 * @param args command-line arguments; the input, output and index options are
 *             required, the reducer count (default 1) and probability cutoff
 *             (default 0.9) are optional
 * @return {@code 0} when the job completes successfully; {@code -1} when the
 *         command line is invalid or the job fails
 * @throws Exception propagated from option parsing of numeric values, file
 *                   system access or job execution
 */
@SuppressWarnings({ "static-access" })
public int run(String[] args) throws Exception {
    Options options = new Options();

    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("input path").create(INPUT));
    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("output path").create(OUTPUT));
    options.addOption(OptionBuilder.withArgName("num").hasArg().withDescription("number of reducers")
            .create(NUM_REDUCERS));
    options.addOption(OptionBuilder.withArgName(PCUTOFF).hasArg()
            .withDescription("probability of topic assignment").create(PCUTOFF));
    options.addOption(OptionBuilder.withArgName(INDEX).hasArg()
            .withDescription("path to data directory containing term and title indices").create(INDEX));

    CommandLine cmdline;
    CommandLineParser parser = new GnuParser();

    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());
        return -1;
    }

    if (!cmdline.hasOption(INPUT) || !cmdline.hasOption(OUTPUT) || !cmdline.hasOption(INDEX)) {
        System.out.println("args: " + Arrays.toString(args));
        HelpFormatter formatter = new HelpFormatter();
        formatter.setWidth(120);
        formatter.printHelp(this.getClass().getName(), options);
        ToolRunner.printGenericCommandUsage(System.out);
        return -1;
    }

    String indexPath = cmdline.getOptionValue(INDEX);
    String inputPath = cmdline.getOptionValue(INPUT);
    String outputPath = cmdline.getOptionValue(OUTPUT);
    int reduceTasks = cmdline.hasOption(NUM_REDUCERS) ? Integer.parseInt(cmdline.getOptionValue(NUM_REDUCERS))
            : 1;

    float cutoff = 0.9f;
    if (cmdline.hasOption(PCUTOFF)) {
        cutoff = Float.parseFloat(cmdline.getOptionValue(PCUTOFF));
    }
    LOG.info("Tool: " + AnnotateDocuments.class.getSimpleName());
    LOG.info(" - indices path: " + indexPath);
    LOG.info(" - input path: " + inputPath);
    LOG.info(" - output path: " + outputPath);
    LOG.info(" - number of reducers: " + reduceTasks);
    LOG.info(" - log(probCutoff): " + Math.log(cutoff));

    Configuration conf = getConf();
    FileSystem fs = FileSystem.get(conf);

    Job job = Job.getInstance(conf);
    job.setJobName(AnnotateDocuments.class.getSimpleName());
    job.setJarByClass(AnnotateDocuments.class);

    // Job.getInstance copies the configuration, so every job-level setting from
    // here on must go through the job's own Configuration to reach the tasks.
    Configuration jobConf = job.getConfiguration();

    Path termIndexPath = new Path(indexPath + Path.SEPARATOR + TERM);
    Path titleIndexPath = new Path(indexPath + Path.SEPARATOR + TITLE);

    Preconditions.checkArgument(fs.exists(termIndexPath), "Missing term index files... " + termIndexPath);
    DistributedCache.addCacheFile(termIndexPath.toUri(), jobConf);
    Preconditions.checkArgument(fs.exists(titleIndexPath), "Missing title index files... " + titleIndexPath);
    DistributedCache.addCacheFile(titleIndexPath.toUri(), jobConf);

    job.setNumReduceTasks(reduceTasks);
    // Previously set on 'conf' after the job had already copied it, which meant
    // the cutoff never reached the submitted job.
    jobConf.setFloat(PCUTOFF, cutoff);

    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    FileInputFormat.setInputPaths(job, new Path(inputPath));
    FileOutputFormat.setOutputPath(job, new Path(outputPath));

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(HMapSIW.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(HMapSIW.class);

    job.setMapperClass(MyMapper.class);

    // Delete the output directory if it exists already.
    Path outputDir = new Path(outputPath);
    fs.delete(outputDir, true);

    long startTime = System.currentTimeMillis();
    boolean succeeded = job.waitForCompletion(true);
    LOG.info("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");

    // Surface a failed job to the caller instead of always reporting success.
    return succeeded ? 0 : -1;
}

From source file:cc.slda.DisplayTopic.java

License:Apache License

/**
 * Prints the top-ranked terms of every topic stored in a beta sequence file.
 * <p>
 * The term index file is read into an id-to-term map, then each record of the
 * beta file is scanned and its highest-weighted terms (at most the configured
 * display count) are written to standard output.
 *
 * @param args command-line arguments; the input (beta file) and index options
 *             are required, the top-display count is optional
 * @return {@code 0} after all topics have been printed
 * @throws Exception propagated from file system access or sequence file reading;
 *                   an {@code IllegalArgumentException} is raised by the
 *                   precondition checks when either path is not an existing file
 */
@SuppressWarnings("unchecked")
public int run(String[] args) throws Exception {
    Options options = new Options();

    options.addOption(Settings.HELP_OPTION, false, "print the help message");
    options.addOption(OptionBuilder.withArgName(Settings.PATH_INDICATOR).hasArg()
            .withDescription("input beta file").create(Settings.INPUT_OPTION));
    options.addOption(OptionBuilder.withArgName(Settings.PATH_INDICATOR).hasArg()
            .withDescription("term index file").create(ParseCorpus.INDEX));
    options.addOption(OptionBuilder.withArgName(Settings.INTEGER_INDICATOR).hasArg()
            .withDescription("display top terms only (default - 10)").create(TOP_DISPLAY_OPTION));

    String betaString = null;
    String indexString = null;
    int topDisplay = TOP_DISPLAY;

    CommandLineParser parser = new GnuParser();
    HelpFormatter formatter = new HelpFormatter();
    try {
        CommandLine line = parser.parse(options, args);

        if (line.hasOption(Settings.HELP_OPTION)) {
            // Usage must name this tool, not ParseCorpus.
            formatter.printHelp(DisplayTopic.class.getName(), options);
            System.exit(0);
        }

        if (line.hasOption(Settings.INPUT_OPTION)) {
            betaString = line.getOptionValue(Settings.INPUT_OPTION);
        } else {
            throw new ParseException("Parsing failed due to " + Settings.INPUT_OPTION + " not initialized...");
        }

        if (line.hasOption(ParseCorpus.INDEX)) {
            indexString = line.getOptionValue(ParseCorpus.INDEX);
        } else {
            throw new ParseException("Parsing failed due to " + ParseCorpus.INDEX + " not initialized...");
        }

        if (line.hasOption(TOP_DISPLAY_OPTION)) {
            topDisplay = Integer.parseInt(line.getOptionValue(TOP_DISPLAY_OPTION));
        }
    } catch (ParseException pe) {
        System.err.println(pe.getMessage());
        formatter.printHelp(DisplayTopic.class.getName(), options);
        System.exit(0);
    } catch (NumberFormatException nfe) {
        System.err.println(nfe.getMessage());
        System.exit(0);
    }

    JobConf conf = new JobConf(DisplayTopic.class);
    FileSystem fs = FileSystem.get(conf);

    Path indexPath = new Path(indexString);
    Preconditions.checkArgument(fs.exists(indexPath) && fs.isFile(indexPath), "Invalid index path...");

    Path betaPath = new Path(betaString);
    Preconditions.checkArgument(fs.exists(betaPath) && fs.isFile(betaPath), "Invalid beta path...");

    // Load the term index. This reader gets its own try/finally so it is closed
    // before the beta reader is opened; previously it was overwritten and leaked.
    Map<Integer, String> termIndex = new HashMap<Integer, String>();
    SequenceFile.Reader indexReader = null;
    try {
        IntWritable intWritable = new IntWritable();
        Text text = new Text();
        indexReader = new SequenceFile.Reader(fs, indexPath, conf);
        while (indexReader.next(intWritable, text)) {
            termIndex.put(intWritable.get(), text.toString());
        }
    } finally {
        IOUtils.closeStream(indexReader);
    }

    SequenceFile.Reader betaReader = null;
    try {
        PairOfIntFloat pairOfIntFloat = new PairOfIntFloat();
        HMapIDW hmap = new HMapIDW();
        TreeMap<Double, Integer> treeMap = new TreeMap<Double, Integer>();
        betaReader = new SequenceFile.Reader(fs, betaPath, conf);
        while (betaReader.next(pairOfIntFloat, hmap)) {
            treeMap.clear();

            System.out.println("==============================");
            System.out.println(
                    "Top ranked " + topDisplay + " terms for Topic " + pairOfIntFloat.getLeftElement());
            System.out.println("==============================");

            // Weights are negated so the TreeMap's ascending order puts the largest
            // weight first; lastKey() is then the smallest weight and is evicted
            // once more than topDisplay entries are held.
            // NOTE(review): two terms with exactly equal weight share a key, so the
            // later put replaces the earlier one - confirm this is acceptable.
            Iterator<Integer> itr1 = hmap.keySet().iterator();
            int temp1 = 0;
            while (itr1.hasNext()) {
                temp1 = itr1.next();
                treeMap.put(-hmap.get(temp1), temp1);
                if (treeMap.size() > topDisplay) {
                    treeMap.remove(treeMap.lastKey());
                }
            }

            Iterator<Double> itr2 = treeMap.keySet().iterator();
            double temp2 = 0;
            while (itr2.hasNext()) {
                temp2 = itr2.next();
                if (termIndex.containsKey(treeMap.get(temp2))) {
                    // Negate again to print the original weight.
                    System.out.println(termIndex.get(treeMap.get(temp2)) + "\t\t" + -temp2);
                } else {
                    System.out.println("How embarrassing! Term index not found...");
                }
            }
        }
    } finally {
        IOUtils.closeStream(betaReader);
    }

    return 0;
}