List of usage examples for org.apache.hadoop.fs FileSystem exists
public boolean exists(Path f) throws IOException
From source file:cascading.tap.hadoop.Hadoop18TapUtil.java
License:Open Source License
/** * copies all files from the taskoutputpath to the outputpath * * @param conf/* w w w . j ava 2 s .co m*/ */ public static void commitTask(JobConf conf) throws IOException { Path taskOutputPath = new Path(conf.get("mapred.work.output.dir")); FileSystem fs = getFSSafe(conf, taskOutputPath); if (fs == null) return; AtomicInteger integer = pathCounts.get(taskOutputPath.toString()); if (integer.decrementAndGet() != 0) return; String taskId = conf.get("mapred.task.id"); LOG.info("committing task: '" + taskId + "' - " + taskOutputPath); if (taskOutputPath != null) { if (writeDirectlyToWorkingPath(conf, taskOutputPath)) return; if (fs.exists(taskOutputPath)) { Path jobOutputPath = taskOutputPath.getParent().getParent(); // Move the task outputs to their final place moveTaskOutputs(conf, fs, jobOutputPath, taskOutputPath); // Delete the temporary task-specific output directory if (!fs.delete(taskOutputPath, true)) LOG.info("failed to delete the temporary output directory of task: '" + taskId + "' - " + taskOutputPath); LOG.info("saved output of task '" + taskId + "' to " + jobOutputPath); } } }
From source file:cascading.tap.hadoop.Hadoop18TapUtil.java
License:Open Source License
private static synchronized void cleanTempPath(JobConf conf, Path outputPath) throws IOException { // do the clean up of temporary directory if (outputPath != null) { FileSystem fileSys = getFSSafe(conf, outputPath); if (fileSys == null) return; if (!fileSys.exists(outputPath)) return; Path tmpDir = new Path(outputPath, TEMPORARY_PATH); LOG.info("deleting temp path " + tmpDir); if (fileSys.exists(tmpDir)) fileSys.delete(tmpDir, true); }/*from www. j a v a 2 s .com*/ }
From source file:cascading.tap.hadoop.Hadoop18TapUtil.java
License:Open Source License
public static void makeTempPath(JobConf conf) throws IOException { // create job specific temporary directory in output path Path outputPath = FileOutputFormat.getOutputPath(conf); if (outputPath != null) { Path tmpDir = new Path(outputPath, TEMPORARY_PATH); FileSystem fileSys = tmpDir.getFileSystem(conf); if (!fileSys.exists(tmpDir) && !fileSys.mkdirs(tmpDir)) { LOG.error("mkdirs failed to create " + tmpDir.toString()); }/* w w w.ja va 2s .com*/ } }
From source file:cascading.tap.hadoop.util.Hadoop18TapUtil.java
License:Open Source License
public static boolean needsTaskCommit(Configuration conf) throws IOException { String workpath = conf.get("mapred.work.output.dir"); if (workpath == null) return false; Path taskOutputPath = new Path(workpath); if (taskOutputPath != null) { FileSystem fs = getFSSafe(conf, taskOutputPath); if (fs == null) return false; if (fs.exists(taskOutputPath)) return true; }/* w w w . j a v a2s. c o m*/ return false; }
From source file:cascading.tap.hadoop.util.Hadoop18TapUtil.java
License:Open Source License
/** * copies all files from the taskoutputpath to the outputpath * * @param conf/*from w w w . ja va 2s . c om*/ */ public static void commitTask(Configuration conf) throws IOException { Path taskOutputPath = new Path(conf.get("mapred.work.output.dir")); FileSystem fs = getFSSafe(conf, taskOutputPath); if (fs == null) return; AtomicInteger integer = pathCounts.get(taskOutputPath.toString()); if (integer.decrementAndGet() != 0) return; String taskId = conf.get("mapred.task.id", conf.get("mapreduce.task.id")); LOG.info("committing task: '{}' - {}", taskId, taskOutputPath); if (taskOutputPath != null) { if (writeDirectlyToWorkingPath(conf, taskOutputPath)) return; if (fs.exists(taskOutputPath)) { Path jobOutputPath = taskOutputPath.getParent().getParent(); // Move the task outputs to their final place moveTaskOutputs(conf, fs, jobOutputPath, taskOutputPath); // Delete the temporary task-specific output directory if (!fs.delete(taskOutputPath, true)) LOG.info("failed to delete the temporary output directory of task: '{}' - {}", taskId, taskOutputPath); LOG.info("saved output of task '{}' to {}", taskId, jobOutputPath); } } }
From source file:cascading.tap.hadoop.util.Hadoop18TapUtil.java
License:Open Source License
private static synchronized void cleanTempPath(Configuration conf, Path outputPath) throws IOException { // do the clean up of temporary directory if (outputPath != null) { FileSystem fileSys = getFSSafe(conf, outputPath); if (fileSys == null) return; if (!fileSys.exists(outputPath)) return; Path tmpDir = new Path(outputPath, TEMPORARY_PATH); LOG.info("deleting temp path {}", tmpDir); if (fileSys.exists(tmpDir)) fileSys.delete(tmpDir, true); }/*from ww w.j av a2 s . c om*/ }
From source file:cascading.tap.hadoop.util.Hadoop18TapUtil.java
License:Open Source License
public static void makeTempPath(Configuration conf) throws IOException { // create job specific temporary directory in output path Path outputPath = FileOutputFormat.getOutputPath(asJobConfInstance(conf)); if (outputPath != null) { Path tmpDir = new Path(outputPath, TEMPORARY_PATH); FileSystem fileSys = tmpDir.getFileSystem(conf); if (!fileSys.exists(tmpDir) && !fileSys.mkdirs(tmpDir)) LOG.error("mkdirs failed to create {}", tmpDir); }/*w w w. j a v a2 s . com*/ }
From source file:cascading.tap.hadoop.ZipInputFormat.java
License:Open Source License
/** * Splits files returned by {@link #listPathsInternal(JobConf)}. Each file is * expected to be in zip format and each split corresponds to * {@link ZipEntry}./*from ww w . j a v a2 s .c om*/ * * @param job the JobConf data structure, see {@link JobConf} * @param numSplits the number of splits required. Ignored here * @throws IOException if input files are not in zip format */ public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException { if (LOG.isDebugEnabled()) LOG.debug("start splitting input ZIP files"); Path[] files = listPathsInternal(job); for (int i = 0; i < files.length; i++) { // check we have valid files Path file = files[i]; FileSystem fs = file.getFileSystem(job); if (!fs.isFile(file) || !fs.exists(file)) throw new IOException("not a file: " + files[i]); } // generate splits ArrayList<ZipSplit> splits = new ArrayList<ZipSplit>(numSplits); for (int i = 0; i < files.length; i++) { Path file = files[i]; FileSystem fs = file.getFileSystem(job); if (LOG.isDebugEnabled()) LOG.debug("opening zip file: " + file.toString()); if (isAllowSplits(fs)) makeSplits(job, splits, fs, file); else makeSplit(job, splits, file); } if (LOG.isDebugEnabled()) LOG.debug("end splitting input ZIP files"); return splits.toArray(new ZipSplit[splits.size()]); }
From source file:cc.slda.AnnotateDocuments.java
License:Apache License
/** * Runs this tool.//from w w w.j ava 2s.c o m */ @SuppressWarnings({ "static-access" }) public int run(String[] args) throws Exception { Options options = new Options(); options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("input path").create(INPUT)); options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("output path").create(OUTPUT)); options.addOption(OptionBuilder.withArgName("num").hasArg().withDescription("number of reducers") .create(NUM_REDUCERS)); options.addOption(OptionBuilder.withArgName(PCUTOFF).hasArg() .withDescription("probability of topic assignment").create(PCUTOFF)); options.addOption(OptionBuilder.withArgName(INDEX).hasArg() .withDescription("path to data directory containing term and title indices").create(INDEX)); CommandLine cmdline; CommandLineParser parser = new GnuParser(); try { cmdline = parser.parse(options, args); } catch (ParseException exp) { System.err.println("Error parsing command line: " + exp.getMessage()); return -1; } if (!cmdline.hasOption(INPUT) || !cmdline.hasOption(OUTPUT) || !cmdline.hasOption(INDEX)) { System.out.println("args: " + Arrays.toString(args)); HelpFormatter formatter = new HelpFormatter(); formatter.setWidth(120); formatter.printHelp(this.getClass().getName(), options); ToolRunner.printGenericCommandUsage(System.out); return -1; } String indexPath = cmdline.getOptionValue(INDEX); String inputPath = cmdline.getOptionValue(INPUT); String outputPath = cmdline.getOptionValue(OUTPUT); int reduceTasks = cmdline.hasOption(NUM_REDUCERS) ? Integer.parseInt(cmdline.getOptionValue(NUM_REDUCERS)) : 1; float cutoff = 0.9f; if (cmdline.hasOption(PCUTOFF)) { cutoff = Float.parseFloat(cmdline.getOptionValue(PCUTOFF)); } LOG.info("Tool: " + AnnotateDocuments.class.getSimpleName()); LOG.info(" - indices path: " + indexPath); LOG.info(" - input path: " + inputPath); LOG.info(" - output path: " + outputPath); LOG.info(" - number of reducers: " + reduceTasks); LOG.info(" - log(probCutoff): " + Math.log(cutoff)); Configuration conf = getConf(); FileSystem fs = FileSystem.get(conf); Job job = Job.getInstance(conf); job.setJobName(AnnotateDocuments.class.getSimpleName()); job.setJarByClass(AnnotateDocuments.class); String termIndex = indexPath + Path.SEPARATOR + TERM; String titleIndex = indexPath + Path.SEPARATOR + TITLE; Path termIndexPath = new Path(termIndex); Path titleIndexPath = new Path(titleIndex); Preconditions.checkArgument(fs.exists(termIndexPath), "Missing term index files... " + termIndexPath); DistributedCache.addCacheFile(termIndexPath.toUri(), job.getConfiguration()); Preconditions.checkArgument(fs.exists(titleIndexPath), "Missing title index files... " + titleIndexPath); DistributedCache.addCacheFile(titleIndexPath.toUri(), job.getConfiguration()); job.setNumReduceTasks(reduceTasks); conf.setFloat(PCUTOFF, cutoff); job.setInputFormatClass(SequenceFileInputFormat.class); job.setOutputFormatClass(SequenceFileOutputFormat.class); FileInputFormat.setInputPaths(job, new Path(inputPath)); FileOutputFormat.setOutputPath(job, new Path(outputPath)); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(HMapSIW.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(HMapSIW.class); job.setMapperClass(MyMapper.class); // Delete the output directory if it exists already. Path outputDir = new Path(outputPath); FileSystem.get(conf).delete(outputDir, true); long startTime = System.currentTimeMillis(); job.waitForCompletion(true); LOG.info("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds"); return 0; }
From source file:cc.slda.DisplayTopic.java
License:Apache License
@SuppressWarnings("unchecked") public int run(String[] args) throws Exception { Options options = new Options(); options.addOption(Settings.HELP_OPTION, false, "print the help message"); options.addOption(OptionBuilder.withArgName(Settings.PATH_INDICATOR).hasArg() .withDescription("input beta file").create(Settings.INPUT_OPTION)); options.addOption(OptionBuilder.withArgName(Settings.PATH_INDICATOR).hasArg() .withDescription("term index file").create(ParseCorpus.INDEX)); options.addOption(OptionBuilder.withArgName(Settings.INTEGER_INDICATOR).hasArg() .withDescription("display top terms only (default - 10)").create(TOP_DISPLAY_OPTION)); String betaString = null;/*from ww w .j ava 2 s . c o m*/ String indexString = null; int topDisplay = TOP_DISPLAY; CommandLineParser parser = new GnuParser(); HelpFormatter formatter = new HelpFormatter(); try { CommandLine line = parser.parse(options, args); if (line.hasOption(Settings.HELP_OPTION)) { formatter.printHelp(ParseCorpus.class.getName(), options); System.exit(0); } if (line.hasOption(Settings.INPUT_OPTION)) { betaString = line.getOptionValue(Settings.INPUT_OPTION); } else { throw new ParseException("Parsing failed due to " + Settings.INPUT_OPTION + " not initialized..."); } if (line.hasOption(ParseCorpus.INDEX)) { indexString = line.getOptionValue(ParseCorpus.INDEX); } else { throw new ParseException("Parsing failed due to " + ParseCorpus.INDEX + " not initialized..."); } if (line.hasOption(TOP_DISPLAY_OPTION)) { topDisplay = Integer.parseInt(line.getOptionValue(TOP_DISPLAY_OPTION)); } } catch (ParseException pe) { System.err.println(pe.getMessage()); formatter.printHelp(ParseCorpus.class.getName(), options); System.exit(0); } catch (NumberFormatException nfe) { System.err.println(nfe.getMessage()); System.exit(0); } JobConf conf = new JobConf(DisplayTopic.class); FileSystem fs = FileSystem.get(conf); Path indexPath = new Path(indexString); Preconditions.checkArgument(fs.exists(indexPath) && fs.isFile(indexPath), "Invalid index path..."); Path betaPath = new Path(betaString); Preconditions.checkArgument(fs.exists(betaPath) && fs.isFile(betaPath), "Invalid beta path..."); SequenceFile.Reader sequenceFileReader = null; try { IntWritable intWritable = new IntWritable(); Text text = new Text(); Map<Integer, String> termIndex = new HashMap<Integer, String>(); sequenceFileReader = new SequenceFile.Reader(fs, indexPath, conf); while (sequenceFileReader.next(intWritable, text)) { termIndex.put(intWritable.get(), text.toString()); } PairOfIntFloat pairOfIntFloat = new PairOfIntFloat(); // HMapIFW hmap = new HMapIFW(); HMapIDW hmap = new HMapIDW(); TreeMap<Double, Integer> treeMap = new TreeMap<Double, Integer>(); sequenceFileReader = new SequenceFile.Reader(fs, betaPath, conf); while (sequenceFileReader.next(pairOfIntFloat, hmap)) { treeMap.clear(); System.out.println("=============================="); System.out.println( "Top ranked " + topDisplay + " terms for Topic " + pairOfIntFloat.getLeftElement()); System.out.println("=============================="); Iterator<Integer> itr1 = hmap.keySet().iterator(); int temp1 = 0; while (itr1.hasNext()) { temp1 = itr1.next(); treeMap.put(-hmap.get(temp1), temp1); if (treeMap.size() > topDisplay) { treeMap.remove(treeMap.lastKey()); } } Iterator<Double> itr2 = treeMap.keySet().iterator(); double temp2 = 0; while (itr2.hasNext()) { temp2 = itr2.next(); if (termIndex.containsKey(treeMap.get(temp2))) { System.out.println(termIndex.get(treeMap.get(temp2)) + "\t\t" + -temp2); } else { System.out.println("How embarrassing! Term index not found..."); } } } } finally { IOUtils.closeStream(sequenceFileReader); } return 0; }