List of usage examples for org.apache.hadoop.fs Path isAbsolute
public boolean isAbsolute()
From source file:edu.stolaf.cs.wmrserver.JobServiceHandler.java
License:Apache License
/**
 * Turns a path string into a {@link Path}, anchoring relative paths at the
 * handler's home directory.
 *
 * @param pathString the textual path to resolve
 * @return the path itself when it is absolute; otherwise the same string
 *         resolved against {@code _homeDir}
 */
private Path resolvePath(String pathString) {
    Path candidate = new Path(pathString);
    return candidate.isAbsolute() ? candidate : new Path(_homeDir, pathString);
}
From source file:edu.stolaf.cs.wmrserver.JobServiceHandler.java
License:Apache License
private void checkPath(FileSystem fs, Path path) throws PermissionException, NotFoundException, IOException { if (_disallowLocalInput) { // If we update to Hadoop 1.0, we should use the canonical URI which is definitely unique to each file system. However, the normal one should be, too. if (fs.getUri().equals(FileSystem.getLocal(new Configuration()).getUri())) { throw new PermissionException("Not allowed to read from the local file system."); }//from w w w.java 2 s.c o m } if (!fs.exists(path)) throw new NotFoundException("Input path does not exist: " + path.toString()); if (_enforceInputContainment) { // Check that path is inside home directory Path relativePath = relativizePath(_homeDir, path); if (relativePath.isAbsolute()) ; // Has authority or begins with "/" throw new PermissionException("Not allowed to read outside the " + "WebMapReduce home directory (" + _homeDir.toString() + "). Please specify a relative path."); } }
From source file:edu.umd.cloud9.collection.wikipedia.BuildWikipediaForwardIndex.java
License:Apache License
@SuppressWarnings("static-access") @Override/*from w w w . j a v a 2 s .c o m*/ public int run(String[] args) throws Exception { Options options = new Options(); options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("input").create(INPUT_OPTION)); options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("tmp output directory") .create(OUTPUT_OPTION)); options.addOption( OptionBuilder.withArgName("path").hasArg().withDescription("index file").create(INDEX_FILE_OPTION)); options.addOption(OptionBuilder.withArgName("en|sv|de|cs|es|zh|ar|tr").hasArg() .withDescription("two-letter language code").create(LANGUAGE_OPTION)); CommandLine cmdline; CommandLineParser parser = new GnuParser(); try { cmdline = parser.parse(options, args); } catch (ParseException exp) { System.err.println("Error parsing command line: " + exp.getMessage()); return -1; } if (!cmdline.hasOption(INPUT_OPTION) || !cmdline.hasOption(OUTPUT_OPTION) || !cmdline.hasOption(INDEX_FILE_OPTION)) { HelpFormatter formatter = new HelpFormatter(); formatter.printHelp(this.getClass().getName(), options); ToolRunner.printGenericCommandUsage(System.out); return -1; } Path inputPath = new Path(cmdline.getOptionValue(INPUT_OPTION)); String outputPath = cmdline.getOptionValue(OUTPUT_OPTION); String indexFile = cmdline.getOptionValue(INDEX_FILE_OPTION); if (!inputPath.isAbsolute()) { System.err.println("Error: " + INPUT_OPTION + " must be an absolute path!"); return -1; } String language = null; if (cmdline.hasOption(LANGUAGE_OPTION)) { language = cmdline.getOptionValue(LANGUAGE_OPTION); if (language.length() != 2) { System.err.println("Error: \"" + language + "\" unknown language!"); return -1; } } JobConf conf = new JobConf(getConf(), BuildWikipediaForwardIndex.class); FileSystem fs = FileSystem.get(conf); LOG.info("Tool name: " + this.getClass().getName()); LOG.info(" - input path: " + inputPath); LOG.info(" - output path: " + outputPath); LOG.info(" - index file: " + 
indexFile); LOG.info("Note: This tool only works on block-compressed SequenceFiles!"); LOG.info(" - language: " + language); conf.setJobName(String.format("BuildWikipediaForwardIndex[%s: %s, %s: %s, %s: %s]", INPUT_OPTION, inputPath, INDEX_FILE_OPTION, indexFile, LANGUAGE_OPTION, language)); conf.setNumReduceTasks(1); FileInputFormat.setInputPaths(conf, inputPath); FileOutputFormat.setOutputPath(conf, new Path(outputPath)); FileOutputFormat.setCompressOutput(conf, false); if (language != null) { conf.set("wiki.language", language); } conf.setInputFormat(NoSplitSequenceFileInputFormat.class); conf.setOutputKeyClass(IntWritable.class); conf.setOutputValueClass(Text.class); conf.setMapRunnerClass(MyMapRunner.class); conf.setReducerClass(IdentityReducer.class); // delete the output directory if it exists already fs.delete(new Path(outputPath), true); RunningJob job = JobClient.runJob(conf); Counters counters = job.getCounters(); int blocks = (int) counters.getCounter(Blocks.Total); LOG.info("number of blocks: " + blocks); LOG.info("Writing index file..."); LineReader reader = new LineReader(fs.open(new Path(outputPath + "/part-00000"))); FSDataOutputStream out = fs.create(new Path(indexFile), true); out.writeUTF("edu.umd.cloud9.collection.wikipedia.WikipediaForwardIndex"); out.writeUTF(inputPath.toString()); out.writeInt(blocks); int cnt = 0; Text line = new Text(); while (reader.readLine(line) > 0) { String[] arr = line.toString().split("\\s+"); int docno = Integer.parseInt(arr[0]); int offset = Integer.parseInt(arr[1]); short fileno = Short.parseShort(arr[2]); out.writeInt(docno); out.writeInt(offset); out.writeShort(fileno); cnt++; if (cnt % 100000 == 0) { LOG.info(cnt + " blocks written"); } } reader.close(); out.close(); if (cnt != blocks) { throw new RuntimeException("Error: mismatch in block count!"); } return 0; }
From source file:edu.umd.cloud9.collection.wikipedia.WikipediaForwardIndexBuilder.java
License:Apache License
@SuppressWarnings("static-access") @Override//from ww w.ja v a 2 s . c o m public int run(String[] args) throws Exception { Options options = new Options(); options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("input").create(INPUT_OPTION)); options.addOption( OptionBuilder.withArgName("path").hasArg().withDescription("index file").create(INDEX_FILE_OPTION)); options.addOption(OptionBuilder .withArgName("en|sv|nl|de|fr|ru|it|es|vi|pl|ja|pt|zh|uk|ca|fa|no|fi|id|ar|sr|ko|hi|zh_yue|cs|tr") .hasArg().withDescription("two-letter or six-letter language code").create(LANGUAGE_OPTION)); CommandLine cmdline; CommandLineParser parser = new GnuParser(); try { cmdline = parser.parse(options, args); } catch (ParseException exp) { System.err.println("Error parsing command line: " + exp.getMessage()); return -1; } if (!cmdline.hasOption(INPUT_OPTION) || !cmdline.hasOption(INDEX_FILE_OPTION)) { HelpFormatter formatter = new HelpFormatter(); formatter.printHelp(this.getClass().getName(), options); ToolRunner.printGenericCommandUsage(System.out); return -1; } Path inputPath = new Path(cmdline.getOptionValue(INPUT_OPTION)); String indexFile = cmdline.getOptionValue(INDEX_FILE_OPTION); String tmpPath = "tmp-" + WikipediaForwardIndexBuilder.class.getSimpleName() + "-" + RANDOM.nextInt(10000); if (!inputPath.isAbsolute()) { System.err.println("Error: " + INPUT_OPTION + " must be an absolute path!"); return -1; } String language = null; if (cmdline.hasOption(LANGUAGE_OPTION)) { language = cmdline.getOptionValue(LANGUAGE_OPTION); if (!(language.length() == 2 || language.length() == 6)) { System.err.println("Error: \"" + language + "\" unknown language!"); return -1; } } JobConf conf = new JobConf(getConf(), WikipediaForwardIndexBuilder.class); FileSystem fs = FileSystem.get(conf); LOG.info("Tool name: " + this.getClass().getName()); LOG.info(" - input path: " + inputPath); LOG.info(" - index file: " + indexFile); LOG.info(" - language: " + language); 
LOG.info("Note: This tool only works on block-compressed SequenceFiles!"); conf.setJobName(String.format("BuildWikipediaForwardIndex[%s: %s, %s: %s, %s: %s]", INPUT_OPTION, inputPath, INDEX_FILE_OPTION, indexFile, LANGUAGE_OPTION, language)); conf.setNumReduceTasks(1); FileInputFormat.setInputPaths(conf, inputPath); FileOutputFormat.setOutputPath(conf, new Path(tmpPath)); FileOutputFormat.setCompressOutput(conf, false); if (language != null) { conf.set("wiki.language", language); } conf.setInputFormat(NoSplitSequenceFileInputFormat.class); conf.setOutputKeyClass(IntWritable.class); conf.setOutputValueClass(Text.class); conf.setMapRunnerClass(MyMapRunner.class); conf.setReducerClass(IdentityReducer.class); // Delete the output directory if it exists already. fs.delete(new Path(tmpPath), true); RunningJob job = JobClient.runJob(conf); Counters counters = job.getCounters(); int blocks = (int) counters.getCounter(Blocks.Total); LOG.info("number of blocks: " + blocks); LOG.info("Writing index file..."); LineReader reader = new LineReader(fs.open(new Path(tmpPath + "/part-00000"))); FSDataOutputStream out = fs.create(new Path(indexFile), true); out.writeUTF(edu.umd.cloud9.collection.wikipedia.WikipediaForwardIndex.class.getCanonicalName()); out.writeUTF(inputPath.toString()); out.writeInt(blocks); int cnt = 0; Text line = new Text(); while (reader.readLine(line) > 0) { String[] arr = line.toString().split("\\s+"); int docno = Integer.parseInt(arr[0]); int offset = Integer.parseInt(arr[1]); short fileno = Short.parseShort(arr[2]); out.writeInt(docno); out.writeInt(offset); out.writeShort(fileno); cnt++; if (cnt % 100000 == 0) { LOG.info(cnt + " blocks written"); } } reader.close(); out.close(); if (cnt != blocks) { throw new RuntimeException("Error: mismatch in block count!"); } // Clean up. fs.delete(new Path(tmpPath), true); return 0; }
From source file:fr.ens.biologie.genomique.eoulsan.modules.mgmt.hadoop.DistCp.java
License:LGPL
/** * Make a path relative with respect to a root path. absPath is always assumed * to descend from root. Otherwise returned path is null. */// w w w. ja va 2 s .co m static String makeRelative(final Path root, final Path absPath) { if (!absPath.isAbsolute()) { throw new IllegalArgumentException("!absPath.isAbsolute(), absPath=" + absPath); } String p = absPath.toUri().getPath(); StringTokenizer pathTokens = new StringTokenizer(p, "/"); for (StringTokenizer rootTokens = new StringTokenizer(root.toUri().getPath(), "/"); rootTokens .hasMoreTokens();) { if (!rootTokens.nextToken().equals(pathTokens.nextToken())) { return null; } } StringBuilder sb = new StringBuilder(); for (; pathTokens.hasMoreTokens();) { sb.append(pathTokens.nextToken()); if (pathTokens.hasMoreTokens()) { sb.append(Path.SEPARATOR); } } return sb.length() == 0 ? "." : sb.toString(); }
From source file:gobblin.data.management.retention.profile.ConfigurableGlobDatasetFinder.java
License:Apache License
public ConfigurableGlobDatasetFinder(FileSystem fs, Properties jobProps, Config config) { for (String property : requiredProperties()) { Preconditions.checkArgument(config.hasPath(property) || config.hasPath(DEPRECATIONS.get(property)), String.format("Missing required property %s", property)); }//ww w .j a va2 s . c o m if (ConfigUtils.hasNonEmptyPath(config, DATASET_BLACKLIST_KEY)) { this.blacklist = Optional.of(Pattern.compile(config.getString(DATASET_BLACKLIST_KEY))); } else if (ConfigUtils.hasNonEmptyPath(config, DATASET_FINDER_BLACKLIST_KEY)) { this.blacklist = Optional.of(Pattern.compile(config.getString(DATASET_FINDER_BLACKLIST_KEY))); } else { this.blacklist = Optional.absent(); } this.fs = fs; Path tmpDatasetPattern; if (config.hasPath(DATASET_FINDER_PATTERN_KEY)) { tmpDatasetPattern = new Path(config.getString(DATASET_FINDER_PATTERN_KEY)); } else { tmpDatasetPattern = new Path(config.getString(DATASET_PATTERN_KEY)); } this.datasetPattern = tmpDatasetPattern.isAbsolute() ? tmpDatasetPattern : new Path(this.fs.getWorkingDirectory(), tmpDatasetPattern); this.commonRoot = PathUtils.deepestNonGlobPath(this.datasetPattern); this.props = jobProps; }
From source file:gobblin.data.management.trash.Trash.java
License:Apache License
/** * Create location of Trash directory. Parsed from props at key {@link #TRASH_LOCATION_KEY}, defaulting to * /home/directory/_GOBBLIN_TRASH./*www .j a va 2 s . c o m*/ * @param fs {@link org.apache.hadoop.fs.FileSystem} where trash should be found. * @param props {@link java.util.Properties} containing trash configuration. * @param user If the trash location contains the token $USER, the token will be replaced by the value of user. * @return {@link org.apache.hadoop.fs.Path} for trash directory. * @throws java.io.IOException */ protected Path createTrashLocation(FileSystem fs, Properties props, String user) throws IOException { Path trashLocation; if (props.containsKey(TRASH_LOCATION_KEY)) { trashLocation = new Path(props.getProperty(TRASH_LOCATION_KEY).replaceAll("\\$USER", user)); } else { trashLocation = new Path(fs.getHomeDirectory(), DEFAULT_TRASH_DIRECTORY); LOG.info("Using default trash location at " + trashLocation); } if (!trashLocation.isAbsolute()) { throw new IllegalArgumentException( "Trash location must be absolute. Found " + trashLocation.toString()); } Path qualifiedTrashLocation = fs.makeQualified(trashLocation); ensureTrashLocationExists(fs, qualifiedTrashLocation); return qualifiedTrashLocation; }
From source file:gobblin.data.management.trash.Trash.java
License:Apache License
/**
 * Moves a path into the trash directory, mirroring its absolute location
 * underneath the trash root.
 *
 * <p>A relative path is first resolved against the file system's working
 * directory. If the parent of the destination is missing it is created; if
 * the parent exists and the destination itself is already taken, the current
 * time in milliseconds is appended to the destination name.
 *
 * @param path {@link org.apache.hadoop.fs.FileSystem} path to move to trash.
 * @return the result of the rename into the trash directory.
 * @throws IOException
 */
@Override
public boolean moveToTrash(Path path) throws IOException {
    Path source = path;
    if (!source.isAbsolute()) {
        source = new Path(this.fs.getWorkingDirectory(), path);
    }

    Path destination = PathUtils.mergePaths(this.trashLocation, source);
    Path destinationParent = destination.getParent();

    if (this.fs.exists(destinationParent)) {
        if (this.fs.exists(destination)) {
            // Name clash inside the trash: keep both copies apart with a timestamp suffix.
            destination = destination.suffix("_" + System.currentTimeMillis());
        }
    } else {
        this.fs.mkdirs(destinationParent);
    }

    return this.fs.rename(source, destination);
}
From source file:gobblin.util.PathUtils.java
License:Apache License
/** * Is an absolute path (ie a slash relative path part) * AND a scheme is null AND authority is null. *///from www . jav a 2 s . c o m public static boolean isAbsoluteAndSchemeAuthorityNull(Path path) { return (path.isAbsolute() && path.toUri().getScheme() == null && path.toUri().getAuthority() == null); }
From source file:hsyndicate.hadoop.dfs.HSyndicateDFS.java
License:Apache License
/**
 * Returns an absolute version of the given path.
 *
 * @param path the path to make absolute
 * @return {@code path} unchanged when it is already absolute; otherwise
 *         {@code path} resolved against {@code workingDir}
 */
private Path makeAbsolute(Path path) {
    return path.isAbsolute() ? path : new Path(this.workingDir, path);
}