List of usage examples for org.apache.hadoop.fs.Path#makeQualified(FileSystem)
@Deprecated
public Path makeQualified(FileSystem fs)
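This method qualifies a relative or scheme-less path against a FileSystem's URI and working directory. It is deprecated; the Hadoop javadoc points to Path#makeQualified(URI, Path) instead, and FileSystem#makeQualified(Path) is the other common replacement. Before the per-file examples, here is a minimal self-contained sketch of the deprecated call next to the non-deprecated forms; the local-filesystem setup and the class name MakeQualifiedDemo are illustrative only, not taken from any of the projects below.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class MakeQualifiedDemo {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Illustrative: any FileSystem (HDFS, S3A, ...) works the same way
        FileSystem fs = FileSystem.getLocal(conf);
        Path relative = new Path("data/input.txt");

        // Deprecated form used throughout the examples below
        Path q1 = relative.makeQualified(fs);

        // Equivalent non-deprecated forms
        Path q2 = fs.makeQualified(relative);
        Path q3 = relative.makeQualified(fs.getUri(), fs.getWorkingDirectory());

        // All three print the same fully qualified URI,
        // e.g. file:/home/user/data/input.txt
        System.out.println(q1 + " " + q2 + " " + q3);
    }
}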
From source file:edu.umn.cs.spatialHadoop.nasa.HTTPFileSystem.java
License:Open Source License
/**
 * Returns the status of a file. This method is designed specifically to work
 * with the LP DAAC archive and will not work correctly with other web sites.
 * Since HTTP does not tell whether a URL points to a file or a directory,
 * we assume that URLs ending with HDF, XML and JPG are files, while anything
 * else is considered a directory.
 */
@Override
public FileStatus getFileStatus(Path f) throws IOException {
    f = f.makeQualified(this);
    URL url = f.toUri().toURL();
    int retryCount = HTTPFileSystem.retries;
    HttpURLConnection connection = null;
    try {
        while (connection == null && retryCount-- > 0) {
            try {
                connection = (HttpURLConnection) url.openConnection();
            } catch (java.net.SocketException e) {
                if (retryCount == 0)
                    throw e;
                LOG.info("Error accessing file '" + url + "'. Retries left: " + retryCount);
                try {
                    Thread.sleep(1000);
                } catch (InterruptedException e1) {
                }
            } catch (java.net.UnknownHostException e) {
                if (retryCount == 0)
                    throw e;
                LOG.info("Error accessing file '" + url + "'. Retries left: " + retryCount);
                try {
                    Thread.sleep(1000);
                } catch (InterruptedException e1) {
                }
            }
        }
        if (connection == null)
            throw new RuntimeException("Could not connect to " + f);
        String lengthStr = connection.getHeaderField("content-Length");
        long length = lengthStr == null ? -1 : Long.parseLong(lengthStr);
        if (length == -1)
            LOG.info("Unknown HTTP file length " + length);
        long modificationTime = connection.getLastModified();
        if (modificationTime == 0)
            modificationTime = connection.getDate();
        // Hard-coded to work with LP DAAC archives
        boolean isdir = !f.getName().matches("(?i:([^*\\?])*\\.(hdf|xml|jpg|gz|bz2|zip|txt|csv|tsv)$)");
        return new FileStatus(length, isdir, 1, BLOCK_SIZE, modificationTime, 0, null, null, null, f);
    } finally {
        if (connection != null)
            connection.disconnect();
    }
}
From source file:eu.stratosphere.hadoopcompatibility.FileOutputCommitterWrapper.java
License:Apache License
public Path getTempTaskOutputPath(JobConf conf, TaskAttemptID taskAttemptID) {
    Path outputPath = FileOutputFormat.getOutputPath(conf);
    if (outputPath != null) {
        Path p = new Path(outputPath,
                (FileOutputCommitter.TEMP_DIR_NAME + Path.SEPARATOR + "_" + taskAttemptID.toString()));
        try {
            FileSystem fs = p.getFileSystem(conf);
            return p.makeQualified(fs);
        } catch (IOException ie) {
            LOG.warn(StringUtils.stringifyException(ie));
            return p;
        }
    }
    return null;
}
From source file:gr.ntua.h2rdf.inputFormat.MyFileInputFormat.java
License:Open Source License
/**
 * Add a {@link Path} to the list of inputs for the map-reduce job.
 *
 * @param job  The {@link Job} to modify
 * @param path {@link Path} to be added to the list of inputs for
 *             the map-reduce job.
 */
public static void addInputPath(Job job, Path path) throws IOException {
    Configuration conf = job.getConfiguration();
    FileSystem fs = FileSystem.get(conf);
    path = path.makeQualified(fs);
    String dirStr = StringUtils.escapeString(path.toString());
    String dirs = conf.get("mapred.input.dir");
    conf.set("mapred.input.dir", dirs == null ? dirStr : dirs + "," + dirStr);
}
From source file:hdfs.FileUtil.java
License:Apache License
private static void checkDependencies(FileSystem srcFS, Path src, FileSystem dstFS, Path dst)
        throws IOException {
    if (srcFS == dstFS) {
        String srcq = src.makeQualified(srcFS).toString() + Path.SEPARATOR;
        String dstq = dst.makeQualified(dstFS).toString() + Path.SEPARATOR;
        if (dstq.startsWith(srcq)) {
            if (srcq.length() == dstq.length()) {
                throw new IOException("Cannot copy " + src + " to itself.");
            } else {
                throw new IOException("Cannot copy " + src + " to its subdirectory " + dst);
            }
        }
    }
}
From source file:hsyndicate.hadoop.dfs.HSyndicateDFS.java
License:Apache License
@Override
public synchronized FileStatus[] listStatus(Path f) throws IOException {
    SyndicateFSPath hpath = makeSyndicateFSPath(f);
    if (!this.syndicateFS.exists(hpath)) {
        return null;
    }
    if (this.syndicateFS.isFile(hpath)) {
        return new FileStatus[] {
                new HSyndicateFileStatus(f.makeQualified(this), this.syndicateFS, hpath) };
    }
    List<FileStatus> ret = new ArrayList<FileStatus>();
    for (String p : this.syndicateFS.readDirectoryEntries(hpath)) {
        ret.add(getFileStatus(new Path(f, p)));
    }
    return ret.toArray(new FileStatus[0]);
}
From source file:hsyndicate.hadoop.dfs.HSyndicateDFS.java
License:Apache License
@Override
public synchronized FileStatus getFileStatus(Path f) throws IOException {
    SyndicateFSPath hpath = makeSyndicateFSPath(f);
    if (!this.syndicateFS.exists(hpath)) {
        throw new FileNotFoundException(f + ": No such file or directory.");
    }
    return new HSyndicateFileStatus(f.makeQualified(this), this.syndicateFS, hpath);
}
From source file:it.crs4.seal.common.SealToolParser.java
License:Open Source License
/**
 * Parses the command line.
 *
 * Override this method to implement additional command line options,
 * but do make sure you call this method to parse the default options.
 */
protected CommandLine parseOptions(Configuration conf, String[] args) throws ParseException, IOException {
    myconf = conf;
    setDefaultProperties(conf);

    // Load settings from the configuration file. First, parse the command line
    // (in getRcFile) looking for an option overriding the default Seal configuration file.
    File configFile = getRcFile(args);
    if (configFile != null)
        loadConfig(conf, configFile);

    // Now parse the entire command line using the default Hadoop parser. This way the
    // user can override properties specified in the config file with properties
    // specified on the command line.
    CommandLine line = new GenericOptionsParser(conf, options, args).getCommandLine();
    if (line == null) // getCommandLine returns null if there was a parsing error
        throw new ParseException("Error parsing command line");

    ////////////////////// input/output formats //////////////////////
    // Set the configuration property. Then check the property to ensure it has a
    // valid value, regardless of whether we just set it, so that the check is
    // also valid if the property is set directly.
    if (line.hasOption(opt_inputFormat.getOpt()))
        myconf.set(INPUT_FORMAT_CONF, line.getOptionValue(opt_inputFormat.getOpt()));
    validateIOFormat(INPUT_FORMAT_CONF, acceptedInputFormats);

    if (line.hasOption(opt_outputFormat.getOpt()))
        myconf.set(OUTPUT_FORMAT_CONF, line.getOptionValue(opt_outputFormat.getOpt()));
    validateIOFormat(OUTPUT_FORMAT_CONF, acceptedOutputFormats);

    if (conf.get(INPUT_FORMAT_ENCODING) != null) {
        String value = conf.get(INPUT_FORMAT_ENCODING);
        if (value.equals("sanger") || value.equals("illumina"))
            conf.set(fi.tkk.ics.hadoop.bam.FormatConstants.CONF_INPUT_BASE_QUALITY_ENCODING, value);
        else
            throw new ParseException("Invalid " + INPUT_FORMAT_ENCODING + ". Expected 'sanger' or 'illumina'");
    }

    /////////////////////// output compression /////////////////////
    if (line.hasOption(opt_compressOutput.getOpt())) {
        myconf.setBoolean("mapred.output.compress", true);
        String codec = line.getOptionValue(opt_compressOutput.getOpt());
        if (codec != null) {
            String codecClass = "org.apache.hadoop.io.compress.GzipCodec"; // default
            if ("auto".equalsIgnoreCase(codec) || "gzip".equalsIgnoreCase(codec)) {
                // pass. Already set
            } else if ("bzip2".equalsIgnoreCase(codec))
                codecClass = "org.apache.hadoop.io.compress.BZip2Codec";
            else if ("snappy".equalsIgnoreCase(codec))
                codecClass = "org.apache.hadoop.io.compress.SnappyCodec";
            else {
                throw new ParseException("Unknown codec " + codec
                        + ". Valid values are gzip, bzip2, snappy and auto.\n"
                        + "If you want to use an unsupported codec pass 'auto' and set the property mapred.output.compression.codec directly");
            }
            myconf.set("mapred.output.compression.codec", codecClass);
        }
    }

    ////////////////////// number of reducers //////////////////////
    if (line.hasOption(opt_nReduceTasks.getOpt())) {
        String rString = line.getOptionValue(opt_nReduceTasks.getOpt());
        try {
            int r = Integer.parseInt(rString);
            if (r >= minReduceTasks)
                nReduceTasks = r;
            else
                throw new ParseException("Number of reducers must be greater than or equal to "
                        + minReduceTasks + " (got " + rString + ")");
        } catch (NumberFormatException e) {
            throw new ParseException("Invalid number of reduce tasks '" + rString + "'");
        }
    }

    ////////////////////// positional arguments //////////////////////
    String[] otherArgs = line.getArgs();
    if (otherArgs.length < 2) // require at least two: one input and one output
        throw new ParseException("You must provide input and output paths");
    else {
        FileSystem fs;
        for (int i = 0; i < otherArgs.length - 1; ++i) {
            Path p = new Path(otherArgs[i]);
            fs = p.getFileSystem(conf);
            p = p.makeQualified(fs);
            FileStatus[] files = fs.globStatus(p);
            if (files != null && files.length > 0) {
                for (FileStatus status : files)
                    inputs.add(status.getPath());
            } else
                throw new ParseException("Input path " + p.toString() + " doesn't exist");
        }
        // now the last one, which should be the output path
        outputDir = new Path(otherArgs[otherArgs.length - 1]);
        fs = outputDir.getFileSystem(conf);
        outputDir = outputDir.makeQualified(fs);
        if (fs.exists(outputDir))
            throw new ParseException(
                    "Output path " + outputDir.toString() + " already exists. Won't overwrite");
    }

    return line;
}
From source file:it.crs4.seal.demux.Demux.java
License:Open Source License
private void createLaneContentFiles(Path outputPath, Path sampleSheetPath) throws IOException {
    StringBuilder builder = new StringBuilder(100);
    try {
        Path qualifiedPath = sampleSheetPath.makeQualified(sampleSheetPath.getFileSystem(getConf()));
        SampleSheet sheet = DemuxUtils.loadSampleSheet(qualifiedPath, getConf());
        Collection<String> samples = sheet.getSamples();
        // we have one output directory per sample, thus we need one LaneContent file per sample.
        for (String sample : samples) {
            Writer out = makeLaneContentWriter(outputPath, sample);
            try {
                for (int lane = 1; lane <= 8; ++lane) {
                    builder.delete(0, builder.length());
                    builder.append(lane - 1).append(":");
                    if (sheet.getSamplesInLane(lane).contains(sample))
                        builder.append(sample);
                    builder.append("\n");
                    out.write(builder.toString());
                }
            } finally {
                out.close();
            }
        }
    } catch (SampleSheet.FormatException e) {
        throw new RuntimeException("Error in sample sheet. " + e.getMessage());
    }
}
From source file:it.crs4.seal.read_sort.MergeAlignments.java
License:Open Source License
private Path getQualifiedPath(String simplePath) throws IOException {
    Path path = new Path(simplePath);
    return path.makeQualified(path.getFileSystem(getConf()));
}
From source file:it.crs4.seal.read_sort.MergeAlignments.java
License:Open Source License
private Path[] getSourcePaths() throws Exception {
    Path srcPath = new Path(userInput);
    FileSystem srcFs = srcPath.getFileSystem(getConf());

    if (srcFs.exists(srcPath)) {
        FileStatus stat = srcFs.getFileStatus(srcPath);
        if (stat.isDir()) {
            String msg = "source path " + srcPath + " is a directory. Globbing with ";
            srcPath = new Path(srcPath, "*");
            log.info(msg + srcPath);
        }
    }

    // Glob the source path. The returned paths are already sorted. We filter out
    // paths starting with '_' (see SourcePathFilter).
    // If the path doesn't contain a glob pattern and it doesn't exist, the function returns null.
    Path[] sources = FileUtil.stat2Paths(srcFs.globStatus(srcPath, new SourcePathFilter()));
    if (sources == null)
        throw new IllegalArgumentException("Source path " + srcPath.makeQualified(srcFs) + " doesn't exist");

    if (log.isDebugEnabled()) {
        log.debug("Sources:");
        for (int i = 0; i < sources.length; ++i)
            log.debug(sources[i]);
    }

    if (sources.length == 0)
        throw new IllegalArgumentException("no source files selected");

    log.info("Merging " + sources.length + " files.");
    return sources;
}