List of usage examples for org.apache.hadoop.fs.Path#makeQualified(FileSystem)
@Deprecated
public Path makeQualified(FileSystem fs)
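This method qualifies a relative or scheme-less path against a FileSystem's URI and working directory. It is deprecated; the Hadoop javadoc points to Path#makeQualified(URI, Path) instead, and FileSystem#makeQualified(Path) is the other common replacement. Before the per-file examples, here is a minimal self-contained sketch of the deprecated call next to the non-deprecated forms; the local-filesystem setup and the class name MakeQualifiedDemo are illustrative only, not taken from any of the projects below.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class MakeQualifiedDemo {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Illustrative: any FileSystem (HDFS, S3A, ...) works the same way
        FileSystem fs = FileSystem.getLocal(conf);
        Path relative = new Path("data/input.txt");

        // Deprecated form used throughout the examples below
        Path q1 = relative.makeQualified(fs);

        // Equivalent non-deprecated forms
        Path q2 = fs.makeQualified(relative);
        Path q3 = relative.makeQualified(fs.getUri(), fs.getWorkingDirectory());

        // All three print the same fully qualified URI,
        // e.g. file:/home/user/data/input.txt
        System.out.println(q1 + " " + q2 + " " + q3);
    }
}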
From source file:edu.umn.cs.spatialHadoop.nasa.HTTPFileSystem.java
License:Open Source License
/**
 * Returns the status of a file. This method is designed specifically to work
 * with the LP DAAC archive and will not work correctly with other web sites.
 * Since HTTP does not tell whether a URL points to a file or a directory,
 * we assume that URLs ending with HDF, XML and JPG are files, while anything
 * else is considered a directory.
 */
@Override
public FileStatus getFileStatus(Path f) throws IOException {
    f = f.makeQualified(this);
    URL url = f.toUri().toURL();
    int retryCount = HTTPFileSystem.retries;
    HttpURLConnection connection = null;
    try {
        while (connection == null && retryCount-- > 0) {
            try {
                connection = (HttpURLConnection) url.openConnection();
            } catch (java.net.SocketException e) {
                if (retryCount == 0)
                    throw e;
                LOG.info("Error accessing file '" + url + "'. Retries left: " + retryCount);
                try {
                    Thread.sleep(1000);
                } catch (InterruptedException e1) {
                }
            } catch (java.net.UnknownHostException e) {
                if (retryCount == 0)
                    throw e;
                LOG.info("Error accessing file '" + url + "'. Retries left: " + retryCount);
                try {
                    Thread.sleep(1000);
                } catch (InterruptedException e1) {
                }
            }
        }
        if (connection == null)
            throw new RuntimeException("Could not connect to " + f);
        String lengthStr = connection.getHeaderField("content-Length");
        long length = lengthStr == null ? -1 : Long.parseLong(lengthStr);
        if (length == -1)
            LOG.info("Unknown HTTP file length " + length);
        long modificationTime = connection.getLastModified();
        if (modificationTime == 0)
            modificationTime = connection.getDate();
        // Hard-coded to work with LP DAAC archives
        boolean isdir = !f.getName().matches("(?i:([^*\\?])*\\.(hdf|xml|jpg|gz|bz2|zip|txt|csv|tsv)$)");
        return new FileStatus(length, isdir, 1, BLOCK_SIZE, modificationTime, 0, null, null, null, f);
    } finally {
        if (connection != null)
            connection.disconnect();
    }
}
From source file:eu.stratosphere.hadoopcompatibility.FileOutputCommitterWrapper.java
License:Apache License
public Path getTempTaskOutputPath(JobConf conf, TaskAttemptID taskAttemptID) {
    Path outputPath = FileOutputFormat.getOutputPath(conf);
    if (outputPath != null) {
        Path p = new Path(outputPath,
                (FileOutputCommitter.TEMP_DIR_NAME + Path.SEPARATOR + "_" + taskAttemptID.toString()));
        try {
            FileSystem fs = p.getFileSystem(conf);
            return p.makeQualified(fs);
        } catch (IOException ie) {
            LOG.warn(StringUtils.stringifyException(ie));
            return p;
        }
    }
    return null;
}
From source file:gr.ntua.h2rdf.inputFormat.MyFileInputFormat.java
License:Open Source License
/**
 * Add a {@link Path} to the list of inputs for the map-reduce job.
 *
 * @param job  The {@link Job} to modify
 * @param path {@link Path} to be added to the list of inputs for
 *             the map-reduce job.
 */
public static void addInputPath(Job job, Path path) throws IOException {
    Configuration conf = job.getConfiguration();
    FileSystem fs = FileSystem.get(conf);
    path = path.makeQualified(fs);
    String dirStr = StringUtils.escapeString(path.toString());
    String dirs = conf.get("mapred.input.dir");
    conf.set("mapred.input.dir", dirs == null ? dirStr : dirs + "," + dirStr);
}
From source file:hdfs.FileUtil.java
License:Apache License
private static void checkDependencies(FileSystem srcFS, Path src, FileSystem dstFS, Path dst)
        throws IOException {
    if (srcFS == dstFS) {
        String srcq = src.makeQualified(srcFS).toString() + Path.SEPARATOR;
        String dstq = dst.makeQualified(dstFS).toString() + Path.SEPARATOR;
        if (dstq.startsWith(srcq)) {
            if (srcq.length() == dstq.length()) {
                throw new IOException("Cannot copy " + src + " to itself.");
            } else {
                throw new IOException("Cannot copy " + src + " to its subdirectory " + dst);
            }
        }
    }
}
From source file:hsyndicate.hadoop.dfs.HSyndicateDFS.java
License:Apache License
@Override
public synchronized FileStatus[] listStatus(Path f) throws IOException {
    SyndicateFSPath hpath = makeSyndicateFSPath(f);
    if (!this.syndicateFS.exists(hpath)) {
        return null;
    }
    if (this.syndicateFS.isFile(hpath)) {
        return new FileStatus[] {
                new HSyndicateFileStatus(f.makeQualified(this), this.syndicateFS, hpath) };
    }
    List<FileStatus> ret = new ArrayList<FileStatus>();
    for (String p : this.syndicateFS.readDirectoryEntries(hpath)) {
        ret.add(getFileStatus(new Path(f, p)));
    }
    return ret.toArray(new FileStatus[0]);
}
From source file:hsyndicate.hadoop.dfs.HSyndicateDFS.java
License:Apache License
@Override
public synchronized FileStatus getFileStatus(Path f) throws IOException {
    SyndicateFSPath hpath = makeSyndicateFSPath(f);
    if (!this.syndicateFS.exists(hpath)) {
        throw new FileNotFoundException(f + ": No such file or directory.");
    }
    return new HSyndicateFileStatus(f.makeQualified(this), this.syndicateFS, hpath);
}
From source file:it.crs4.seal.common.SealToolParser.java
License:Open Source License
/**
 * Parses the command line.
 *
 * Override this method to implement additional command line options,
 * but do make sure you call this method to parse the default options.
 */
protected CommandLine parseOptions(Configuration conf, String[] args) throws ParseException, IOException {
    myconf = conf;
    setDefaultProperties(conf);

    // Load settings from the configuration file. First, parse the command line
    // (in getRcFile) looking for an option overriding the default Seal configuration file.
    File configFile = getRcFile(args);
    if (configFile != null)
        loadConfig(conf, configFile);

    // Now parse the entire command line using the default Hadoop parser. This way the
    // user can override properties specified in the config file with properties
    // specified on the command line.
    CommandLine line = new GenericOptionsParser(conf, options, args).getCommandLine();
    if (line == null) // getCommandLine returns null if there was a parsing error
        throw new ParseException("Error parsing command line");

    ////////////////////// input/output formats //////////////////////
    // Set the configuration property. Then check the property to ensure it has a
    // valid value, regardless of whether we just set it, so that the check is
    // also valid if the property is set directly.
    if (line.hasOption(opt_inputFormat.getOpt()))
        myconf.set(INPUT_FORMAT_CONF, line.getOptionValue(opt_inputFormat.getOpt()));
    validateIOFormat(INPUT_FORMAT_CONF, acceptedInputFormats);

    if (line.hasOption(opt_outputFormat.getOpt()))
        myconf.set(OUTPUT_FORMAT_CONF, line.getOptionValue(opt_outputFormat.getOpt()));
    validateIOFormat(OUTPUT_FORMAT_CONF, acceptedOutputFormats);

    if (conf.get(INPUT_FORMAT_ENCODING) != null) {
        String value = conf.get(INPUT_FORMAT_ENCODING);
        if (value.equals("sanger") || value.equals("illumina"))
            conf.set(fi.tkk.ics.hadoop.bam.FormatConstants.CONF_INPUT_BASE_QUALITY_ENCODING, value);
        else
            throw new ParseException("Invalid " + INPUT_FORMAT_ENCODING + ". Expected 'sanger' or 'illumina'");
    }

    /////////////////////// output compression /////////////////////
    if (line.hasOption(opt_compressOutput.getOpt())) {
        myconf.setBoolean("mapred.output.compress", true);
        String codec = line.getOptionValue(opt_compressOutput.getOpt());
        if (codec != null) {
            String codecClass = "org.apache.hadoop.io.compress.GzipCodec"; // default
            if ("auto".equalsIgnoreCase(codec) || "gzip".equalsIgnoreCase(codec)) {
                // pass. Already set
            } else if ("bzip2".equalsIgnoreCase(codec))
                codecClass = "org.apache.hadoop.io.compress.BZip2Codec";
            else if ("snappy".equalsIgnoreCase(codec))
                codecClass = "org.apache.hadoop.io.compress.SnappyCodec";
            else {
                throw new ParseException("Unknown codec " + codec
                        + ". Valid values are gzip, bzip2, snappy and auto.\n"
                        + "If you want to use an unsupported codec pass 'auto' and set the property mapred.output.compression.codec directly");
            }
            myconf.set("mapred.output.compression.codec", codecClass);
        }
    }

    ////////////////////// number of reducers //////////////////////
    if (line.hasOption(opt_nReduceTasks.getOpt())) {
        String rString = line.getOptionValue(opt_nReduceTasks.getOpt());
        try {
            int r = Integer.parseInt(rString);
            if (r >= minReduceTasks)
                nReduceTasks = r;
            else
                throw new ParseException("Number of reducers must be greater than or equal to "
                        + minReduceTasks + " (got " + rString + ")");
        } catch (NumberFormatException e) {
            throw new ParseException("Invalid number of reduce tasks '" + rString + "'");
        }
    }

    ////////////////////// positional arguments //////////////////////
    String[] otherArgs = line.getArgs();
    if (otherArgs.length < 2) // require at least two: one input and one output
        throw new ParseException("You must provide input and output paths");
    else {
        FileSystem fs;
        for (int i = 0; i < otherArgs.length - 1; ++i) {
            Path p = new Path(otherArgs[i]);
            fs = p.getFileSystem(conf);
            p = p.makeQualified(fs);
            FileStatus[] files = fs.globStatus(p);
            if (files != null && files.length > 0) {
                for (FileStatus status : files)
                    inputs.add(status.getPath());
            } else
                throw new ParseException("Input path " + p.toString() + " doesn't exist");
        }
        // now the last one, which should be the output path
        outputDir = new Path(otherArgs[otherArgs.length - 1]);
        fs = outputDir.getFileSystem(conf);
        outputDir = outputDir.makeQualified(fs);
        if (fs.exists(outputDir))
            throw new ParseException(
                    "Output path " + outputDir.toString() + " already exists. Won't overwrite");
    }

    return line;
}
From source file:it.crs4.seal.demux.Demux.java
License:Open Source License
private void createLaneContentFiles(Path outputPath, Path sampleSheetPath) throws IOException {
    StringBuilder builder = new StringBuilder(100);
    try {
        Path qualifiedPath = sampleSheetPath.makeQualified(sampleSheetPath.getFileSystem(getConf()));
        SampleSheet sheet = DemuxUtils.loadSampleSheet(qualifiedPath, getConf());
        Collection<String> samples = sheet.getSamples();
        // we have one output directory per sample, thus we need one LaneContent file per sample.
        for (String sample : samples) {
            Writer out = makeLaneContentWriter(outputPath, sample);
            try {
                for (int lane = 1; lane <= 8; ++lane) {
                    builder.delete(0, builder.length());
                    builder.append(lane - 1).append(":");
                    if (sheet.getSamplesInLane(lane).contains(sample))
                        builder.append(sample);
                    builder.append("\n");
                    out.write(builder.toString());
                }
            } finally {
                out.close();
            }
        }
    } catch (SampleSheet.FormatException e) {
        throw new RuntimeException("Error in sample sheet. " + e.getMessage());
    }
}
From source file:it.crs4.seal.read_sort.MergeAlignments.java
License:Open Source License
private Path getQualifiedPath(String simplePath) throws IOException {
    Path path = new Path(simplePath);
    return path.makeQualified(path.getFileSystem(getConf()));
}
From source file:it.crs4.seal.read_sort.MergeAlignments.java
License:Open Source License
private Path[] getSourcePaths() throws Exception {
    Path srcPath = new Path(userInput);
    FileSystem srcFs = srcPath.getFileSystem(getConf());

    if (srcFs.exists(srcPath)) {
        FileStatus stat = srcFs.getFileStatus(srcPath);
        if (stat.isDir()) {
            String msg = "source path " + srcPath + " is a directory. Globbing with ";
            srcPath = new Path(srcPath, "*");
            log.info(msg + srcPath);
        }
    }

    // Glob the source path. The returned paths are already sorted. We filter out
    // paths starting with '_' (see SourcePathFilter).
    // If the path doesn't contain a glob pattern and it doesn't exist, the function returns null.
    Path[] sources = FileUtil.stat2Paths(srcFs.globStatus(srcPath, new SourcePathFilter()));
    if (sources == null)
        throw new IllegalArgumentException("Source path " + srcPath.makeQualified(srcFs) + " doesn't exist");

    if (log.isDebugEnabled()) {
        log.debug("Sources:");
        for (int i = 0; i < sources.length; ++i)
            log.debug(sources[i]);
    }

    if (sources.length == 0)
        throw new IllegalArgumentException("no source files selected");

    log.info("Merging " + sources.length + " files.");
    return sources;
}