Example usage for org.apache.hadoop.fs Path makeQualified

List of usage examples for org.apache.hadoop.fs Path makeQualified


In this page you can find the example usage for org.apache.hadoop.fs Path makeQualified.


public Path makeQualified(FileSystem fs) 

Source Link


Returns a qualified path object for the FileSystem's working directory.


From source file:bdss.cmu.edu.Sort.java

License:Apache License

/**
 * The main driver for sort program.
 * Invoke this method to submit the map/reduce job.
 * @throws IOException When there are communication problems with the
 *                     job tracker.
 */
public int run(String[] args) throws Exception {
    // Parses the command-line options, configures the sort job, optionally samples
    // the input for a total-order partitioning, then runs the job and reports how
    // long it took. Returns 0 if job.waitForCompletion(true) is true, 1 otherwise;
    // the argument-error paths return the value of printUsage().
    // NOTE(review): this excerpt appears to have lost its closing braces and some
    // statements (at least one branch is visibly empty) -- confirm against the
    // full source before relying on the control flow shown here.

    Configuration conf = getConf();
    JobClient client = new JobClient(conf);
    ClusterStatus cluster = client.getClusterStatus();
    // Default reducer count: 90% of the value reported by getMaxReduceTasks().
    int num_reduces = (int) (cluster.getMaxReduceTasks() * 0.9);
    String sort_reduces = conf.get(REDUCES_PER_HOST);
    if (sort_reduces != null) {
        // When the setting is present it replaces the default with
        // (number of task trackers) * (configured value).
        num_reduces = cluster.getTaskTrackers() * Integer.parseInt(sort_reduces);
    // Defaults used unless overridden by the command-line flags parsed below.
    Class<? extends InputFormat> inputFormatClass = SequenceFileInputFormat.class;
    Class<? extends OutputFormat> outputFormatClass = SequenceFileOutputFormat.class;
    Class<? extends WritableComparable> outputKeyClass = BytesWritable.class;
    Class<? extends Writable> outputValueClass = BytesWritable.class;
    List<String> otherArgs = new ArrayList<String>();
    // Stays null unless -totalOrder is given; null means no sampling step is run.
    InputSampler.Sampler<K, V> sampler = null;
    for (int i = 0; i < args.length; ++i) {
        try {
            // Each recognized flag consumes its value(s) by advancing i with ++i.
            if ("-r".equals(args[i])) {
                num_reduces = Integer.parseInt(args[++i]);
            } else if ("-inFormat".equals(args[i])) {
                inputFormatClass = Class.forName(args[++i]).asSubclass(InputFormat.class);
            } else if ("-outFormat".equals(args[i])) {
                outputFormatClass = Class.forName(args[++i]).asSubclass(OutputFormat.class);
            } else if ("-outKey".equals(args[i])) {
                outputKeyClass = Class.forName(args[++i]).asSubclass(WritableComparable.class);
            } else if ("-outValue".equals(args[i])) {
                outputValueClass = Class.forName(args[++i]).asSubclass(Writable.class);
            } else if ("-totalOrder".equals(args[i])) {
                // -totalOrder takes three values, in order: pcnt, numSamples, maxSplits.
                double pcnt = Double.parseDouble(args[++i]);
                int numSamples = Integer.parseInt(args[++i]);
                int maxSplits = Integer.parseInt(args[++i]);
                // A non-positive maxSplits is replaced by Integer.MAX_VALUE.
                if (0 >= maxSplits)
                    maxSplits = Integer.MAX_VALUE;
                sampler = new InputSampler.RandomSampler<K, V>(pcnt, numSamples, maxSplits);
            } else {
                // NOTE(review): this branch is empty in the excerpt; presumably it collected
                // the argument into otherArgs -- verify against the full source.
        } catch (NumberFormatException except) {
            System.out.println("ERROR: Integer expected instead of " + args[i]);
            return printUsage();
        } catch (ArrayIndexOutOfBoundsException except) {
            // The visible array accesses that can overrun are args[++i], so i equals
            // args.length here and args[i - 1] is the last argument supplied.
            System.out.println("ERROR: Required parameter missing from " + args[i - 1]);
            return printUsage(); // exits
    // Set user-supplied (possibly default) job configs
    // NOTE(review): `job` is not declared in this excerpt; presumably a field -- confirm.
    job = new Job(conf);





    // Make sure there are exactly 2 parameters left.
    if (otherArgs.size() != 2) {
        System.out.println("ERROR: Wrong number of parameters: " + otherArgs.size() + " instead of 2.");
        return printUsage();
    // The two remaining arguments are used as input path(s) and output path, in that order.
    FileInputFormat.setInputPaths(job, otherArgs.get(0));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs.get(1)));

    if (sampler != null) {
        System.out.println("Sampling input to effect total-order sort...");
        // The partition file is placed under the first input path, qualified against
        // that path's own file system.
        Path inputDir = FileInputFormat.getInputPaths(job)[0];
        inputDir = inputDir.makeQualified(inputDir.getFileSystem(conf));
        Path partitionFile = new Path(inputDir, "_sortPartitioning");
        TotalOrderPartitioner.setPartitionFile(job.getConfiguration(), partitionFile);
        InputSampler.<K, V>writePartitionFile(job, sampler);
        // presumably the "#_sortPartitioning" fragment is the link name used by the
        // distributed cache -- confirm against the DistributedCache documentation.
        URI partitionUri = new URI(partitionFile.toString() + "#" + "_sortPartitioning");
        // NOTE(review): the cache file is registered on `conf`, while the partition file
        // above was set on job.getConfiguration(); confirm both refer to the same settings.
        DistributedCache.addCacheFile(partitionUri, conf);

    System.out.println("Running on " + cluster.getTaskTrackers() + " nodes to sort from "
            + FileInputFormat.getInputPaths(job)[0] + " into " + FileOutputFormat.getOutputPath(job) + " with "
            + num_reduces + " reduces.");
    Date startTime = new Date();
    System.out.println("Job started: " + startTime);
    // Map the boolean result of waitForCompletion to the return code: true -> 0, false -> 1.
    int ret = job.waitForCompletion(true) ? 0 : 1;
    Date end_time = new Date();
    System.out.println("Job ended: " + end_time);
    // Elapsed time is the millisecond difference divided by 1000 (integer division).
    System.out.println("The job took " + (end_time.getTime() - startTime.getTime()) / 1000 + " seconds.");
    return ret;

From source file:cascading.flow.hadoop.util.HadoopUtil.java

License:Open Source License

/**
 * Add to class path.
 * @param config    the config
 * @param classpath the classpath
 */
public static Map<Path, Path> addToClassPath(Configuration config, List<String> classpath) {
    // Registers the given classpath entries with DistributedCache.addFileToClassPath and
    // returns the result of getCommonPaths(localPaths, remotePaths). A null classpath
    // short-circuits to a null return.
    // NOTE(review): closing braces appear to be missing throughout this excerpt.
    if (classpath == null)
        return null;

    // given to fully qualified
    Map<String, Path> localPaths = new HashMap<String, Path>();
    Map<String, Path> remotePaths = new HashMap<String, Path>();

    // resolvePaths is handed both maps; presumably it fills them with the local and
    // remote locations of each entry -- confirm against its definition.
    resolvePaths(config, classpath, null, null, localPaths, remotePaths);

    try {
        LocalFileSystem localFS = getLocalFS(config);

        for (String path : localPaths.keySet()) {
            // only add local if no remote
            // NOTE(review): the statement guarded by this `if` seems to be missing from the
            // excerpt (presumably `continue;`) -- verify against the full source.
            if (remotePaths.containsKey(path))

            Path artifact = localPaths.get(path);

            // Local artifacts are qualified against the local file system before being added.
            DistributedCache.addFileToClassPath(artifact.makeQualified(localFS), config);

        FileSystem defaultFS = getDefaultFS(config);

        for (String path : remotePaths.keySet()) {
            // always add remote
            Path artifact = remotePaths.get(path);

            // Remote artifacts are qualified against the file system returned by getDefaultFS.
            DistributedCache.addFileToClassPath(artifact.makeQualified(defaultFS), config);
    } catch (IOException exception) {
        // The IOException is rethrown as a FlowException with the original as its cause.
        throw new FlowException("unable to set distributed cache paths", exception);

    return getCommonPaths(localPaths, remotePaths);

From source file:cascading.tap.hadoop.Hadoop18TapUtil.java

License:Open Source License

/**
 * Builds the output path for the current task attempt.
 *
 * The path is the job output path (from FileOutputFormat.getOutputPath) joined with
 * TEMPORARY_PATH, a path separator, and "_" followed by the value of "mapred.task.id".
 * NOTE(review): closing braces appear to be missing from this excerpt.
 *
 * @param conf the job configuration the task id and output path are read from
 * @return the path qualified against its own file system, or the unqualified path if an
 *         IOException is raised inside the try block
 */
private static Path getTaskOutputPath(JobConf conf) {
    String taskId = conf.get("mapred.task.id");

    Path p = new Path(FileOutputFormat.getOutputPath(conf), TEMPORARY_PATH + Path.SEPARATOR + "_" + taskId);

    try {
        FileSystem fs = p.getFileSystem(conf);
        return p.makeQualified(fs);
    } catch (IOException ie) {
        // Deliberate fallback: the IOException is not propagated, the unqualified path is used.
        return p;

From source file:cascading.tap.hadoop.util.Hadoop18TapUtil.java

License:Open Source License

/**
 * Builds the output path for the current task attempt.
 *
 * The path is the job output path (from FileOutputFormat.getOutputPath, called on
 * asJobConfInstance(conf)) joined with TEMPORARY_PATH, a path separator, and "_"
 * followed by the task id.
 * NOTE(review): closing braces appear to be missing from this excerpt.
 *
 * @param conf the configuration the task id and output path are read from
 * @return the path qualified against its own file system, or the unqualified path if an
 *         IOException is raised inside the try block
 */
private static Path getTaskOutputPath(Configuration conf) {
    // "mapred.task.id" is looked up with the value of "mapreduce.task.id" as its default.
    String taskId = conf.get("mapred.task.id", conf.get("mapreduce.task.id"));

    Path p = new Path(FileOutputFormat.getOutputPath(asJobConfInstance(conf)),
            TEMPORARY_PATH + Path.SEPARATOR + "_" + taskId);

    try {
        FileSystem fs = p.getFileSystem(conf);
        return p.makeQualified(fs);
    } catch (IOException ie) {
        // Deliberate fallback: the IOException is not propagated, the unqualified path is used.
        return p;

From source file:cn.edu.xmu.dm.mapreduce.Sort.java

License:Apache License

/**
 * The main driver for sort program. Invoke this method to submit the
 * map/reduce job.
 * @throws IOException
 *             When there are communication problems with the job tracker.
 */
public int run(String[] args) throws Exception {
    // Parses the command-line options, sets input/output paths on a JobConf, optionally
    // samples the input for a total-order partitioning, then runs the job through
    // JobClient.runJob and reports how long it took. The normal path always returns 0;
    // the argument-error paths return the value of printUsage().
    // NOTE(review): this excerpt appears to have lost its closing braces and some
    // statements (several branches are visibly empty) -- confirm against the full source.
    // NOTE(review): `conf` and `job` are created here but not used again in the visible code.
    Configuration conf = new Configuration();
    Job job = new Job(conf, "Sorter");

    JobConf jobConf = new JobConf(getConf(), Sort.class);


    JobClient client = new JobClient(jobConf);
    ClusterStatus cluster = client.getClusterStatus();
    // Default reducer count: 90% of the value reported by getMaxReduceTasks().
    int num_reduces = (int) (cluster.getMaxReduceTasks() * 0.9);
    String sort_reduces = jobConf.get("test.sort.reduces_per_host");
    if (sort_reduces != null) {
        // When the setting is present it replaces the default with
        // (number of task trackers) * (configured value).
        num_reduces = cluster.getTaskTrackers() * Integer.parseInt(sort_reduces);
    // Defaults used unless overridden by the command-line flags parsed below.
    Class<? extends InputFormat> inputFormatClass = SequenceFileInputFormat.class;
    Class<? extends OutputFormat> outputFormatClass = SequenceFileOutputFormat.class;
    Class<? extends WritableComparable> outputKeyClass = BytesWritable.class;
    Class<? extends Writable> outputValueClass = BytesWritable.class;
    List<String> otherArgs = new ArrayList<String>();
    // Stays null unless -totalOrder is given; null means no sampling step is run.
    InputSampler.Sampler<K, V> sampler = null;
    for (int i = 0; i < args.length; ++i) {
        try {
            // Each recognized flag consumes its value(s) by advancing i with ++i.
            // NOTE(review): the -m branch is empty in this excerpt, so its value is not consumed here.
            if ("-m".equals(args[i])) {
            } else if ("-r".equals(args[i])) {
                num_reduces = Integer.parseInt(args[++i]);
            } else if ("-inFormat".equals(args[i])) {
                inputFormatClass = Class.forName(args[++i]).asSubclass(InputFormat.class);
            } else if ("-outFormat".equals(args[i])) {
                outputFormatClass = Class.forName(args[++i]).asSubclass(OutputFormat.class);
            } else if ("-outKey".equals(args[i])) {
                outputKeyClass = Class.forName(args[++i]).asSubclass(WritableComparable.class);
            } else if ("-outValue".equals(args[i])) {
                outputValueClass = Class.forName(args[++i]).asSubclass(Writable.class);
            } else if ("-totalOrder".equals(args[i])) {
                // -totalOrder takes three values, in order: pcnt, numSamples, maxSplits.
                double pcnt = Double.parseDouble(args[++i]);
                int numSamples = Integer.parseInt(args[++i]);
                int maxSplits = Integer.parseInt(args[++i]);
                // A non-positive maxSplits is replaced by Integer.MAX_VALUE.
                if (0 >= maxSplits)
                    maxSplits = Integer.MAX_VALUE;
                sampler = new InputSampler.RandomSampler<K, V>(pcnt, numSamples, maxSplits);
            } else {
                // NOTE(review): this branch is empty in the excerpt; presumably it collected
                // the argument into otherArgs -- verify against the full source.
        } catch (NumberFormatException except) {
            System.out.println("ERROR: Integer expected instead of " + args[i]);
            return printUsage();
        } catch (ArrayIndexOutOfBoundsException except) {
            // The visible array accesses that can overrun are args[++i], so i equals
            // args.length here and args[i - 1] is the last argument supplied.
            System.out.println("ERROR: Required parameter missing from " + args[i - 1]);
            return printUsage(); // exits

    // Set user-supplied (possibly default) job configs
    // NOTE(review): no configuration statements follow in this excerpt, so num_reduces and
    // the format/key/value classes are never applied to jobConf in the visible code.



    // Make sure there are exactly 2 parameters left.
    if (otherArgs.size() != 2) {
        System.out.println("ERROR: Wrong number of parameters: " + otherArgs.size() + " instead of 2.");
        return printUsage();
    // The two remaining arguments are used as input path(s) and output path, in that order.
    FileInputFormat.setInputPaths(jobConf, otherArgs.get(0));
    FileOutputFormat.setOutputPath(jobConf, new Path(otherArgs.get(1)));

    if (sampler != null) {
        System.out.println("Sampling input to effect total-order sort...");
        // The partition file is placed under the first input path, qualified against
        // that path's own file system.
        Path inputDir = FileInputFormat.getInputPaths(jobConf)[0];
        inputDir = inputDir.makeQualified(inputDir.getFileSystem(jobConf));
        Path partitionFile = new Path(inputDir, "_sortPartitioning");
        TotalOrderPartitioner.setPartitionFile(jobConf, partitionFile);
        InputSampler.<K, V>writePartitionFile(jobConf, sampler);
        // presumably the "#_sortPartitioning" fragment is the link name used by the
        // distributed cache -- confirm against the DistributedCache documentation.
        URI partitionUri = new URI(partitionFile.toString() + "#" + "_sortPartitioning");
        DistributedCache.addCacheFile(partitionUri, jobConf);

    System.out.println("Running on " + cluster.getTaskTrackers() + " nodes to sort from "
            + FileInputFormat.getInputPaths(jobConf)[0] + " into " + FileOutputFormat.getOutputPath(jobConf)
            + " with " + num_reduces + " reduces.");
    Date startTime = new Date();
    System.out.println("Job started: " + startTime);
    // NOTE(review): `jobResult` is not declared in this excerpt; presumably a field -- confirm.
    jobResult = JobClient.runJob(jobConf);
    Date end_time = new Date();
    System.out.println("Job ended: " + end_time);
    // Elapsed time is the millisecond difference divided by 1000 (integer division).
    System.out.println("The job took " + (end_time.getTime() - startTime.getTime()) / 1000 + " seconds.");
    // The return code does not depend on jobResult in the visible code.
    return 0;

From source file:cn.uway.util.apache.parquet.hadoop.ParquetFileWriter.java

License:Apache License

/**
 * Writes a _metadata and _common_metadata file.
 * @param configuration the configuration to use to get the FileSystem
 * @param outputPath the directory to write the _metadata file to
 * @param footers the list of footers to merge
 * @throws IOException
 */
public static void writeMetadataFile(Configuration configuration, Path outputPath, List<Footer> footers)
        throws IOException {
    // Resolve the file system that owns outputPath and qualify the path against it, so
    // the qualified form is what gets passed to mergeFooters and the writes below.
    FileSystem fs = outputPath.getFileSystem(configuration);
    outputPath = outputPath.makeQualified(fs);
    // The footers are merged once and the same merged object is passed to both writes.
    ParquetMetadata metadataFooter = mergeFooters(outputPath, footers);
    // Two files are written, one per file-name constant.
    // NOTE(review): the closing brace appears to be missing from this excerpt.
    writeMetadataFile(outputPath, metadataFooter, fs, PARQUET_METADATA_FILE);
    writeMetadataFile(outputPath, metadataFooter, fs, PARQUET_COMMON_METADATA_FILE);

From source file:com.ailk.oci.ocnosql.tools.load.single.SingleColumnImportTsv.java

License:Apache License

/**
 * Configure a MapReduce Job to perform an incremental load into the given
 * table. This
 * <ul>
 *   <li>Inspects the table to configure a total order partitioner</li>
 *   <li>Uploads the partitions file to the cluster and adds it to the DistributedCache</li>
 *   <li>Sets the number of reduce tasks to match the current number of regions</li>
 *   <li>Sets the output key/value class to match HFileOutputFormat's requirements</li>
 *   <li>Sets the reducer up to perform the appropriate sorting (either KeyValueSortReducer or
 *     PutSortReducer)</li>
 * </ul>
 * The user should be sure to set the map output value class to either KeyValue or Put before
 * running this function.
 */
public static void configureIncrementalLoad(Job job, HTable table) throws IOException {
    // Visible steps: look up the partitioner class, read the table's region start keys,
    // write them to a uniquely named partitions file under the job working directory,
    // register that file with the DistributedCache, and configure compression.
    // NOTE(review): this excerpt appears to have lost its closing braces and several
    // statements (the job.setXxx calls and reducer selection are not visible) -- confirm
    // against the full source.
    Configuration conf = job.getConfiguration();
    Class<? extends Partitioner> topClass;
    try {
        topClass = getTotalOrderPartitionerClass();
    } catch (ClassNotFoundException e) {
        // Surface the lookup failure as an IOException, keeping the original as the cause.
        throw new IOException("Failed getting TotalOrderPartitioner", e);
    //Set the key class for the job output data
    //Set the value class for job outputs

    // Based on the configured map output class, set the correct reducer to properly
    // sort the incoming values.
    // TODO it would be nice to pick one or the other of these formats.
    // NOTE(review): the KeyValue and Put branches are empty in this excerpt.
    if (KeyValue.class.equals(job.getMapOutputValueClass())) {
    } else if (Put.class.equals(job.getMapOutputValueClass())) {
    } else {
        LOG.warn("Unknown map output value type:" + job.getMapOutputValueClass());

    LOG.info("Looking up current regions for table " + table);
    List<ImmutableBytesWritable> startKeys = getRegionStartKeys(table);
    LOG.info("Configuring " + startKeys.size() + " reduce partitions " + "to match current region count");


    // A random UUID in the file name keeps each invocation's partitions file distinct.
    Path partitionsPath = new Path(job.getWorkingDirectory(), "partitions_" + UUID.randomUUID());
    LOG.info("Writing partition information to " + partitionsPath);

    // NOTE(review): `fs` is not used again in the visible code; presumably a
    // partitionsPath.makeQualified(fs) call was dropped from this excerpt -- confirm.
    FileSystem fs = partitionsPath.getFileSystem(conf);
    writePartitions(conf, partitionsPath, startKeys);

    URI cacheUri;
    try {
        // Below we make explicit reference to the bundled TOP.  Its cheating.
        // We are assume the define in the hbase bundled TOP is as it is in
        // hadoop (whether 0.20 or 0.22, etc.)
        // NOTE(review): the next line is an unterminated duplicate of the statement that
        // follows it; it looks like leftover text and should be confirmed/removed.
          cacheUri = new URI(partitionsPath.toString() + "#" +
        cacheUri = new URI(partitionsPath.toString() + "#" + TotalOrderPartitioner.DEFAULT_PATH);
    } catch (URISyntaxException e) {
        // Callers only see IOException from this method, so the URI error is wrapped.
        throw new IOException(e);
    DistributedCache.addCacheFile(cacheUri, conf);

    // Set compression algorithms based on column families
    configureCompression(table, conf);

    LOG.info("Incremental table output configured.");

From source file:com.alexholmes.hadooputils.sort.Sort.java

License:Apache License

/**
 * The driver for the sort MapReduce job.
 * @param jobConf           sort configuration
 * @param numMapTasks       number of map tasks
 * @param numReduceTasks    number of reduce tasks
 * @param sampler           sampler, if required
 * @param codecClass        the compression codec for compressing final outputs
 * @param mapCodecClass     the compression codec for compressing intermediary map outputs
 * @param createLzopIndexes whether or not a MR job should be launched to create LZOP indexes
 *                          for the job output files
 * @param inputDirAsString  input directory in CSV-form
 * @param outputDirAsString output directory
 * @return true if the job completed successfully
 * @throws IOException        if something went wrong
 * @throws URISyntaxException if a URI wasn't correctly formed
 */
public boolean runJob(final JobConf jobConf, final Integer numMapTasks, final Integer numReduceTasks,
        final InputSampler.Sampler<K, V> sampler, final Class<? extends CompressionCodec> codecClass,
        final Class<? extends CompressionCodec> mapCodecClass, final boolean createLzopIndexes,
        final String inputDirAsString, final String outputDirAsString) throws IOException, URISyntaxException {
    // Configures jobConf (output compression, input/output paths, optional total-order
    // partition file), runs it through JobClient.runJob, prints timing, and optionally
    // indexes the output with LzoIndexer. Returns true when jobResult.isSuccessful().
    // NOTE(review): this excerpt appears to have lost its closing braces and several
    // statements (a number of `if` bodies are visibly empty) -- confirm against the
    // full source.


    JobClient client = new JobClient(jobConf);
    ClusterStatus cluster = client.getClusterStatus();

    // NOTE(review): the bodies for the non-null numMapTasks / numReduceTasks cases are
    // not visible here; presumably they set the task counts on jobConf -- confirm.
    if (numMapTasks != null) {
    if (numReduceTasks != null) {
    } else {
        // No explicit reducer count: start from 90% of getMaxReduceTasks(), replaced by
        // (task trackers) * (configured value) when "test.sort.reduces_per_host" is set.
        int numReduces = (int) (cluster.getMaxReduceTasks() * 0.9);
        String sortReduces = jobConf.get("test.sort.reduces_per_host");
        if (sortReduces != null) {
            numReduces = cluster.getTaskTrackers() * Integer.parseInt(sortReduces);

        // Set user-supplied (possibly default) job configs




    // NOTE(review): the map-output codec branch is empty in this excerpt.
    if (mapCodecClass != null) {

    if (codecClass != null) {
        // Enable compression of the final job output with the supplied codec.
        jobConf.setBoolean("mapred.output.compress", true);
        jobConf.setClass("mapred.output.compression.codec", codecClass, CompressionCodec.class);

    FileInputFormat.setInputPaths(jobConf, inputDirAsString);
    FileOutputFormat.setOutputPath(jobConf, new Path(outputDirAsString));

    if (sampler != null) {
        System.out.println("Sampling input to effect total-order sort...");
        Path inputDir = FileInputFormat.getInputPaths(jobConf)[0];

        FileSystem fileSystem = FileSystem.get(jobConf);

        // If the first input path is an existing file, use its parent directory as the
        // location for the partition file.
        if (fileSystem.exists(inputDir) && fileSystem.isFile(inputDir)) {
            inputDir = inputDir.getParent();
        // Qualification uses the path's own file system rather than `fileSystem` above.
        inputDir = inputDir.makeQualified(inputDir.getFileSystem(jobConf));
        Path partitionFile = new Path(inputDir, "_sortPartitioning");
        TotalOrderPartitioner.setPartitionFile(jobConf, partitionFile);
        InputSampler.writePartitionFile(jobConf, sampler);
        // presumably the "#_sortPartitioning" fragment is the link name used by the
        // distributed cache -- confirm against the DistributedCache documentation.
        URI partitionUri = new URI(partitionFile.toString() + "#" + "_sortPartitioning");
        DistributedCache.addCacheFile(partitionUri, jobConf);

    System.out.println("Running on " + cluster.getTaskTrackers() + " nodes to sort from "
            + FileInputFormat.getInputPaths(jobConf)[0] + " into " + FileOutputFormat.getOutputPath(jobConf)
            + " with " + jobConf.getNumReduceTasks() + " reduces.");
    Date startTime = new Date();
    System.out.println("Job started: " + startTime);
    // NOTE(review): `jobResult` is not declared in this excerpt; presumably a field -- confirm.
    jobResult = JobClient.runJob(jobConf);
    Date endTime = new Date();
    System.out.println("Job ended: " + endTime);
    System.out.println("The job took "
            + TimeUnit.MILLISECONDS.toSeconds(endTime.getTime() - startTime.getTime()) + " seconds.");

    if (jobResult.isSuccessful()) {
        // Index the output only when requested and the output codec is exactly LzopCodec.
        if (createLzopIndexes && codecClass != null && LzopCodec.class.equals(codecClass)) {
            new LzoIndexer(jobConf).index(new Path(outputDirAsString));
        return true;
    return false;

From source file:com.aliyun.fs.oss.blk.OssFileSystem.java

License:Apache License

/**
 * Lists the status entries for the given path.
 *
 * Visible behavior: returns an empty array when no inode is found for the path, and a
 * single-element array (built from the qualified path and the inode) when the inode is
 * a file. Otherwise the sub-paths reported by the store are iterated.
 * NOTE(review): this excerpt appears to have lost its closing braces and the statement
 * that adds each sub-path's status to {@code ret} -- confirm against the full source.
 *
 * @param f the path to list
 * @return the status entries collected for {@code f}
 * @throws IOException declared by the signature; presumably raised by the store calls
 */
public FileStatus[] listStatus(Path f) throws IOException {
    Path absolutePath = makeAbsolute(f);
    INode inode = store.retrieveINode(absolutePath);
    if (inode == null) {
        return new FileStatus[0];
    }
    if (inode.isFile()) {
        return new FileStatus[] { new OssFileStatus(f.makeQualified(this), inode) };
    ArrayList<FileStatus> ret = new ArrayList<FileStatus>();
    for (Path p : store.listSubPaths(absolutePath)) {
        // Here, we need to convert "file/path" to "/file/path".
        // Otherwise, Path.makeQualified will throw `URISyntaxException`.
        // NOTE(review): modifiedPath is not used in the visible code.
        Path modifiedPath = new Path("/" + p.toString());
    return ret.toArray(new FileStatus[0]);

From source file:com.aliyun.fs.oss.blk.OssFileSystem.java

License:Apache License

/** FileStatus for Oss file systems. */
public FileStatus getFileStatus(Path f) throws IOException {
    // Returns an OssFileStatus for f, built from f qualified against this file system.
    // An empty store key is answered with INode.DIRECTORY_INODE without consulting the
    // store; otherwise the inode is looked up and a missing inode raises
    // FileNotFoundException.
    // NOTE(review): closing braces appear to be missing from this excerpt.
    Path absolutePath = makeAbsolute(f);
    String key = JetOssFileSystemStore.pathToKey(absolutePath);

    if (key.length() == 0) { // root always exists
        return new OssFileStatus(f.makeQualified(this), INode.DIRECTORY_INODE);

    // makeAbsolute(f) is evaluated again here rather than reusing absolutePath.
    INode inode = store.retrieveINode(makeAbsolute(f));
    if (inode == null) {
        throw new FileNotFoundException(f + ": No such file or directory.");
    return new OssFileStatus(f.makeQualified(this), inode);