Example usage for org.apache.hadoop.fs FileSystem makeQualified

Introduction

This page collects usage examples for org.apache.hadoop.fs.FileSystem#makeQualified.

Prototype

public Path makeQualified(Path path) 

Document

Qualifies a path so that it uses this FileSystem and, if relative, makes it absolute.
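
For orientation, here is a minimal, self-contained sketch of that behavior. It is not drawn from any of the projects quoted below, and the printed scheme and working directory are assumptions: a plain Configuration resolves to the local file system by default, while a cluster configuration would yield an hdfs:// URI.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class MakeQualifiedDemo {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);

        // A relative path is resolved against the working directory and
        // stamped with the file system's scheme (and authority, if any).
        Path qualified = fs.makeQualified(new Path("data/input"));

        // On the default local file system this prints something like
        // file:/home/user/data/input; on HDFS it would carry hdfs://host:port.
        System.out.println(qualified);
    }
}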

Usage

From source file: com.asakusafw.testdriver.temporary.TemporaryOutputRetriever.java

License: Apache License

@Override
public void truncate(TemporaryOutputDescription description, TestContext context) throws IOException {
    LOG.debug("Deleting output directory: {}", description); //$NON-NLS-1$
    VariableTable variables = createVariables(context);
    Configuration config = configurations.newInstance();
    FileSystem fs = FileSystem.get(config);
    String resolved = variables.parse(description.getPathPrefix(), false);
    Path path = new Path(resolved);
    Path output = path.getParent();
    Path target;
    if (output == null) {
        LOG.warn("Skipped deleting output directory because it is a base directory: {}", path);
        target = fs.makeQualified(path);
    } else {
        LOG.debug("Output directory will be deleted: {}", output); //$NON-NLS-1$
        target = fs.makeQualified(output);
    }
    TemporaryInputPreparator.delete(fs, target);
}

From source file: com.asakusafw.testdriver.testing.moderator.MockImporterPreparator.java

License: Apache License

@Override
public void truncate(MockImporterDescription description, TestContext context) throws IOException {
    Configuration config = configurations.newInstance();
    FileSystem fs = FileSystem.get(config);
    Path target = fs.makeQualified(new Path(description.getDirectory()));
    if (fs.exists(target)) {
        fs.delete(target, true);
    }
}

From source file: com.asakusafw.windgate.hadoopfs.HadoopFsProfile.java

License: Apache License

private static Path extractBasePath(Configuration configuration, ResourceProfile profile) throws IOException {
    assert configuration != null;
    assert profile != null;
    String result = extract(profile, KEY_BASE_PATH, false);
    try {
        if (result == null || result.isEmpty()) {
            FileSystem fileSystem = FileSystem.get(configuration);
            return fileSystem.getWorkingDirectory();
        }
        URI uri = URI.create(result);
        FileSystem fileSystem = FileSystem.get(uri, configuration);
        return fileSystem.makeQualified(new Path(uri));
    } catch (IOException e) {
        WGLOG.error(e, "E00002", profile.getName(), KEY_BASE_PATH, result == null ? "(default)" : result);
        throw new IOException(MessageFormat.format("Failed to initialize the file system: {1} (resource={0})",
                profile.getName(), KEY_BASE_PATH, result == null ? "(default)" : result), e);
    }
}

From source file: com.asakusafw.workflow.hadoop.HadoopDelete.java

License: Apache License

private static void delete(Configuration conf, Path path) throws IOException {
    FileSystem fs = path.getFileSystem(conf);
    if (LOG.isDebugEnabled()) {
        LOG.debug("deleting file: {}", fs.makeQualified(path));
    }
    boolean deleted = fs.delete(path, true);
    if (LOG.isDebugEnabled()) {
        if (deleted) {
            LOG.debug("delete success: {}", fs.makeQualified(path));
        } else if (fs.exists(path)) {
            LOG.debug("delete failed: {}", fs.makeQualified(path));
        } else {
            LOG.debug("target file is not found: {}", fs.makeQualified(path));
        }
    }
}

From source file: com.bah.lucene.hdfs.SoftlinkHdfsDirectory.java

License: Apache License

/**
 * Creates a new SoftlinkHdfsDirectory.
 * 
 * @param configuration
 *          the {@link Configuration} object.
 * @param storePath
 *          the path where the data is actually stored.
 * @param linkPath
 *          the path where the links are stored.
 * @throws IOException
 */
public SoftlinkHdfsDirectory(Configuration configuration, Path storePath, Path linkPath) throws IOException {
    super(configuration, linkPath);
    FileSystem fileSystem = storePath.getFileSystem(configuration);
    _storePath = fileSystem.makeQualified(storePath);
    _linkPath = fileSystem.makeQualified(linkPath);
}

From source file: com.benchmark.mapred.PiEstimator.java

License: Apache License

/**
 * Run a map/reduce job for estimating Pi.
 *
 * @return the estimated value of Pi
 */
public static BigDecimal estimate(int numMaps, long numPoints, JobConf jobConf) throws IOException {
    //setup job conf
    jobConf.setJobName(PiEstimator.class.getSimpleName());

    jobConf.setInputFormat(SequenceFileInputFormat.class);

    jobConf.setOutputKeyClass(BooleanWritable.class);
    jobConf.setOutputValueClass(LongWritable.class);
    jobConf.setOutputFormat(SequenceFileOutputFormat.class);

    jobConf.setMapperClass(PiMapper.class);
    jobConf.setNumMapTasks(numMaps);

    jobConf.setReducerClass(PiReducer.class);
    jobConf.setNumReduceTasks(1);

    // turn off speculative execution, because DFS doesn't handle
    // multiple writers to the same file.
    jobConf.setSpeculativeExecution(false);

    //setup input/output directories
    //final Path inDir = new Path(TMP_DIR, "in");
    final Path inDir = new Path("/home/hadoop1/tmp_dir", "in");
    System.out.println("inDir =" + inDir.toString());
    //final Path outDir = new Path(TMP_DIR, "out");
    final Path outDir = new Path("/home/hadoop1/tmp_dir", "out");
    System.out.println("outDir =" + outDir.toString());
    FileInputFormat.setInputPaths(jobConf, inDir);
    FileOutputFormat.setOutputPath(jobConf, outDir);

    final FileSystem fs = FileSystem.get(jobConf);
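    // Note: the existence check here and the cleanup in the finally block
    // still target TMP_DIR, while the input/output directories above were
    // hardcoded to /home/hadoop1/tmp_dir.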
    if (fs.exists(TMP_DIR)) {
        throw new IOException(
                "Tmp directory " + fs.makeQualified(TMP_DIR) + " already exists.  Please remove it first.");
    }
    if (!fs.mkdirs(inDir)) {
        throw new IOException("Cannot create input directory " + inDir);
    }

    try {
        //generate an input file for each map task
        for (int i = 0; i < numMaps; ++i) {
            final Path file = new Path(inDir, "part" + i);
            final LongWritable offset = new LongWritable(i * numPoints);
            final LongWritable size = new LongWritable(numPoints);
            final SequenceFile.Writer writer = SequenceFile.createWriter(fs, jobConf, file, LongWritable.class,
                    LongWritable.class, CompressionType.NONE);
            try {
                writer.append(offset, size);
            } finally {
                writer.close();
            }
            System.out.println("Wrote input for Map #" + i);
        }

        //start a map/reduce job
        System.out.println("Starting Job");
        final long startTime = System.currentTimeMillis();
        JobClient.runJob(jobConf);
        final double duration = (System.currentTimeMillis() - startTime) / 1000.0;
        System.out.println("Job Finished in " + duration + " seconds");

        //read outputs
        Path inFile = new Path(outDir, "reduce-out");
        LongWritable numInside = new LongWritable();
        LongWritable numOutside = new LongWritable();
        SequenceFile.Reader reader = new SequenceFile.Reader(fs, inFile, jobConf);
        try {
            reader.next(numInside, numOutside);
        } finally {
            reader.close();
        }

        //compute estimated value
        return BigDecimal.valueOf(4).setScale(20).multiply(BigDecimal.valueOf(numInside.get()))
                .divide(BigDecimal.valueOf(numMaps)).divide(BigDecimal.valueOf(numPoints));
    } finally {
        fs.delete(TMP_DIR, true);
    }
}

From source file: com.cg.mapreduce.fpgrowth.mahout.fpm.PFPGrowth.java

License: Apache License

/**
 * Serializes the fList to a SequenceFile and registers it with the distributed cache
 */
public static void saveFList(Iterable<Pair<String, Long>> flist, Parameters params, Configuration conf)
        throws IOException {
    Path flistPath = new Path(params.get(OUTPUT), F_LIST);
    FileSystem fs = FileSystem.get(flistPath.toUri(), conf);
    flistPath = fs.makeQualified(flistPath);
    HadoopUtil.delete(conf, flistPath);
    SequenceFile.Writer writer = new SequenceFile.Writer(fs, conf, flistPath, Text.class, LongWritable.class);
    try {
        for (Pair<String, Long> pair : flist) {
            writer.append(new Text(pair.getFirst()), new LongWritable(pair.getSecond()));
        }
    } finally {
        writer.close();
    }
    DistributedCache.addCacheFile(flistPath.toUri(), conf);
}

From source file: com.cg.mapreduce.myfpgrowth.PFPGrowth.java

License: Apache License

/**
 * Serializes the fList with group assignments to a SequenceFile and registers it with the distributed cache
 */
public static void saveFList(List<Pair<String, Long>> fList, Parameters params, Configuration conf)
        throws IOException {
    Path flistPath = new Path(params.get(OUTPUT) + "/oldlist", F_LIST);
    FileSystem fs = FileSystem.get(flistPath.toUri(), conf);
    flistPath = fs.makeQualified(flistPath);
    HadoopUtil.delete(conf, flistPath);
    SequenceFile.Writer writer = new SequenceFile.Writer(fs, conf, flistPath, Text.class, Pair.class);

    // set param to control group size in MR jobs
    int numGroups = params.getInt(NUM_GROUPS, NUM_GROUPS_DEFAULT);
    int maxPerGroup = fList.size() / numGroups;
    if (fList.size() % numGroups != 0) {
        maxPerGroup++;
    }
    params.set(MAX_PER_GROUP, Integer.toString(maxPerGroup));

    try {
        int group = 0;
        int count = 0;
        for (Pair<String, Long> pair : fList) {
            if (count == maxPerGroup) {
                group++;
                count = 0;
            }
            writer.append(new Text(pair.getFirst()), new Pair<Integer, Long>(group, pair.getSecond()));
            //writer.append(new Text(pair.getFirst()), new LongWritable(pair.getSecond()));
        }
    } finally {
        writer.close();
    }
    DistributedCache.addCacheFile(flistPath.toUri(), conf);
}

From source file: com.chinamobile.bcbsp.client.BSPJobClient.java

License: Apache License

/**
 * Submit a new job to run.
 * @param job BSPJob
 * @return Review comments: (1)The content of submitJobDir is decided by the
 *         client. I think it is dangerous because two different clients may
 *         generate the same submitJobDir. Review time: 2011-11-30; Reviewer:
 *         Hongxu Zhang. Fix log: (1)In order to avoid the conflict, I use the
 *         jobId to generate the submitJobDir. Because the jobId is unique,
 *         this problem can be solved. Fix time: 2011-12-04; Programmer:
 *         Zhigang Wang. Review comments: (2)There, the client must submit
 *         relative information about the job. There may be some exceptions
 *         during this process. When exceptions occur, this job should not be
 *         executed and the relative submitJobDir must be cleaned up. Review
 *         time: 2011-12-04; Reviewer: Hongxu Zhang. Fix log: (2)The process
 *         of submitting files has been surrounded by try-catch. The
 *         submitJobDir will be cleaned up in the catch process. Fix time:
 *         2011-12-04; Programmer: Zhigang Wang.
 */
public RunningJob submitJobInternal(BSPJob job) {
    BSPJobID jobId = null;
    Path submitJobDir = null;
    try {
        jobId = jobSubmitClient.getNewJobId();
        submitJobDir = new Path(getSystemDir(), "submit_" + jobId.toString());
        Path submitJarFile = null;
        LOG.info("debug: job type is " + job.getJobType());
        if (Constants.USER_BC_BSP_JOB_TYPE_C.equals(job.getJobType())) {
            submitJarFile = new Path(submitJobDir, "jobC");
            LOG.info("debug:" + submitJarFile.toString());
        } else {
            LOG.info("debug: before  submitJarFile = new " + "Path(submitJobDir,job.jar);");
            submitJarFile = new Path(submitJobDir, "job.jar");
            LOG.info("debug:" + submitJarFile.toString());
        }
        Path submitJobFile = new Path(submitJobDir, "job.xml");
        Path submitSplitFile = new Path(submitJobDir, "job.split");
        // set this user's id in job configuration, so later job files can
        // be accessed using this user's id
        UnixUserGroupInformation ugi = getUGI(job.getConf());
        // Create a number of filenames in the BSPController's fs namespace
        FileSystem files = getFs();
        files.delete(submitJobDir, true);
        submitJobDir = files.makeQualified(submitJobDir);
        submitJobDir = new Path(submitJobDir.toUri().getPath());
        BSPFsPermission bspSysPerms = new BSPFspermissionImpl(2);
        FileSystem.mkdirs(files, submitJobDir, bspSysPerms.getFp());
        files.mkdirs(submitJobDir);
        short replication = (short) job.getInt("bsp.submit.replication", 10);
        String originalJarPath = null;
        LOG.info("debug: job type is " + job.getJobType());
        if (Constants.USER_BC_BSP_JOB_TYPE_C.equals(job.getJobType())) {
            LOG.info("debug: originalJarPath = job.getJobExe();" + job.getJobExe());
            originalJarPath = job.getJobExe();
            LOG.info("debug:" + submitJarFile.toString());
            job.setJobExe(submitJarFile.toString());
        } else {
            LOG.info("debug: jar");
            originalJarPath = job.getJar();
            job.setJar(submitJarFile.toString());
        }
        if (originalJarPath != null) {
            // copy jar to BSPController's fs
            // use jar name if job is not named.
            if ("".equals(job.getJobName())) {
                job.setJobName(new Path(originalJarPath).getName());
            }
            // job.setJar(submitJarFile.toString());
            fs.copyFromLocalFile(new Path(originalJarPath), submitJarFile);
            fs.setReplication(submitJarFile, replication);
            fs.setPermission(submitJarFile, new BSPFspermissionImpl(0).getFp());
        } else {
            LOG.warn("No job jar file set.  User classes may not be found. "
                    + "See BSPJob#setJar(String) or check Your jar file.");
        }
        // Set the user's name and working directory
        job.setUser(ugi.getUserName());
        if (ugi.getGroupNames().length > 0) {
            job.set("group.name", ugi.getGroupNames()[0]);
        }
        if (new BSPHdfsImpl().getWorkingDirectory() == null) {
            job.setWorkingDirectory(fs.getWorkingDirectory());
        }
        int maxClusterStaffs = jobSubmitClient.getClusterStatus(false).getMaxClusterStaffs();
        if (job.getNumPartition() == 0) {
            job.setNumPartition(maxClusterStaffs);
        }
        if (job.getNumPartition() > maxClusterStaffs) {
            job.setNumPartition(maxClusterStaffs);
        }
        job.setNumBspStaff(job.getNumPartition());
        int splitNum = 0;
        splitNum = writeSplits(job, submitSplitFile);
        if (splitNum > job.getNumPartition() && splitNum <= maxClusterStaffs) {
            job.setNumPartition(splitNum);
            job.setNumBspStaff(job.getNumPartition());
        }
        if (splitNum > maxClusterStaffs) {
            LOG.error("Sorry, the number of files is more than maxClusterStaffs:" + maxClusterStaffs);
            throw new IOException("Could not launch job");
        }
        job.set(Constants.USER_BC_BSP_JOB_SPLIT_FILE, submitSplitFile.toString());
        LOG.info("[Max Staff Number] " + maxClusterStaffs);
        LOG.info("The number of splits for the job is: " + splitNum);
        LOG.info("The number of staffs for the job is: " + job.getNumBspStaff());
        BSPFSDataOutputStream bspout = new BSPFSDataOutputStreamImpl(fs, submitJobFile,
                new BSPFspermissionImpl(0).getFp());
        try {
            job.writeXml(bspout.getOut());
        } finally {
            bspout.close();
        }
        // Now, actually submit the job (using the submit name)
        JobStatus status = jobSubmitClient.submitJob(jobId, submitJobFile.toString());
        if (status != null) {
            return new NetworkedJob(status);
        } else {
            throw new IOException("Could not launch job");
        }
    } catch (FileNotFoundException fnfE) {
        LOG.error("Exception has been catched in BSPJobClient--submitJobInternal !", fnfE);
        Fault f = new Fault(Fault.Type.SYSTEMSERVICE, Fault.Level.INDETERMINATE, "null", fnfE.toString());
        jobSubmitClient.recordFault(f);
        jobSubmitClient.recovery(jobId);
        try {
            FileSystem files = getFs();
            files.delete(submitJobDir, true);
        } catch (IOException e) {
            //LOG.error("Failed to cleanup the submitJobDir:" + submitJobDir);
            throw new RuntimeException("Failed to cleanup the submitJobDir", e);
        }
        return null;
    } catch (ClassNotFoundException cnfE) {
        LOG.error("Exception has been catched in BSPJobClient--submitJobInternal !", cnfE);
        Fault f = new Fault(Fault.Type.SYSTEMSERVICE, Fault.Level.WARNING, "null", cnfE.toString());
        jobSubmitClient.recordFault(f);
        jobSubmitClient.recovery(jobId);
        try {
            FileSystem files = getFs();
            files.delete(submitJobDir, true);
        } catch (IOException e) {
            //LOG.error("Failed to cleanup the submitJobDir:" + submitJobDir);
            throw new RuntimeException("Failed to cleanup the submitJobDir", e);
        }
        return null;
    } catch (InterruptedException iE) {
        LOG.error("Exception has been catched in BSPJobClient--submitJobInternal !", iE);
        Fault f = new Fault(Fault.Type.SYSTEMSERVICE, Fault.Level.CRITICAL, "null", iE.toString());
        jobSubmitClient.recordFault(f);
        jobSubmitClient.recovery(jobId);
        try {
            FileSystem files = getFs();
            files.delete(submitJobDir, true);
        } catch (IOException e) {
            //LOG.error("Failed to cleanup the submitJobDir:" + submitJobDir);
            throw new RuntimeException("Failed to cleanup the submitJobDir", e);
        }
        return null;
    } catch (Exception ioE) {
        LOG.error("Exception has been catched in BSPJobClient--submitJobInternal !", ioE);
        Fault f = new Fault(Fault.Type.DISK, Fault.Level.CRITICAL, "null", ioE.toString());
        jobSubmitClient.recordFault(f);
        jobSubmitClient.recovery(jobId);
        try {
            FileSystem files = getFs();
            files.delete(submitJobDir, true);
        } catch (IOException e) {
            //LOG.error("Failed to cleanup the submitJobDir:" + submitJobDir);
            throw new RuntimeException("Failed to cleanup the submitJobDir", e);
        }
        return null;
    }
}

From source file: com.citic.zxyjs.zwlscx.mapreduce.lib.input.HFileOutputFormatBase.java

License: Apache License

/**
 * Configure <code>job</code> with a TotalOrderPartitioner, partitioning
 * against <code>splitPoints</code>. Cleans up the partitions file after the
 * job exits.
 */
static void configurePartitioner(Job job, List<ImmutableBytesWritable> splitPoints) throws IOException {

    // create the partitions file
    FileSystem fs = FileSystem.get(job.getConfiguration());
    Path partitionsPath = new Path("/tmp", "partitions_" + UUID.randomUUID());
    // makeQualified returns a new Path instead of mutating its argument,
    // so the result must be captured
    partitionsPath = fs.makeQualified(partitionsPath);
    fs.deleteOnExit(partitionsPath);
    writePartitions(job.getConfiguration(), partitionsPath, splitPoints);

    // configure job to use it
    job.setPartitionerClass(TotalOrderPartitioner.class);
    TotalOrderPartitioner.setPartitionFile(job.getConfiguration(), partitionsPath);
}