Example usage for org.apache.hadoop.fs FileSystem rename

List of usage examples for org.apache.hadoop.fs FileSystem rename

Introduction

On this page you can find example usages of org.apache.hadoop.fs FileSystem rename.

Prototype

public abstract boolean rename(Path src, Path dst) throws IOException;

Document

Renames Path src to Path dst.
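
Note that rename reports many failures by returning false rather than throwing, so callers that care about the outcome should check the boolean result (several of the examples below ignore it). A minimal sketch, assuming a default Configuration and purely hypothetical paths:

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class RenameExample {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);

        // Hypothetical paths used only for illustration.
        Path src = new Path("/tmp/rename-example/src.txt");
        Path dst = new Path("/tmp/rename-example/dst.txt");

        // rename returns false (rather than throwing) for many failure cases,
        // e.g. a missing source or an existing destination on some filesystems.
        if (!fs.rename(src, dst)) {
            throw new IOException("Failed to rename " + src + " to " + dst);
        }
    }
}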

Usage

From source file: org.apache.tez.mapreduce.output.LocalOnFileSorterOutput.java

License: Apache License

@Override
public List<Event> close() throws IOException {
    LOG.debug("Closing LocalOnFileSorterOutput");
    super.close();

    TezTaskOutput mapOutputFile = sorter.getMapOutput();
    FileSystem localFs = FileSystem.getLocal(conf);

    Path src = mapOutputFile.getOutputFile();
    Path dst = mapOutputFile.getInputFileForWrite(getContext().getTaskIndex(),
            localFs.getFileStatus(src).getLen());

    LOG.info("Renaming src = " + src + ", dst = " + dst);
    if (LOG.isDebugEnabled()) {
        LOG.debug("Renaming src = " + src + ", dst = " + dst);
    }
    localFs.rename(src, dst);
    return null;
}

From source file: org.apache.tez.runtime.library.output.LocalOnFileSorterOutput.java

License: Apache License

@Override
public List<Event> close() throws IOException {
    LOG.debug("Closing LocalOnFileSorterOutput");
    super.close();

    TezTaskOutput mapOutputFile = sorter.getMapOutput();
    FileSystem localFs = FileSystem.getLocal(conf);

    Path src = mapOutputFile.getOutputFile();
    Path dst = mapOutputFile.getInputFileForWrite(outputContext.getTaskIndex(),
            localFs.getFileStatus(src).getLen());

    LOG.info("Renaming src = " + src + ", dst = " + dst);
    if (LOG.isDebugEnabled()) {
        LOG.debug("Renaming src = " + src + ", dst = " + dst);
    }
    localFs.rename(src, dst);
    return null;
}

From source file: org.archive.access.nutch.jobs.NutchwaxLinkDbMerger.java

License: Open Source License

public void merge(Path output, Path[] dbs, boolean normalize, boolean filter) throws Exception {
    JobConf job = NutchwaxLinkDb.createMergeJob(getConf(), output, normalize, filter);

    for (int i = 0; i < dbs.length; i++) {
        job.addInputPath(new Path(dbs[i], LinkDb.CURRENT_NAME));
    }

    JobClient.runJob(job);
    FileSystem fs = FileSystem.get(getConf());
    fs.mkdirs(output);
    fs.rename(job.getOutputPath(), new Path(output, LinkDb.CURRENT_NAME));
}

From source file: org.archive.nutchwax.PageRankDb.java

License: Apache License

public static void install(JobConf job, Path pageRankDb) throws IOException {
    Path newPageRankDb = FileOutputFormat.getOutputPath(job);
    FileSystem fs = new JobClient(job).getFs();
    Path old = new Path(pageRankDb, "old");
    Path current = new Path(pageRankDb, CURRENT_NAME);
    if (fs.exists(current)) {
        if (fs.exists(old))
            fs.delete(old, true);
        fs.rename(current, old);
    }
    fs.mkdirs(pageRankDb);
    fs.rename(newPageRankDb, current);
    if (fs.exists(old))
        fs.delete(old, true);
    LockUtil.removeLockFile(fs, new Path(pageRankDb, LOCK_NAME));
}
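
The install method above follows a publish-by-rename pattern: the existing current directory is parked as "old", the freshly written job output is renamed into place, and the backup is deleted once the swap completes. A minimal sketch of that sequence, written as a bare helper method like the other excerpts here (the method name and directory names are hypothetical, not taken from PageRankDb):

static void publish(FileSystem fs, Path dbRoot, Path newOutput) throws IOException {
    Path current = new Path(dbRoot, "current");   // hypothetical directory names
    Path old = new Path(dbRoot, "old");
    if (fs.exists(current)) {
        if (fs.exists(old)) {
            fs.delete(old, true);                 // clear any stale backup
        }
        fs.rename(current, old);                  // park the previous version
    }
    fs.mkdirs(dbRoot);
    fs.rename(newOutput, current);                // move the new output into place
    if (fs.exists(old)) {
        fs.delete(old, true);                     // drop the backup after the swap
    }
}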

From source file: org.archive.nutchwax.PageRankDbMerger.java

License: Apache License

public void merge(Path output, Path[] dbs, boolean normalize, boolean filter) throws Exception {
    JobConf job = createMergeJob(getConf(), output, normalize, filter);
    for (int i = 0; i < dbs.length; i++) {
        FileInputFormat.addInputPath(job, new Path(dbs[i], PageRankDb.CURRENT_NAME));
    }
    JobClient.runJob(job);
    FileSystem fs = FileSystem.get(getConf());
    fs.mkdirs(output);
    fs.rename(FileOutputFormat.getOutputPath(job), new Path(output, PageRankDb.CURRENT_NAME));
}

From source file: org.avenir.tree.DataPartitioner.java

License: Apache License

/**
 * @param outPath
 * @param segmentCount
 * @param conf
 * @throws IOException
 */
private void moveOutputToSegmentDir(String outPath, int segmentCount, Configuration conf) throws IOException {
    FileSystem fileSystem = FileSystem.get(conf);
    for (int i = 0; i < segmentCount; ++i) {
        //create segment dir
        String dir = outPath + "/segment=" + i + "/data";
        Path segmentPath = new Path(dir);
        fileSystem.mkdirs(segmentPath);

        //move output to segment dir
        Path srcFile = new Path(outPath + "/part-r-0000" + i);
        Path dstFile = new Path(outPath + "/segment=" + i + "/data/partition.txt");
        fileSystem.rename(srcFile, dstFile);
    }

    fileSystem.close();
}
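
Note that the "part-r-0000" + i prefix above only matches the default reducer output names (part-r-00000, part-r-00001, ...) while i is a single digit. A hedged variant of the move step, formatting the index instead and checking the rename result (not taken from the original source; it reuses the loop variables above):

        // Assumes the default MapReduce reducer output naming scheme.
        Path srcFile = new Path(outPath + String.format("/part-r-%05d", i));
        Path dstFile = new Path(outPath + "/segment=" + i + "/data/partition.txt");
        if (!fileSystem.rename(srcFile, dstFile)) {
            throw new IOException("Failed to move " + srcFile + " to " + dstFile);
        }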

From source file: org.bgi.flexlab.gaea.data.mapreduce.output.cram.GaeaKeyIgnoringCramRecordWriter.java

License: Open Source License

@Override
public void close(TaskAttemptContext ctx) throws IOException {
    cramContainerStream.finish(true);
    origOutput.close();

    if (rename) {
        final FileSystem srcFS = outputPath.getFileSystem(ctx.getConfiguration());
        if (this.sample != null) {
            Path newName = new Path(outputPath.getParent() + "/" + sample + ".sorted.cram");
            srcFS.rename(outputPath, newName);
        }
    }
}

From source file: org.broadinstitute.sting.gatk.hadoop.GATKJobClient.java

License: Open Source License

static void parseCommandLineArgs(String[] argv, Configuration conf) {

    CommandLineParser parser = new PosixParser();

    Options options = new Options();

    Option gatkdLocOpt = OptionBuilder.withArgName("depjar_loc").hasArg()
            .withDescription("Complete HDFS path of gatk dependency jar").create("djarloc");
    options.addOption(gatkdLocOpt);
    Option bwaLocOpt = OptionBuilder.withArgName("bwa_loc").hasArg()
            .withDescription("Complete HDFS path of bwa binary or bwa.exe file").create("bwaloc");
    options.addOption(bwaLocOpt);
    Option fq1Opt = OptionBuilder.withArgName("fastq_file1").hasArg()
            .withDescription("Complete HDFS path or path relative to user directory for 1st fastq file")
            .create("r1");
    options.addOption(fq1Opt);
    Option fq2Opt = OptionBuilder.withArgName("fastq_file2").hasArg()
            .withDescription("Complete HDFS path or path relative to user directory for 2nd fastq file")
            .create("r2");
    options.addOption(fq2Opt);
    Option bamOpt = OptionBuilder.withArgName("bam_directory").hasArg()
            .withDescription(
                    "Complete HDFS directory path or path relative to user directory for input BAM file")
            .create("b");
    options.addOption(bamOpt);
    Option outOpt = OptionBuilder.withArgName("output_directory").hasArg()
            .withDescription("Complete HDFS path or path relative to user directory for output directory")
            .create("o");
    options.addOption(outOpt);
    Option rSizeOpt = OptionBuilder.withArgName("fastq_read_size").hasArg()
            .withDescription("Number of bytes of a read sequence in input FastQ file").create("rsize");
    options.addOption(rSizeOpt);
    Option rPSplitOpt = OptionBuilder.withArgName("reads_per_map_split").hasArg()
            .withDescription("Optional number of reads to be processed by a mapper").create("reads_per_split");
    options.addOption(rPSplitOpt);
    Option nRedOpt = OptionBuilder.withArgName("number_of_reducers").hasArg()
            .withDescription("Optional number of reducers").create("nred");
    options.addOption(nRedOpt);
    Option nThreadOpt = OptionBuilder.withArgName("number_of_threads").hasArg()
            .withDescription("Optional number of threads").create("nthreads");
    options.addOption(nThreadOpt);
    Option refFileOpt = OptionBuilder.withArgName("path_to_reference_dir").hasArg()
            .withDescription("Complete HDFS path of reference directory").create("ref");
    options.addOption(refFileOpt);
    Option kSiteFileOpt = OptionBuilder.withArgName("path_to_knownsites_dir").hasArg()
            .withDescription("Complete HDFS path of known-sites db directory").create("dbfile");
    options.addOption(kSiteFileOpt);

    Option platformOpt = OptionBuilder.withArgName("Linux/Windows").hasArg()
            .withDescription("Platform to run on").create("p");
    options.addOption(platformOpt);

    Option noAlignOpt = new Option("na", "noalign", false, "Don't run Alignment stage");
    options.addOption(noAlignOpt);

    Option noReAlignOpt = new Option("nra", "norealign", false, "Do not run Local Realignment stage");
    options.addOption(noReAlignOpt);

    Option noMarkDupOpt = new Option("nmd", "nomarkdup", false, "Do not run Mark Duplicates stage");
    options.addOption(noMarkDupOpt);

    Option noQRecabOpt = new Option("nqr", "noqrecab", false, "Do not run Quality Recalibration stage");
    options.addOption(noQRecabOpt);

    Option noVarOpt = new Option("nv", "novariant", false, "Do not run Structural Variant stage");
    options.addOption(noVarOpt);

    Option noFVarOpt = new Option("nfv", "nofvariant", false, "Do not run Filter Variant stage");
    options.addOption(noFVarOpt);

    Option noMerOpt = new Option("nm", "nomresults", false, "Do not Merge Results");
    options.addOption(noMerOpt);

    Option isXVariantOpt = new Option("xv", "xvariant", false,
            "enable flag, if variant calling should be done independently for INDELs and SNPs");
    options.addOption(isXVariantOpt);

    try {
        // parse the command line arguments
        String[] args = new GenericOptionsParser(conf, options, argv).getRemainingArgs();
        CommandLine line = parser.parse(options, args);

        if (line.hasOption(noAlignOpt.getOpt()))
            noalign = true;
        if (line.hasOption(noReAlignOpt.getOpt()))
            norealign = true;
        if (line.hasOption(noMarkDupOpt.getOpt()))
            nomarkdup = true;
        if (line.hasOption(noQRecabOpt.getOpt()))
            noqrecab = true;
        if (line.hasOption(noVarOpt.getOpt()))
            novariant = true;
        if (line.hasOption(noFVarOpt.getOpt()))
            nofvariant = true;
        if (line.hasOption(noMerOpt.getOpt()))
            nomresults = true;

        if (line.hasOption(fq1Opt.getOpt()) && line.hasOption(bamOpt.getOpt())) {
            throw new ParseException(
                    "Invalid Usage: fastq file and BAM file cannot be given together as input");
        }
        if (line.hasOption(fq2Opt.getOpt()) && !line.hasOption(fq1Opt.getOpt())) {
            throw new ParseException("Invalid Usage: fastq file2 is invalid without fastq file1");
        }
        if (!line.hasOption(fq2Opt.getOpt()) && !line.hasOption(fq1Opt.getOpt())
                && !line.hasOption(bamOpt.getOpt())) {
            throw new ParseException(
                    "Invalid Usage: Either the fastq file or BAM file has to be provided as input");
        }
        if (line.hasOption(gatkdLocOpt.getOpt())) {
            gatk_binary_loc = line.getOptionValue(gatkdLocOpt.getOpt());
            validatePath(gatk_binary_loc, conf);
        } else {
            throw new ParseException(
                    "Invalid Usage: GATK dependency jar location (-djarloc) is mandatory for running the pipeline");
        }

        if (!noalign) {
            if (line.hasOption(fq1Opt.getOpt())) {
                readFile1 = line.getOptionValue(fq1Opt.getOpt());
                validatePath(readFile1, conf);
                fqInput = (new Path(readFile1).getParent()).toString();
            }
            if (line.hasOption(fq2Opt.getOpt())) {
                readFile2 = line.getOptionValue(fq2Opt.getOpt());
                conf.setBoolean("gatk.hadoop.pairedend", true);
                validatePath(readFile2, conf);
                conf.set("gatk.hadoop.readfile2", readFile2);
            }
            if (line.hasOption(rSizeOpt.getOpt())) {
                fq_read_size = Integer.parseInt(line.getOptionValue(rSizeOpt.getOpt()));
            } else {
                throw new ParseException("Invalid Usage: read size (-rsize) is mandatory for Alignment");
            }
            if (line.hasOption(bwaLocOpt.getOpt())) {
                bwa_binary_loc = line.getOptionValue(bwaLocOpt.getOpt());
                validatePath(bwa_binary_loc, conf);
            } else {
                throw new ParseException(
                        "Invalid Usage: bwa binary/exe location (-bwaloc) is mandatory for Alignment");
            }
            if (line.hasOption(rPSplitOpt.getOpt())) {
                reads_per_split = Integer.parseInt(line.getOptionValue(rPSplitOpt.getOpt()));
            }
        }
        if (line.hasOption(nRedOpt.getOpt())) {
            nReducers = Integer.parseInt(line.getOptionValue(nRedOpt.getOpt()));
        }
        if (line.hasOption(nThreadOpt.getOpt())) {
            nThreads = Integer.parseInt(line.getOptionValue(nThreadOpt.getOpt()));
            conf.setInt("gatk.hadoop.nthreads", nThreads);
        }
        if (line.hasOption(bamOpt.getOpt())) {
            int rcount = 0;
            BAMInputPath = line.getOptionValue(bamOpt.getOpt());
            validatePath(BAMInputPath, conf);
            Path BAMPath = new Path(BAMInputPath);
            FileSystem fs = BAMPath.getFileSystem(conf);
            FileStatus[] content = fs.listStatus(BAMPath);
            for (int i = 0; i < content.length; i++) {
                String filename = content[i].getPath().getName();
                if (filename.endsWith(".bam")) {
                    String prefix = filename.substring(0, 6);
                    try {
                        Long value = Long.valueOf(prefix);
                    } catch (NumberFormatException e) {
                        String tmpFile = BAMInputPath + Path.SEPARATOR + String.format("%06d", rcount) + "-"
                                + filename;
                        boolean rename = fs.rename(content[i].getPath(), new Path(tmpFile));
                    }
                    rcount++;
                }
            }
        }
        if (line.hasOption(outOpt.getOpt())) {
            outputDir = line.getOptionValue(outOpt.getOpt());
            if (!(new Path(outputDir).getFileSystem(conf).mkdirs(new Path(outputDir)))) {
                throw new Exception("MKDIR failure");
            }
            if (!noalign) {
                BWAOutPath = outputDir + Path.SEPARATOR + "AlignerOut";
                SortBWAOutPath = outputDir + Path.SEPARATOR + "SortedAlignerOut";
                BAMInputPath = outputDir + Path.SEPARATOR + "BAMInput";
            }
            IndelOutPath = outputDir + Path.SEPARATOR + "IndelRealignOut";
            RmdupOutPath = outputDir + Path.SEPARATOR + "DedupOut";
            RecalOutPath = outputDir + Path.SEPARATOR + "RecalibrationOut";
            FinalBAMPath = outputDir + Path.SEPARATOR + "FinalBAMOut";
        } else {
            throw new ParseException("Invalid Usage: output directory is mandatory");
        }
        if (line.hasOption(refFileOpt.getOpt())) {
            Path refFileDir = new Path(line.getOptionValue(refFileOpt.getOpt()));
            FileSystem fs = refFileDir.getFileSystem(conf);
            FileStatus[] content = fs.listStatus(refFileDir);
            for (int i = 0; i < content.length; i++) {
                if ((content[i].getPath().getName()).endsWith(".fa")
                        || (content[i].getPath().getName()).endsWith(".fasta")) {
                    refFileLoc = content[i].getPath().toString();
                }
            }
            validatePath(refFileLoc, conf);
            refFileName = refFileLoc.substring(0, refFileLoc.lastIndexOf("."));
        } else {
            throw new ParseException("Invalid Usage: reference fasta file is mandatory");
        }
        if (line.hasOption(kSiteFileOpt.getOpt())) {
            Path knownSitesDir = new Path(line.getOptionValue(kSiteFileOpt.getOpt()));
            FileSystem fs = knownSitesDir.getFileSystem(conf);
            FileStatus[] content = fs.listStatus(knownSitesDir);
            for (int i = 0; i < content.length; i++) {
                if ((content[i].getPath().getName()).endsWith(".vcf")) {
                    knownSitesLoc = content[i].getPath().toString();
                }
            }
            validatePath(knownSitesLoc, conf);
        }
        if (line.hasOption(platformOpt.getOpt())) {
            platform = line.getOptionValue(platformOpt.getOpt());
            if (platform.equalsIgnoreCase("Linux")) {
                is_azure = false;
                conf.setBoolean("gatk.hadoop.isazure", false);
            }
        }
        if (line.hasOption(isXVariantOpt.getOpt())) {
            xVariantCall = true;
        }
    } catch (ParseException exp) {
        System.out.println(exp.getMessage());
        if (printUsage) {
            HelpFormatter formatter = new HelpFormatter();
            formatter.printHelp("hadoop jar {/local/path/to/SeqInCloud.jar} {options}", options);
        }
        System.exit(-1);
    } catch (Exception exp) {
        System.out.println("Command line parsing error: " + exp.getMessage());
        System.exit(-1);
    }
}

From source file: org.commoncrawl.hadoop.io.S3GetMetdataJob.java

License: Open Source License

public static void main(String[] args) {

    String accessKey = args[0];
    String secretKey = args[1];

    String paths[] = {
            // "2008/06",
            // "2008/07",
            // "2008/08",
            // "2008/09",
            // "2008/10",
            // "2008/11",
            "2009" };

    for (int pathIndex = 0; pathIndex < paths.length; ++pathIndex) {

        LOG.info("Processing Path:" + paths[pathIndex]);

        JobConf job = new JobConf(S3GetMetdataJob.class);

        Path tempDir = new Path(
                job.get("mapred.temp.dir", ".") + "/generate-temp-" + System.currentTimeMillis());

        LOG.info("Output for Path:" + paths[pathIndex] + " is:" + tempDir);
        System.out.println("Output Path is:" + tempDir);

        job.setJobName("S3 To CrawlURLMetadata Job for Path:" + paths[pathIndex]);

        // setup s3 properties
        JetS3tARCSource.setMaxRetries(job, 1);
        // set up S3 credentials ...
        JetS3tARCSource.setAWSAccessKeyID(job, accessKey);
        JetS3tARCSource.setAWSSecretAccessKey(job, secretKey);
        ARCSplitCalculator.setFilesPerSplit(job, 25);
        // set up arc reader properties
        ArcFileReader.setIOTimeoutValue(30000);
        // set input prefixes ...
        JetS3tARCSource.setInputPrefixes(job, paths[pathIndex]);
        // and S3 bucket name ...
        JetS3tARCSource.setBucketName(job, "commoncrawl");
        // and setup arc source for ArcInputFormat
        ARCInputFormat.setARCSourceClass(job, JetS3tARCSource.class);
        // and set up input format ...
        job.setInputFormat(ARCInputFormat.class);
        // set mapper ...
        job.setMapRunnerClass(S3GetMetdataJob.class);
        // setup reducer (identity in this case ... )
        job.setReducerClass(IdentityReducer.class);
        // standard output format ...
        job.setOutputFormat(SequenceFileOutputFormat.class);
        // set output path
        job.setOutputPath(tempDir);
        // map output types
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(CrawlURLMetadata.class);
        // reduce output types
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(CrawlURLMetadata.class);
        // double the number of reducers ...
        // job.setNumReduceTasks(job.getNumReduceTasks() * 2);

        // run the job ...
        try {
            LOG.info("Starting Job:" + job.getJobName());
            JobClient.runJob(job);
            LOG.info("Finished Job:" + job.getJobName());

            Path finalPath = new Path("jobout/" + paths[pathIndex] + "/result");
            LOG.info("Copying Job Output to:" + finalPath);
            FileSystem fs = FileSystem.get(job);

            try {
                fs.mkdirs(finalPath.getParent());
                fs.rename(tempDir, finalPath);
                LOG.info("Copied Job Output to:" + finalPath);
            } finally {
                // fs.close();
            }

        } catch (IOException e) {
            LOG.error(StringUtils.stringifyException(e));
            e.printStackTrace();
        }
    }
}

From source file: org.commoncrawl.mapred.ec2.parser.OutputCommitter.java

License: Open Source License

private void moveTaskOutputs(TaskAttemptContext context, FileSystem fs, Path jobOutputDir, Path taskOutput)
        throws IOException {
    TaskAttemptID attemptId = context.getTaskAttemptID();
    context.getProgressible().progress();
    if (fs.isFile(taskOutput)) {
        Path finalOutputPath = getFinalPath(jobOutputDir, taskOutput, getTempTaskOutputPath(context));
        LOG.info("Renaming:" + taskOutput + " to:" + finalOutputPath);
        if (!fs.rename(taskOutput, finalOutputPath)) {
            LOG.info("Rename Failed for:" + taskOutput + " to:" + finalOutputPath
                    + " Trying Delete and then Rename");
            if (!fs.delete(finalOutputPath, true)) {
                throw new IOException("Failed to delete earlier output of task: " + attemptId);
            }
            LOG.info("Renaming:" + taskOutput + " to: " + finalOutputPath);
            if (!fs.rename(taskOutput, finalOutputPath)) {
                throw new IOException("Failed to save output of task: " + attemptId);
            }
        }
        LOG.info("Moved " + taskOutput + " to " + finalOutputPath);
    } else if (fs.getFileStatus(taskOutput).isDir()) {
        FileStatus[] paths = fs.listStatus(taskOutput);
        Path finalOutputPath = getFinalPath(jobOutputDir, taskOutput, getTempTaskOutputPath(context));
        LOG.info("Moving " + taskOutput + " to " + finalOutputPath);
        fs.mkdirs(finalOutputPath);
        if (paths != null) {
            for (FileStatus path : paths) {
                LOG.info("Moving " + path.getPath());
                moveTaskOutputs(context, fs, jobOutputDir, path.getPath());
            }
        }
    }
}