List of usage examples for org.apache.hadoop.fs FileSystem rename
public abstract boolean rename(Path src, Path dst) throws IOException;
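Before the project examples, here is a minimal sketch of a typical call, assuming a generic Configuration and hypothetical paths that do not come from any of the projects listed below. Note that rename generally reports failure by returning false rather than throwing, so callers usually check the return value, and some FileSystem implementations require the destination's parent directory to already exist:

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class RenameExample {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);

        Path src = new Path("/tmp/job-output/part-r-00000"); // hypothetical source path
        Path dst = new Path("/data/final/part-r-00000");     // hypothetical destination path

        fs.mkdirs(dst.getParent());    // some implementations expect the parent directory to exist
        if (!fs.rename(src, dst)) {    // rename typically signals failure via its boolean return value
            throw new IOException("Failed to rename " + src + " to " + dst);
        }
    }
}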
From source file:org.apache.tez.mapreduce.output.LocalOnFileSorterOutput.java
License:Apache License
@Override public List<Event> close() throws IOException { LOG.debug("Closing LocalOnFileSorterOutput"); super.close(); TezTaskOutput mapOutputFile = sorter.getMapOutput(); FileSystem localFs = FileSystem.getLocal(conf); Path src = mapOutputFile.getOutputFile(); Path dst = mapOutputFile.getInputFileForWrite(getContext().getTaskIndex(), localFs.getFileStatus(src).getLen()); LOG.info("Renaming src = " + src + ", dst = " + dst); if (LOG.isDebugEnabled()) { LOG.debug("Renaming src = " + src + ", dst = " + dst); }/*from w w w . j av a 2s.co m*/ localFs.rename(src, dst); return null; }
From source file:org.apache.tez.runtime.library.output.LocalOnFileSorterOutput.java
License:Apache License
@Override public List<Event> close() throws IOException { LOG.debug("Closing LocalOnFileSorterOutput"); super.close(); TezTaskOutput mapOutputFile = sorter.getMapOutput(); FileSystem localFs = FileSystem.getLocal(conf); Path src = mapOutputFile.getOutputFile(); Path dst = mapOutputFile.getInputFileForWrite(outputContext.getTaskIndex(), localFs.getFileStatus(src).getLen()); LOG.info("Renaming src = " + src + ", dst = " + dst); if (LOG.isDebugEnabled()) { LOG.debug("Renaming src = " + src + ", dst = " + dst); }/*from w ww .j a v a 2 s. c o m*/ localFs.rename(src, dst); return null; }
From source file:org.archive.access.nutch.jobs.NutchwaxLinkDbMerger.java
License:Open Source License
public void merge(Path output, Path[] dbs, boolean normalize, boolean filter) throws Exception {
    JobConf job = NutchwaxLinkDb.createMergeJob(getConf(), output, normalize, filter);
    for (int i = 0; i < dbs.length; i++) {
        job.addInputPath(new Path(dbs[i], LinkDb.CURRENT_NAME));
    }
    JobClient.runJob(job);

    FileSystem fs = FileSystem.get(getConf());
    fs.mkdirs(output);
    fs.rename(job.getOutputPath(), new Path(output, LinkDb.CURRENT_NAME));
}
From source file:org.archive.nutchwax.PageRankDb.java
License:Apache License
public static void install(JobConf job, Path pageRankDb) throws IOException {
    Path newPageRankDb = FileOutputFormat.getOutputPath(job);
    FileSystem fs = new JobClient(job).getFs();
    Path old = new Path(pageRankDb, "old");
    Path current = new Path(pageRankDb, CURRENT_NAME);

    if (fs.exists(current)) {
        if (fs.exists(old)) {
            fs.delete(old, true);
        }
        fs.rename(current, old);
    }
    fs.mkdirs(pageRankDb);
    fs.rename(newPageRankDb, current);
    if (fs.exists(old)) {
        fs.delete(old, true);
    }
    LockUtil.removeLockFile(fs, new Path(pageRankDb, LOCK_NAME));
}
From source file:org.archive.nutchwax.PageRankDbMerger.java
License:Apache License
public void merge(Path output, Path[] dbs, boolean normalize, boolean filter) throws Exception {
    JobConf job = createMergeJob(getConf(), output, normalize, filter);
    for (int i = 0; i < dbs.length; i++) {
        FileInputFormat.addInputPath(job, new Path(dbs[i], PageRankDb.CURRENT_NAME));
    }
    JobClient.runJob(job);

    FileSystem fs = FileSystem.get(getConf());
    fs.mkdirs(output);
    fs.rename(FileOutputFormat.getOutputPath(job), new Path(output, PageRankDb.CURRENT_NAME));
}
From source file:org.avenir.tree.DataPartitioner.java
License:Apache License
/**
 * @param outPath
 * @param segmentCount
 * @param conf
 * @throws IOException
 */
private void moveOutputToSegmentDir(String outPath, int segmentCount, Configuration conf) throws IOException {
    FileSystem fileSystem = FileSystem.get(conf);
    for (int i = 0; i < segmentCount; ++i) {
        // create segment dir
        String dir = outPath + "/segment=" + i + "/data";
        Path segmentPath = new Path(dir);
        fileSystem.mkdirs(segmentPath);

        // move output to segment dir
        Path srcFile = new Path(outPath + "/part-r-0000" + i);
        Path dstFile = new Path(outPath + "/segment=" + i + "/data/partition.txt");
        fileSystem.rename(srcFile, dstFile);
    }
    fileSystem.close();
}
From source file:org.bgi.flexlab.gaea.data.mapreduce.output.cram.GaeaKeyIgnoringCramRecordWriter.java
License:Open Source License
@Override
public void close(TaskAttemptContext ctx) throws IOException {
    cramContainerStream.finish(true);
    origOutput.close();

    if (rename) {
        final FileSystem srcFS = outputPath.getFileSystem(ctx.getConfiguration());
        if (this.sample != null) {
            Path newName = new Path(outputPath.getParent() + "/" + sample + ".sorted.cram");
            srcFS.rename(outputPath, newName);
        }
    }
}
From source file:org.broadinstitute.sting.gatk.hadoop.GATKJobClient.java
License:Open Source License
static void parseCommandLineArgs(String[] argv, Configuration conf) {
    CommandLineParser parser = new PosixParser();
    Options options = new Options();

    Option gatkdLocOpt = OptionBuilder.withArgName("depjar_loc").hasArg()
            .withDescription("Complete HDFS path of gatk dependency jar").create("djarloc");
    options.addOption(gatkdLocOpt);
    Option bwaLocOpt = OptionBuilder.withArgName("bwa_loc").hasArg()
            .withDescription("Complete HDFS path of bwa binary or bwa.exe file").create("bwaloc");
    options.addOption(bwaLocOpt);
    Option fq1Opt = OptionBuilder.withArgName("fastq_file1").hasArg()
            .withDescription("Complete HDFS path or path relative to user directory for 1st fastq file")
            .create("r1");
    options.addOption(fq1Opt);
    Option fq2Opt = OptionBuilder.withArgName("fastq_file2").hasArg()
            .withDescription("Complete HDFS path or path relative to user directory for 2nd fastq file")
            .create("r2");
    options.addOption(fq2Opt);
    Option bamOpt = OptionBuilder.withArgName("bam_directory").hasArg()
            .withDescription("Complete HDFS directory path or path relative to user directory for input BAM file")
            .create("b");
    options.addOption(bamOpt);
    Option outOpt = OptionBuilder.withArgName("output_directory").hasArg()
            .withDescription("Complete HDFS path or path relative to user directory for output directory")
            .create("o");
    options.addOption(outOpt);
    Option rSizeOpt = OptionBuilder.withArgName("fastq_read_size").hasArg()
            .withDescription("Number of bytes of a read sequence in input FastQ file").create("rsize");
    options.addOption(rSizeOpt);
    Option rPSplitOpt = OptionBuilder.withArgName("reads_per_map_split").hasArg()
            .withDescription("Optional number of reads to be processed by a mapper").create("reads_per_split");
    options.addOption(rPSplitOpt);
    Option nRedOpt = OptionBuilder.withArgName("number_of_reducers").hasArg()
            .withDescription("Optional number of reducers").create("nred");
    options.addOption(nRedOpt);
    Option nThreadOpt = OptionBuilder.withArgName("number_of_threads").hasArg()
            .withDescription("Optional number of threads").create("nthreads");
    options.addOption(nThreadOpt);
    Option refFileOpt = OptionBuilder.withArgName("path_to_reference_dir").hasArg()
            .withDescription("Complete HDFS path of reference directory").create("ref");
    options.addOption(refFileOpt);
    Option kSiteFileOpt = OptionBuilder.withArgName("path_to_knownsites_dir").hasArg()
            .withDescription("Complete HDFS path of known-sites db directory").create("dbfile");
    options.addOption(kSiteFileOpt);
    Option platformOpt = OptionBuilder.withArgName("Linux/Windows").hasArg()
            .withDescription("Platform to run on").create("p");
    options.addOption(platformOpt);

    Option noAlignOpt = new Option("na", "noalign", false, "Don't run Alignment stage");
    options.addOption(noAlignOpt);
    Option noReAlignOpt = new Option("nra", "norealign", false, "Do not run Local Realignment stage");
    options.addOption(noReAlignOpt);
    Option noMarkDupOpt = new Option("nmd", "nomarkdup", false, "Do not run Mark Duplicates stage");
    options.addOption(noMarkDupOpt);
    Option noQRecabOpt = new Option("nqr", "noqrecab", false, "Do not run Quality Recalibration stage");
    options.addOption(noQRecabOpt);
    Option noVarOpt = new Option("nv", "novariant", false, "Do not run Structural Variant stage");
    options.addOption(noVarOpt);
    Option noFVarOpt = new Option("nfv", "nofvariant", false, "Do not run Filter Variant stage");
    options.addOption(noFVarOpt);
    Option noMerOpt = new Option("nm", "nomresults", false, "Do not Merge Results");
    options.addOption(noMerOpt);
    Option isXVariantOpt = new Option("xv", "xvariant", false,
            "enable flag, if variant calling should be done independently for INDELs and SNPs");
    options.addOption(isXVariantOpt);

    try {
        // parse the command line arguments
        String[] args = new GenericOptionsParser(conf, options, argv).getRemainingArgs();
        CommandLine line = parser.parse(options, args);

        if (line.hasOption(noAlignOpt.getOpt()))
            noalign = true;
        if (line.hasOption(noReAlignOpt.getOpt()))
            norealign = true;
        if (line.hasOption(noMarkDupOpt.getOpt()))
            nomarkdup = true;
        if (line.hasOption(noQRecabOpt.getOpt()))
            noqrecab = true;
        if (line.hasOption(noVarOpt.getOpt()))
            novariant = true;
        if (line.hasOption(noFVarOpt.getOpt()))
            nofvariant = true;
        if (line.hasOption(noMerOpt.getOpt()))
            nomresults = true;

        if (line.hasOption(fq1Opt.getOpt()) && line.hasOption(bamOpt.getOpt())) {
            throw new ParseException("Invalid Usage: fastq file and BAM file cannot be given together as input");
        }
        if (line.hasOption(fq2Opt.getOpt()) && !line.hasOption(fq1Opt.getOpt())) {
            throw new ParseException("Invalid Usage: fastq file2 is invalid without fastq file1");
        }
        if (!line.hasOption(fq2Opt.getOpt()) && !line.hasOption(fq1Opt.getOpt())
                && !line.hasOption(bamOpt.getOpt())) {
            throw new ParseException("Invalid Usage: Either the fastq file or BAM file has to be provided as input");
        }

        if (line.hasOption(gatkdLocOpt.getOpt())) {
            gatk_binary_loc = line.getOptionValue(gatkdLocOpt.getOpt());
            validatePath(gatk_binary_loc, conf);
        } else {
            throw new ParseException(
                    "Invalid Usage: GATK dependency jar location (-djarloc) is mandatory for running the pipeline");
        }

        if (!noalign) {
            if (line.hasOption(fq1Opt.getOpt())) {
                readFile1 = line.getOptionValue(fq1Opt.getOpt());
                validatePath(readFile1, conf);
                fqInput = (new Path(readFile1).getParent()).toString();
            }
            if (line.hasOption(fq2Opt.getOpt())) {
                readFile2 = line.getOptionValue(fq2Opt.getOpt());
                conf.setBoolean("gatk.hadoop.pairedend", true);
                validatePath(readFile2, conf);
                conf.set("gatk.hadoop.readfile2", readFile2);
            }
            if (line.hasOption(rSizeOpt.getOpt())) {
                fq_read_size = Integer.parseInt(line.getOptionValue(rSizeOpt.getOpt()));
            } else {
                throw new ParseException("Invalid Usage: read size (-rsize) is mandatory for Alignment");
            }
            if (line.hasOption(bwaLocOpt.getOpt())) {
                bwa_binary_loc = line.getOptionValue(bwaLocOpt.getOpt());
                validatePath(bwa_binary_loc, conf);
            } else {
                throw new ParseException(
                        "Invalid Usage: bwa binary/exe location (-bwaloc) is mandatory for Alignment");
            }
            if (line.hasOption(rPSplitOpt.getOpt())) {
                reads_per_split = Integer.parseInt(line.getOptionValue(rPSplitOpt.getOpt()));
            }
        }

        if (line.hasOption(nRedOpt.getOpt())) {
            nReducers = Integer.parseInt(line.getOptionValue(nRedOpt.getOpt()));
        }
        if (line.hasOption(nThreadOpt.getOpt())) {
            nThreads = Integer.parseInt(line.getOptionValue(nThreadOpt.getOpt()));
            conf.setInt("gatk.hadoop.nthreads", nThreads);
        }

        if (line.hasOption(bamOpt.getOpt())) {
            int rcount = 0;
            BAMInputPath = line.getOptionValue(bamOpt.getOpt());
            validatePath(BAMInputPath, conf);
            Path BAMPath = new Path(BAMInputPath);
            FileSystem fs = BAMPath.getFileSystem(conf);
            FileStatus[] content = fs.listStatus(BAMPath);
            for (int i = 0; i < content.length; i++) {
                String filename = content[i].getPath().getName();
                if (filename.endsWith(".bam")) {
                    String prefix = filename.substring(0, 6);
                    try {
                        Long value = Long.valueOf(prefix);
                    } catch (NumberFormatException e) {
                        String tmpFile = BAMInputPath + Path.SEPARATOR + String.format("%06d", rcount) + "-"
                                + filename;
                        boolean rename = fs.rename(content[i].getPath(), new Path(tmpFile));
                    }
                    rcount++;
                }
            }
        }

        if (line.hasOption(outOpt.getOpt())) {
            outputDir = line.getOptionValue(outOpt.getOpt());
            if (!(new Path(outputDir).getFileSystem(conf).mkdirs(new Path(outputDir)))) {
                throw new Exception("MKDIR failure");
            }
            if (!noalign) {
                BWAOutPath = outputDir + Path.SEPARATOR + "AlignerOut";
                SortBWAOutPath = outputDir + Path.SEPARATOR + "SortedAlignerOut";
                BAMInputPath = outputDir + Path.SEPARATOR + "BAMInput";
            }
            IndelOutPath = outputDir + Path.SEPARATOR + "IndelRealignOut";
            RmdupOutPath = outputDir + Path.SEPARATOR + "DedupOut";
            RecalOutPath = outputDir + Path.SEPARATOR + "RecalibrationOut";
            FinalBAMPath = outputDir + Path.SEPARATOR + "FinalBAMOut";
        } else {
            throw new ParseException("Invalid Usage: output directory is mandatory");
        }

        if (line.hasOption(refFileOpt.getOpt())) {
            Path refFileDir = new Path(line.getOptionValue(refFileOpt.getOpt()));
            FileSystem fs = refFileDir.getFileSystem(conf);
            FileStatus[] content = fs.listStatus(refFileDir);
            for (int i = 0; i < content.length; i++) {
                if ((content[i].getPath().getName()).endsWith(".fa")
                        || (content[i].getPath().getName()).endsWith(".fasta")) {
                    refFileLoc = content[i].getPath().toString();
                }
            }
            validatePath(refFileLoc, conf);
            refFileName = refFileLoc.substring(0, refFileLoc.lastIndexOf("."));
        } else {
            throw new ParseException("Invalid Usage: reference fasta file is mandatory");
        }

        if (line.hasOption(kSiteFileOpt.getOpt())) {
            Path knownSitesDir = new Path(line.getOptionValue(kSiteFileOpt.getOpt()));
            FileSystem fs = knownSitesDir.getFileSystem(conf);
            FileStatus[] content = fs.listStatus(knownSitesDir);
            for (int i = 0; i < content.length; i++) {
                if ((content[i].getPath().getName()).endsWith(".vcf")) {
                    knownSitesLoc = content[i].getPath().toString();
                }
            }
            validatePath(knownSitesLoc, conf);
        }

        if (line.hasOption(platformOpt.getOpt())) {
            platform = line.getOptionValue(platformOpt.getOpt());
            if (platform.equalsIgnoreCase("Linux")) {
                is_azure = false;
                conf.setBoolean("gatk.hadoop.isazure", false);
            }
        }
        if (line.hasOption(isXVariantOpt.getOpt())) {
            xVariantCall = true;
        }
    } catch (ParseException exp) {
        System.out.println(exp.getMessage());
        if (printUsage) {
            HelpFormatter formatter = new HelpFormatter();
            formatter.printHelp("hadoop jar {/local/path/to/SeqInCloud.jar} {options}", options);
        }
        System.exit(-1);
    } catch (Exception exp) {
        System.out.println("Command line parsing error: " + exp.getMessage());
        System.exit(-1);
    }
}
From source file:org.commoncrawl.hadoop.io.S3GetMetdataJob.java
License:Open Source License
public static void main(String[] args) {
    String accessKey = args[0];
    String secretKey = args[1];

    String paths[] = {
        // "2008/06",
        // "2008/07",
        // "2008/08",
        // "2008/09",
        // "2008/10",
        // "2008/11",
        "2009"
    };

    for (int pathIndex = 0; pathIndex < paths.length; ++pathIndex) {
        LOG.info("Processing Path:" + paths[pathIndex]);

        JobConf job = new JobConf(S3GetMetdataJob.class);

        Path tempDir = new Path(job.get("mapred.temp.dir", ".") + "/generate-temp-" + System.currentTimeMillis());

        LOG.info("Output for Path:" + paths[pathIndex] + " is:" + tempDir);
        System.out.println("Output Path is:" + tempDir);

        job.setJobName("S3 To CrawlURLMetadata Job for Path:" + paths[pathIndex]);

        // setup s3 properties
        JetS3tARCSource.setMaxRetries(job, 1);
        // set up S3 credentials ...
        JetS3tARCSource.setAWSAccessKeyID(job, accessKey);
        JetS3tARCSource.setAWSSecretAccessKey(job, secretKey);
        ARCSplitCalculator.setFilesPerSplit(job, 25);
        // set up arc reader properties
        ArcFileReader.setIOTimeoutValue(30000);
        // set input prefixes ...
        JetS3tARCSource.setInputPrefixes(job, paths[pathIndex]);
        // and S3 bucket name ...
        JetS3tARCSource.setBucketName(job, "commoncrawl");
        // and setup arc source for ArcInputFormat
        ARCInputFormat.setARCSourceClass(job, JetS3tARCSource.class);
        // and set up input format ...
        job.setInputFormat(ARCInputFormat.class);
        // set mapper ...
        job.setMapRunnerClass(S3GetMetdataJob.class);
        // setup reducer (identity in this case ... )
        job.setReducerClass(IdentityReducer.class);
        // standard output format ...
        job.setOutputFormat(SequenceFileOutputFormat.class);
        // set output path
        job.setOutputPath(tempDir);
        // map output types
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(CrawlURLMetadata.class);
        // reduce output types
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(CrawlURLMetadata.class);
        // double the number of reducers ...
        // job.setNumReduceTasks(job.getNumReduceTasks() * 2);

        // run the job ...
        try {
            LOG.info("Starting Job:" + job.getJobName());
            JobClient.runJob(job);
            LOG.info("Finished Job:" + job.getJobName());

            Path finalPath = new Path("jobout/" + paths[pathIndex] + "/result");
            LOG.info("Copying Job Output to:" + finalPath);
            FileSystem fs = FileSystem.get(job);
            try {
                fs.mkdirs(finalPath.getParent());
                fs.rename(tempDir, finalPath);
                LOG.info("Copied Job Output to:" + finalPath);
            } finally {
                // fs.close();
            }
        } catch (IOException e) {
            LOG.error(StringUtils.stringifyException(e));
            e.printStackTrace();
        }
    }
}
From source file:org.commoncrawl.mapred.ec2.parser.OutputCommitter.java
License:Open Source License
private void moveTaskOutputs(TaskAttemptContext context, FileSystem fs, Path jobOutputDir, Path taskOutput)
        throws IOException {
    TaskAttemptID attemptId = context.getTaskAttemptID();
    context.getProgressible().progress();

    if (fs.isFile(taskOutput)) {
        Path finalOutputPath = getFinalPath(jobOutputDir, taskOutput, getTempTaskOutputPath(context));
        LOG.info("Renaming:" + taskOutput + " to:" + finalOutputPath);
        if (!fs.rename(taskOutput, finalOutputPath)) {
            LOG.info("Rename Failed for:" + taskOutput + " to:" + finalOutputPath
                    + " Trying Delete and then Rename");
            if (!fs.delete(finalOutputPath, true)) {
                throw new IOException("Failed to delete earlier output of task: " + attemptId);
            }
            LOG.info("Renaming:" + taskOutput + " to: " + finalOutputPath);
            if (!fs.rename(taskOutput, finalOutputPath)) {
                throw new IOException("Failed to save output of task: " + attemptId);
            }
        }
        LOG.info("Moved " + taskOutput + " to " + finalOutputPath);
    } else if (fs.getFileStatus(taskOutput).isDir()) {
        FileStatus[] paths = fs.listStatus(taskOutput);
        Path finalOutputPath = getFinalPath(jobOutputDir, taskOutput, getTempTaskOutputPath(context));
        LOG.info("Moving " + taskOutput + " to " + finalOutputPath);
        fs.mkdirs(finalOutputPath);
        if (paths != null) {
            for (FileStatus path : paths) {
                LOG.info("Moving " + path.getPath());
                moveTaskOutputs(context, fs, jobOutputDir, path.getPath());
            }
        }
    }
}
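Several of the examples above (the LinkDb and PageRankDb mergers, the CommonCrawl jobs, and this OutputCommitter) share the same commit idiom: write job output into a temporary location, then rename it into its final place, clearing any stale destination first. The following condensed sketch of that idiom uses hypothetical paths and a hypothetical helper name; it is not taken from any of the projects listed on this page:

import java.io.IOException;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

final class CommitViaRename {
    /**
     * Moves a completed temp directory to its final location, replacing any
     * previous output. Hypothetical helper for illustration only.
     */
    static void commit(FileSystem fs, Path tempDir, Path finalDir) throws IOException {
        // remove stale output so the rename does not fail on an existing destination
        if (fs.exists(finalDir) && !fs.delete(finalDir, true)) {
            throw new IOException("Could not remove stale output at " + finalDir);
        }
        fs.mkdirs(finalDir.getParent());   // ensure the destination's parent exists
        if (!fs.rename(tempDir, finalDir)) {
            throw new IOException("Could not rename " + tempDir + " to " + finalDir);
        }
    }
}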