List of usage examples for org.apache.hadoop.fs.FileSystem.exists
public boolean exists(Path f) throws IOException
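Before the per-project snippets below, here is a minimal, self-contained sketch of the typical call pattern. It is illustrative only: the path, class name, and messages are assumptions for this example and are not taken from any of the source files listed on this page.

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class ExistsExample {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        Path path = new Path("/tmp/example/output"); // hypothetical path, adjust to your cluster
        FileSystem fs = path.getFileSystem(conf);    // resolve the FileSystem backing this path's scheme
        if (fs.exists(path)) {                       // true if the path (file or directory) is present
            System.out.println(path + " already exists");
        } else {
            System.out.println(path + " does not exist");
        }
    }
}

Most of the examples that follow use the same pattern: resolve a FileSystem for a Path, call exists(), and then branch to skip, delete, or create the path.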
From source file: be.uantwerpen.adrem.bigfim.ComputeTidListReducer.java
License: Apache License
private void getPgStartIndex(Configuration conf) {
    try {
        Path path = new Path(basePGDir);
        FileSystem fs = path.getFileSystem(new Configuration());
        if (!fs.exists(path)) {
            pgStartIndex = 0;
            return;
        }
        int largestIx = 0;
        for (FileStatus file : fs.listStatus(path)) {
            String tmp = file.getPath().toString();
            if (!tmp.contains("bucket")) {
                continue;
            }
            tmp = tmp.substring(tmp.lastIndexOf('/'), tmp.length());
            int ix = Integer.parseInt(tmp.split("-")[1]);
            largestIx = Math.max(largestIx, ix);
            pgStartIndex += 1;
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
}
From source file: be.uantwerpen.adrem.hadoop.util.Tools.java
License: Apache License
/**
 * Cleans the Hadoop file system by deleting the specified files if they exist.
 *
 * @param files
 *          the files to delete
 */
public static void cleanDirs(String... files) {
    System.out.println("[Cleaning]: Cleaning HDFS");
    Configuration conf = new Configuration();
    for (String filename : files) {
        System.out.println("[Cleaning]: Trying to delete " + filename);
        Path path = new Path(filename);
        try {
            FileSystem fs = path.getFileSystem(conf);
            if (fs.exists(path)) {
                if (fs.delete(path, true)) {
                    System.out.println("[Cleaning]: Deleted " + filename);
                } else {
                    System.out.println("[Cleaning]: Error while deleting " + filename);
                }
            } else {
                System.out.println("[Cleaning]: " + filename + " does not exist on HDFS");
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
From source file: be.ugent.intec.halvade.MapReduceRunner.java
License: Open Source License
protected int runPass1RNAJob(Configuration pass1Conf, String tmpOutDir)
        throws IOException, InterruptedException, ClassNotFoundException, URISyntaxException {
    HalvadeConf.setIsPass2(pass1Conf, false);
    HalvadeResourceManager.setJobResources(halvadeOpts, pass1Conf, HalvadeResourceManager.RNA_SHMEM_PASS1,
            true, halvadeOpts.useBamInput);
    Job pass1Job = Job.getInstance(pass1Conf, "Halvade pass 1 RNA pipeline");
    pass1Job.addCacheArchive(new URI(halvadeOpts.halvadeBinaries));
    pass1Job.setJarByClass(be.ugent.intec.halvade.hadoop.mapreduce.HalvadeMapper.class);
    FileSystem fs = FileSystem.get(new URI(halvadeOpts.in), pass1Conf);
    try {
        if (fs.getFileStatus(new Path(halvadeOpts.in)).isDirectory()) {
            // add every file in directory
            FileStatus[] files = fs.listStatus(new Path(halvadeOpts.in));
            for (FileStatus file : files) {
                if (!file.isDirectory()) {
                    FileInputFormat.addInputPath(pass1Job, file.getPath());
                }
            }
        } else {
            FileInputFormat.addInputPath(pass1Job, new Path(halvadeOpts.in));
        }
    } catch (IOException | IllegalArgumentException e) {
        Logger.EXCEPTION(e);
    }
    FileSystem outFs = FileSystem.get(new URI(tmpOutDir), pass1Conf);
    boolean skipPass1 = false;
    if (outFs.exists(new Path(tmpOutDir))) {
        // check if genome already exists
        skipPass1 = outFs.exists(new Path(tmpOutDir + "/_SUCCESS"));
        if (skipPass1)
            Logger.DEBUG("pass1 genome already created, skipping pass 1");
        else {
            Logger.INFO("The output directory \'" + tmpOutDir + "\' already exists.");
            Logger.INFO("ERROR: Please remove this directory before trying again.");
            System.exit(-2);
        }
    }
    if (!skipPass1) {
        FileOutputFormat.setOutputPath(pass1Job, new Path(tmpOutDir));
        pass1Job.setMapperClass(be.ugent.intec.halvade.hadoop.mapreduce.StarAlignPassXMapper.class);
        pass1Job.setInputFormatClass(HalvadeTextInputFormat.class);
        pass1Job.setMapOutputKeyClass(GenomeSJ.class);
        pass1Job.setMapOutputValueClass(Text.class);
        pass1Job.setSortComparatorClass(GenomeSJSortComparator.class);
        pass1Job.setGroupingComparatorClass(GenomeSJGroupingComparator.class);
        pass1Job.setNumReduceTasks(1);
        pass1Job.setReducerClass(be.ugent.intec.halvade.hadoop.mapreduce.RebuildStarGenomeReducer.class);
        pass1Job.setOutputKeyClass(LongWritable.class);
        pass1Job.setOutputValueClass(Text.class);
        return runTimedJob(pass1Job, "Halvade pass 1 Job");
    } else
        return 0;
}
From source file: be.ugent.intec.halvade.MapReduceRunner.java
License: Open Source License
protected int runHalvadeJob(Configuration halvadeConf, String tmpOutDir, int jobType)
        throws IOException, URISyntaxException, InterruptedException, ClassNotFoundException {
    String pipeline = "";
    if (jobType == HalvadeResourceManager.RNA_SHMEM_PASS2) {
        HalvadeConf.setIsPass2(halvadeConf, true);
        HalvadeResourceManager.setJobResources(halvadeOpts, halvadeConf, jobType, false,
                halvadeOpts.useBamInput);
        pipeline = RNA_PASS2;
    } else if (jobType == HalvadeResourceManager.DNA) {
        HalvadeResourceManager.setJobResources(halvadeOpts, halvadeConf, jobType, false,
                halvadeOpts.useBamInput);
        pipeline = DNA;
    }
    HalvadeConf.setOutDir(halvadeConf, tmpOutDir);
    FileSystem outFs = FileSystem.get(new URI(tmpOutDir), halvadeConf);
    if (outFs.exists(new Path(tmpOutDir))) {
        Logger.INFO("The output directory \'" + tmpOutDir + "\' already exists.");
        Logger.INFO("ERROR: Please remove this directory before trying again.");
        System.exit(-2);
    }
    if (halvadeOpts.useBamInput)
        setHeaderFile(halvadeOpts.in, halvadeConf);
    Job halvadeJob = Job.getInstance(halvadeConf, "Halvade" + pipeline);
    halvadeJob.addCacheArchive(new URI(halvadeOpts.halvadeBinaries));
    halvadeJob.setJarByClass(be.ugent.intec.halvade.hadoop.mapreduce.HalvadeMapper.class);
    addInputFiles(halvadeOpts.in, halvadeConf, halvadeJob);
    FileOutputFormat.setOutputPath(halvadeJob, new Path(tmpOutDir));
    if (jobType == HalvadeResourceManager.RNA_SHMEM_PASS2) {
        halvadeJob.setMapperClass(be.ugent.intec.halvade.hadoop.mapreduce.StarAlignPassXMapper.class);
        halvadeJob.setReducerClass(be.ugent.intec.halvade.hadoop.mapreduce.RnaGATKReducer.class);
    } else if (jobType == HalvadeResourceManager.DNA) {
        halvadeJob.setMapperClass(halvadeOpts.alignmentTools[halvadeOpts.aln]);
        halvadeJob.setReducerClass(be.ugent.intec.halvade.hadoop.mapreduce.DnaGATKReducer.class);
    }
    halvadeJob.setMapOutputKeyClass(ChromosomeRegion.class);
    halvadeJob.setMapOutputValueClass(SAMRecordWritable.class);
    halvadeJob.setInputFormatClass(HalvadeTextInputFormat.class);
    halvadeJob.setOutputKeyClass(Text.class);
    if (halvadeOpts.mergeBam) {
        halvadeJob.setSortComparatorClass(SimpleChrRegionComparator.class);
        halvadeJob.setOutputValueClass(SAMRecordWritable.class);
    } else {
        halvadeJob.setPartitionerClass(ChrRgPartitioner.class);
        halvadeJob.setSortComparatorClass(ChrRgSortComparator.class);
        halvadeJob.setGroupingComparatorClass(ChrRgGroupingComparator.class);
        halvadeJob.setOutputValueClass(VariantContextWritable.class);
    }
    if (halvadeOpts.justAlign)
        halvadeJob.setNumReduceTasks(0);
    else if (halvadeOpts.mergeBam) {
        halvadeJob.setReducerClass(be.ugent.intec.halvade.hadoop.mapreduce.BamMergeReducer.class);
        halvadeJob.setNumReduceTasks(1);
    } else
        halvadeJob.setNumReduceTasks(halvadeOpts.reduces);
    if (halvadeOpts.useBamInput) {
        halvadeJob.setMapperClass(be.ugent.intec.halvade.hadoop.mapreduce.AlignedBamMapper.class);
        halvadeJob.setInputFormatClass(BAMInputFormat.class);
    }
    return runTimedJob(halvadeJob, "Halvade Job");
}
From source file: be.ugent.intec.halvade.MapReduceRunner.java
License: Open Source License
protected int runCombineJob(String halvadeOutDir, String mergeOutDir, boolean featureCount)
        throws IOException, URISyntaxException, InterruptedException, ClassNotFoundException {
    Configuration combineConf = getConf();
    if (!halvadeOpts.out.endsWith("/"))
        halvadeOpts.out += "/";
    HalvadeConf.setInputDir(combineConf, halvadeOutDir);
    HalvadeConf.setOutDir(combineConf, mergeOutDir);
    FileSystem outFs = FileSystem.get(new URI(mergeOutDir), combineConf);
    if (outFs.exists(new Path(mergeOutDir))) {
        Logger.INFO("The output directory \'" + mergeOutDir + "\' already exists.");
        Logger.INFO("ERROR: Please remove this directory before trying again.");
        System.exit(-2);
    }
    HalvadeConf.setReportAllVariant(combineConf, halvadeOpts.reportAll);
    HalvadeResourceManager.setJobResources(halvadeOpts, combineConf, HalvadeResourceManager.COMBINE, false,
            halvadeOpts.useBamInput);
    Job combineJob = Job.getInstance(combineConf, "HalvadeCombineVCF");
    combineJob.setJarByClass(be.ugent.intec.halvade.hadoop.mapreduce.VCFCombineMapper.class);
    addInputFiles(halvadeOutDir, combineConf, combineJob, featureCount ? ".count" : ".vcf");
    FileOutputFormat.setOutputPath(combineJob, new Path(mergeOutDir));
    combineJob.setMapperClass(featureCount ? be.ugent.intec.halvade.hadoop.mapreduce.HTSeqCombineMapper.class
            : be.ugent.intec.halvade.hadoop.mapreduce.VCFCombineMapper.class);
    combineJob.setMapOutputKeyClass(featureCount ? Text.class : LongWritable.class);
    combineJob.setMapOutputValueClass(featureCount ? LongWritable.class : VariantContextWritable.class);
    combineJob.setInputFormatClass(featureCount ? TextInputFormat.class : VCFInputFormat.class);
    combineJob.setNumReduceTasks(1);
    combineJob.setReducerClass(featureCount ? be.ugent.intec.halvade.hadoop.mapreduce.HTSeqCombineReducer.class
            : be.ugent.intec.halvade.hadoop.mapreduce.VCFCombineReducer.class);
    combineJob.setOutputKeyClass(Text.class);
    combineJob.setOutputValueClass(featureCount ? LongWritable.class : VariantContextWritable.class);
    return runTimedJob(combineJob, (featureCount ? "featureCounts" : "VCF") + " Combine Job");
}
From source file: be.ugent.intec.halvade.uploader.HalvadeUploader.java
License: Open Source License
private int processFiles() throws IOException, InterruptedException, URISyntaxException, Throwable {
    Timer timer = new Timer();
    timer.start();
    AWSUploader upl = null;
    FileSystem fs = null;
    // write to s3?
    boolean useAWS = false;
    if (outputDir.startsWith("s3")) {
        useAWS = true;
        String existingBucketName = outputDir.replace("s3://", "").split("/")[0];
        outputDir = outputDir.replace("s3://" + existingBucketName + "/", "");
        upl = new AWSUploader(existingBucketName, SSE, profile);
    } else {
        Configuration conf = getConf();
        fs = FileSystem.get(new URI(outputDir), conf);
        Path outpath = new Path(outputDir);
        if (fs.exists(outpath) && !fs.getFileStatus(outpath).isDirectory()) {
            Logger.DEBUG("please provide an output directory");
            return 1;
        }
    }
    FileReaderFactory factory = FileReaderFactory.getInstance(mthreads);
    if (manifest != null) {
        Logger.DEBUG("reading input files from " + manifest);
        // read from file
        BufferedReader br = new BufferedReader(new FileReader(manifest));
        String line;
        while ((line = br.readLine()) != null) {
            String[] files = line.split("\t");
            if (files.length == 2) {
                factory.addReader(files[0], files[1], false);
            } else if (files.length == 1) {
                factory.addReader(files[0], null, isInterleaved);
            }
        }
    } else if (file1 != null && file2 != null) {
        Logger.DEBUG("Paired-end read input in 2 files.");
        factory.addReader(file1, file2, false);
    } else if (file1 != null) {
        if (isInterleaved)
            Logger.DEBUG("Single-end read input in 1 files.");
        else
            Logger.DEBUG("Paired-end read input in 1 files.");
        factory.addReader(file1, null, isInterleaved);
    } else {
        Logger.DEBUG("Incorrect input, use either a manifest file or give both file1 and file2 as input.");
    }
    // start reading
    (new Thread(factory)).start();
    int bestThreads = mthreads;
    long maxFileSize = getBestFileSize();
    if (useAWS) {
        AWSInterleaveFiles[] fileThreads = new AWSInterleaveFiles[bestThreads];
        // start interleaveFile threads
        for (int t = 0; t < bestThreads; t++) {
            fileThreads[t] = new AWSInterleaveFiles(outputDir + "halvade_" + t + "_", maxFileSize, upl, t, codec);
            fileThreads[t].start();
        }
        for (int t = 0; t < bestThreads; t++)
            fileThreads[t].join();
        if (upl != null)
            upl.shutDownNow();
    } else {
        HDFSInterleaveFiles[] fileThreads = new HDFSInterleaveFiles[bestThreads];
        // start interleaveFile threads
        for (int t = 0; t < bestThreads; t++) {
            fileThreads[t] = new HDFSInterleaveFiles(outputDir + "halvade_" + t + "_", maxFileSize, fs, t, codec);
            fileThreads[t].start();
        }
        for (int t = 0; t < bestThreads; t++)
            fileThreads[t].join();
    }
    factory.finalize();
    timer.stop();
    Logger.DEBUG("Time to process data: " + timer.getFormattedCurrentTime());
    return 0;
}
From source file: be.ugent.intec.halvade.utils.ChromosomeSplitter.java
License: Open Source License
public void exportSplitter(String filename, Configuration conf) throws URISyntaxException, IOException {
    DataOutputStream dos = null;
    FileSystem hdfs = null;
    try {
        hdfs = FileSystem.get(new URI(filename), conf);
        Path file = new Path(filename);
        if (hdfs.exists(file)) {
            hdfs.delete(file, true);
        }
        OutputStream os = hdfs.create(file);
        dos = new DataOutputStream(os);
        dos.writeInt(regions.size());
        for (BedRegion region : regions) {
            dos.writeUTF(region.contig);
            dos.writeInt(region.start);
            dos.writeInt(region.end);
            dos.writeInt(region.key);
        }
    } finally {
        if (dos != null)
            dos.close();
    }
}
From source file: be.ugent.intec.halvade.utils.HalvadeFileUtils.java
License: Open Source License
/**
 * @return returns 0 if successful, -1 if the file size is incorrect and -2 if an exception occurred
 */
protected static int privateUploadFileToHDFS(TaskInputOutputContext context, FileSystem fs, String from,
        String to) {
    try {
        // check if file is present on HDFS
        Path toPath = new Path(to);
        Path fromPath = new Path(from);
        File f = new File(from);
        if (!fs.exists(toPath)) {
            fs.copyFromLocalFile(fromPath, toPath);
            context.getCounter(HalvadeCounters.FOUT_TO_HDFS).increment(f.length());
        } else {
            // check if filesize is correct
            if (fs.getFileStatus(toPath).getLen() != f.length()) {
                // incorrect filesize, remove and download again
                fs.delete(toPath, false);
                fs.copyFromLocalFile(fromPath, toPath);
                context.getCounter(HalvadeCounters.FOUT_TO_HDFS).increment(f.length());
            }
        }
        if (fs.getFileStatus(toPath).getLen() != f.length())
            return -1;
        else
            return 0;
    } catch (IOException ex) {
        Logger.DEBUG("failed to upload " + from + " to HDFS: " + ex.getLocalizedMessage());
        Logger.EXCEPTION(ex);
        return -2;
    }
}
From source file: be.ugent.intec.halvade.utils.HalvadeFileUtils.java
License: Open Source License
public static String downloadSTARIndex(TaskInputOutputContext context, String id, boolean usePass2Genome)
        throws IOException, URISyntaxException {
    Configuration conf = context.getConfiguration();
    String tmpDir = HalvadeConf.getScratchTempDir(conf);
    String refDir = HalvadeConf.getRefDirOnScratch(conf);
    if (!refDir.endsWith("/"))
        refDir = refDir + "/";
    HalvadeFileLock lock = new HalvadeFileLock(context, tmpDir, STARG_LOCK);
    String refBase = null;
    try {
        lock.getLock();
        ByteBuffer bytes = ByteBuffer.allocate(4);
        if (lock.read(bytes) > 0) {
            bytes.flip();
            long val = bytes.getInt();
            if (val == DEFAULT_LOCK_VAL)
                Logger.DEBUG("reference has been downloaded to local scratch: " + val);
            else {
                Logger.INFO("downloading missing reference index files to local scratch");
                if (usePass2Genome)
                    Logger.DEBUG("using Pass2 genome");
                String HDFSRef = usePass2Genome ? HalvadeConf.getStarDirPass2HDFS(conf)
                        : HalvadeConf.getStarDirOnHDFS(conf);
                Logger.DEBUG("downloading STAR genome from: " + HDFSRef);
                FileSystem fs = FileSystem.get(new URI(HDFSRef), conf);
                refBase = findFile(refDir, usePass2Genome ? HALVADE_STAR_SUFFIX_P2 : HALVADE_STAR_SUFFIX_P1, true);
                boolean foundExisting = (refBase != null);
                if (!foundExisting) {
                    refBase = refDir + id + "-star/";
                    // make dir
                    File makeRefDir = new File(refBase);
                    makeRefDir.mkdir();
                }
                Logger.DEBUG("STAR dir: " + refBase);
                if (!usePass2Genome) {
                    for (String suffix : STAR_REF_FILES) {
                        attemptDownloadFileFromHDFS(context, fs, HDFSRef + suffix, refBase + suffix, RETRIES);
                    }
                    for (String suffix : STAR_REF_OPTIONAL_FILES) {
                        if (fs.exists(new Path(HDFSRef + suffix)))
                            attemptDownloadFileFromHDFS(context, fs, HDFSRef + suffix, refBase + suffix, RETRIES);
                    }
                }
                Logger.INFO("FINISHED downloading the complete reference index to local scratch");
                if (!foundExisting) {
                    File f = new File(
                            refBase + (usePass2Genome ? HALVADE_STAR_SUFFIX_P2 : HALVADE_STAR_SUFFIX_P1));
                    f.createNewFile();
                }
                bytes.clear();
                bytes.putInt(DEFAULT_LOCK_VAL).flip();
                lock.forceWrite(bytes);
            }
        } else {
            Logger.INFO("downloading missing reference index files to local scratch");
            if (usePass2Genome)
                Logger.DEBUG("using Pass2 genome");
            String HDFSRef = usePass2Genome ? HalvadeConf.getStarDirPass2HDFS(conf)
                    : HalvadeConf.getStarDirOnHDFS(conf);
            Logger.DEBUG("downloading STAR genome from: " + HDFSRef);
            FileSystem fs = FileSystem.get(new URI(HDFSRef), conf);
            refBase = findFile(refDir, usePass2Genome ? HALVADE_STAR_SUFFIX_P2 : HALVADE_STAR_SUFFIX_P1, true);
            boolean foundExisting = (refBase != null);
            if (!foundExisting) {
                refBase = refDir + id + "-star/";
                // make dir
                File makeRefDir = new File(refBase);
                makeRefDir.mkdir();
            }
            Logger.DEBUG("STAR dir: " + refBase);
            if (!usePass2Genome) {
                for (String suffix : STAR_REF_FILES) {
                    attemptDownloadFileFromHDFS(context, fs, HDFSRef + suffix, refBase + suffix, RETRIES);
                }
                for (String suffix : STAR_REF_OPTIONAL_FILES) {
                    if (fs.exists(new Path(HDFSRef + suffix)))
                        attemptDownloadFileFromHDFS(context, fs, HDFSRef + suffix, refBase + suffix, RETRIES);
                }
            }
            Logger.INFO("FINISHED downloading the complete reference index to local scratch");
            if (!foundExisting) {
                File f = new File(refBase + (usePass2Genome ? HALVADE_STAR_SUFFIX_P2 : HALVADE_STAR_SUFFIX_P1));
                f.createNewFile();
            }
            bytes.clear();
            bytes.putInt(DEFAULT_LOCK_VAL).flip();
            lock.forceWrite(bytes);
        }
    } catch (InterruptedException ex) {
        Logger.EXCEPTION(ex);
    } finally {
        lock.releaseLock();
    }
    if (refBase == null)
        refBase = findFile(refDir, usePass2Genome ? HALVADE_STAR_SUFFIX_P2 : HALVADE_STAR_SUFFIX_P1, true);
    return refBase;
}
From source file: be.ugent.intec.halvade.utils.HalvadeFileUtils.java
License: Open Source License
public static String[] downloadSites(TaskInputOutputContext context, String id)
        throws IOException, URISyntaxException, InterruptedException {
    Configuration conf = context.getConfiguration();
    String tmpDir = HalvadeConf.getScratchTempDir(conf);
    String refDir = HalvadeConf.getRefDirOnScratch(conf);
    String HDFSsites[] = HalvadeConf.getKnownSitesOnHDFS(conf);
    String[] localSites = new String[HDFSsites.length];
    if (!refDir.endsWith("/"))
        refDir = refDir + "/";
    HalvadeFileLock lock = new HalvadeFileLock(context, refDir, DBSNP_LOCK);
    String refBase = null;
    try {
        lock.getLock();
        ByteBuffer bytes = ByteBuffer.allocate(4);
        if (lock.read(bytes) > 0) {
            bytes.flip();
            long val = bytes.getInt();
            if (val == DEFAULT_LOCK_VAL)
                Logger.DEBUG("dbSNP has been downloaded to local scratch: " + val);
            else {
                Logger.INFO("downloading missing dbSNP to local scratch");
                refBase = findFile(refDir, HALVADE_DBSNP_SUFFIX, true);
                boolean foundExisting = (refBase != null);
                if (!foundExisting) {
                    refBase = refDir + id + "-dbsnp/";
                    // make dir
                    File makeRefDir = new File(refBase);
                    makeRefDir.mkdir();
                }
                Logger.DEBUG("dbSNP dir: " + refBase);
                for (int i = 0; i < HDFSsites.length; i++) {
                    String fullName = HDFSsites[i];
                    String name = fullName.substring(fullName.lastIndexOf('/') + 1);
                    Logger.DEBUG("Downloading " + name);
                    FileSystem fs = FileSystem.get(new URI(fullName), conf);
                    attemptDownloadFileFromHDFS(context, fs, fullName, refBase + name, RETRIES);
                    localSites[i] = refBase + name;
                    // attempt to download .idx file
                    if (!foundExisting && fs.exists(new Path(fullName + ".idx")))
                        attemptDownloadFileFromHDFS(context, fs, fullName + ".idx", refBase + name + ".idx", RETRIES);
                }
                Logger.INFO("finished downloading the new sites to local scratch");
                if (!foundExisting) {
                    File f = new File(refBase + HALVADE_DBSNP_SUFFIX);
                    f.createNewFile();
                }
                bytes.clear();
                bytes.putInt(DEFAULT_LOCK_VAL).flip();
                lock.forceWrite(bytes);
            }
        } else {
            Logger.INFO("downloading missing dbSNP to local scratch");
            refBase = findFile(refDir, HALVADE_DBSNP_SUFFIX, true);
            boolean foundExisting = (refBase != null);
            if (!foundExisting) {
                refBase = refDir + id + "-dbsnp/";
                // make dir
                File makeRefDir = new File(refBase);
                makeRefDir.mkdir();
            }
            Logger.DEBUG("dbSNP dir: " + refBase);
            for (int i = 0; i < HDFSsites.length; i++) {
                String fullName = HDFSsites[i];
                String name = fullName.substring(fullName.lastIndexOf('/') + 1);
                Logger.DEBUG("Downloading " + name);
                FileSystem fs = FileSystem.get(new URI(fullName), conf);
                attemptDownloadFileFromHDFS(context, fs, fullName, refBase + name, RETRIES);
                localSites[i] = refBase + name;
                // attempt to download .idx file
                if (!foundExisting && fs.exists(new Path(fullName + ".idx")))
                    attemptDownloadFileFromHDFS(context, fs, fullName + ".idx", refBase + name + ".idx", RETRIES);
            }
            Logger.INFO("finished downloading the new sites to local scratch");
            if (!foundExisting) {
                File f = new File(refBase + HALVADE_DBSNP_SUFFIX);
                f.createNewFile();
            }
            bytes.clear();
            bytes.putInt(DEFAULT_LOCK_VAL).flip();
            lock.forceWrite(bytes);
        }
    } catch (InterruptedException ex) {
        Logger.EXCEPTION(ex);
    } finally {
        lock.releaseLock();
    }
    if (refBase == null) {
        refBase = findFile(refDir, HALVADE_DBSNP_SUFFIX, true);
        File dir = new File(refBase);
        File[] directoryListing = dir.listFiles();
        if (directoryListing != null) {
            int found = 0;
            for (int i = 0; i < HDFSsites.length; i++) {
                String fullName = HDFSsites[i];
                String name = fullName.substring(fullName.lastIndexOf('/') + 1);
                localSites[i] = refBase + name;
                if ((new File(localSites[i])).exists())
                    found++;
                else
                    Logger.DEBUG(name + " not found in local scratch");
            }
            if (found != HDFSsites.length) {
                throw new IOException(
                        refBase + " has different number of files: " + found + " vs " + localSites.length);
            }
        } else {
            throw new IOException(refBase + " has no files");
        }
    }
    return localSites;
}