List of usage examples for org.apache.hadoop.fs.FileSystem.exists
public boolean exists(Path f) throws IOException
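Before the per-project snippets below, here is a minimal, self-contained sketch of the typical call pattern. It is illustrative only: the path, class name, and messages are assumptions for this example and are not taken from any of the source files listed on this page.

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class ExistsExample {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        Path path = new Path("/tmp/example/output"); // hypothetical path, adjust to your cluster
        FileSystem fs = path.getFileSystem(conf);    // resolve the FileSystem backing this path's scheme
        if (fs.exists(path)) {                       // true if the path (file or directory) is present
            System.out.println(path + " already exists");
        } else {
            System.out.println(path + " does not exist");
        }
    }
}

Most of the examples that follow use the same pattern: resolve a FileSystem for a Path, call exists(), and then branch to skip, delete, or create the path.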
From source file: be.uantwerpen.adrem.bigfim.ComputeTidListReducer.java
License: Apache License
private void getPgStartIndex(Configuration conf) {
    try {
        Path path = new Path(basePGDir);
        FileSystem fs = path.getFileSystem(new Configuration());
        if (!fs.exists(path)) {
            pgStartIndex = 0;
            return;
        }
        int largestIx = 0;
        for (FileStatus file : fs.listStatus(path)) {
            String tmp = file.getPath().toString();
            if (!tmp.contains("bucket")) {
                continue;
            }
            tmp = tmp.substring(tmp.lastIndexOf('/'), tmp.length());
            int ix = Integer.parseInt(tmp.split("-")[1]);
            largestIx = Math.max(largestIx, ix);
            pgStartIndex += 1;
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
}
From source file: be.uantwerpen.adrem.hadoop.util.Tools.java
License: Apache License
/**
 * Cleans the Hadoop file system by deleting the specified files if they exist.
 *
 * @param files
 *          the files to delete
 */
public static void cleanDirs(String... files) {
    System.out.println("[Cleaning]: Cleaning HDFS");
    Configuration conf = new Configuration();
    for (String filename : files) {
        System.out.println("[Cleaning]: Trying to delete " + filename);
        Path path = new Path(filename);
        try {
            FileSystem fs = path.getFileSystem(conf);
            if (fs.exists(path)) {
                if (fs.delete(path, true)) {
                    System.out.println("[Cleaning]: Deleted " + filename);
                } else {
                    System.out.println("[Cleaning]: Error while deleting " + filename);
                }
            } else {
                System.out.println("[Cleaning]: " + filename + " does not exist on HDFS");
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
From source file: be.ugent.intec.halvade.MapReduceRunner.java
License: Open Source License
protected int runPass1RNAJob(Configuration pass1Conf, String tmpOutDir)
        throws IOException, InterruptedException, ClassNotFoundException, URISyntaxException {
    HalvadeConf.setIsPass2(pass1Conf, false);
    HalvadeResourceManager.setJobResources(halvadeOpts, pass1Conf, HalvadeResourceManager.RNA_SHMEM_PASS1,
            true, halvadeOpts.useBamInput);
    Job pass1Job = Job.getInstance(pass1Conf, "Halvade pass 1 RNA pipeline");
    pass1Job.addCacheArchive(new URI(halvadeOpts.halvadeBinaries));
    pass1Job.setJarByClass(be.ugent.intec.halvade.hadoop.mapreduce.HalvadeMapper.class);
    FileSystem fs = FileSystem.get(new URI(halvadeOpts.in), pass1Conf);
    try {
        if (fs.getFileStatus(new Path(halvadeOpts.in)).isDirectory()) {
            // add every file in directory
            FileStatus[] files = fs.listStatus(new Path(halvadeOpts.in));
            for (FileStatus file : files) {
                if (!file.isDirectory()) {
                    FileInputFormat.addInputPath(pass1Job, file.getPath());
                }
            }
        } else {
            FileInputFormat.addInputPath(pass1Job, new Path(halvadeOpts.in));
        }
    } catch (IOException | IllegalArgumentException e) {
        Logger.EXCEPTION(e);
    }
    FileSystem outFs = FileSystem.get(new URI(tmpOutDir), pass1Conf);
    boolean skipPass1 = false;
    if (outFs.exists(new Path(tmpOutDir))) {
        // check if genome already exists
        skipPass1 = outFs.exists(new Path(tmpOutDir + "/_SUCCESS"));
        if (skipPass1)
            Logger.DEBUG("pass1 genome already created, skipping pass 1");
        else {
            Logger.INFO("The output directory \'" + tmpOutDir + "\' already exists.");
            Logger.INFO("ERROR: Please remove this directory before trying again.");
            System.exit(-2);
        }
    }
    if (!skipPass1) {
        FileOutputFormat.setOutputPath(pass1Job, new Path(tmpOutDir));
        pass1Job.setMapperClass(be.ugent.intec.halvade.hadoop.mapreduce.StarAlignPassXMapper.class);
        pass1Job.setInputFormatClass(HalvadeTextInputFormat.class);
        pass1Job.setMapOutputKeyClass(GenomeSJ.class);
        pass1Job.setMapOutputValueClass(Text.class);
        pass1Job.setSortComparatorClass(GenomeSJSortComparator.class);
        pass1Job.setGroupingComparatorClass(GenomeSJGroupingComparator.class);
        pass1Job.setNumReduceTasks(1);
        pass1Job.setReducerClass(be.ugent.intec.halvade.hadoop.mapreduce.RebuildStarGenomeReducer.class);
        pass1Job.setOutputKeyClass(LongWritable.class);
        pass1Job.setOutputValueClass(Text.class);
        return runTimedJob(pass1Job, "Halvade pass 1 Job");
    } else
        return 0;
}
From source file: be.ugent.intec.halvade.MapReduceRunner.java
License: Open Source License
protected int runHalvadeJob(Configuration halvadeConf, String tmpOutDir, int jobType)
        throws IOException, URISyntaxException, InterruptedException, ClassNotFoundException {
    String pipeline = "";
    if (jobType == HalvadeResourceManager.RNA_SHMEM_PASS2) {
        HalvadeConf.setIsPass2(halvadeConf, true);
        HalvadeResourceManager.setJobResources(halvadeOpts, halvadeConf, jobType, false,
                halvadeOpts.useBamInput);
        pipeline = RNA_PASS2;
    } else if (jobType == HalvadeResourceManager.DNA) {
        HalvadeResourceManager.setJobResources(halvadeOpts, halvadeConf, jobType, false,
                halvadeOpts.useBamInput);
        pipeline = DNA;
    }
    HalvadeConf.setOutDir(halvadeConf, tmpOutDir);
    FileSystem outFs = FileSystem.get(new URI(tmpOutDir), halvadeConf);
    if (outFs.exists(new Path(tmpOutDir))) {
        Logger.INFO("The output directory \'" + tmpOutDir + "\' already exists.");
        Logger.INFO("ERROR: Please remove this directory before trying again.");
        System.exit(-2);
    }
    if (halvadeOpts.useBamInput)
        setHeaderFile(halvadeOpts.in, halvadeConf);
    Job halvadeJob = Job.getInstance(halvadeConf, "Halvade" + pipeline);
    halvadeJob.addCacheArchive(new URI(halvadeOpts.halvadeBinaries));
    halvadeJob.setJarByClass(be.ugent.intec.halvade.hadoop.mapreduce.HalvadeMapper.class);
    addInputFiles(halvadeOpts.in, halvadeConf, halvadeJob);
    FileOutputFormat.setOutputPath(halvadeJob, new Path(tmpOutDir));
    if (jobType == HalvadeResourceManager.RNA_SHMEM_PASS2) {
        halvadeJob.setMapperClass(be.ugent.intec.halvade.hadoop.mapreduce.StarAlignPassXMapper.class);
        halvadeJob.setReducerClass(be.ugent.intec.halvade.hadoop.mapreduce.RnaGATKReducer.class);
    } else if (jobType == HalvadeResourceManager.DNA) {
        halvadeJob.setMapperClass(halvadeOpts.alignmentTools[halvadeOpts.aln]);
        halvadeJob.setReducerClass(be.ugent.intec.halvade.hadoop.mapreduce.DnaGATKReducer.class);
    }
    halvadeJob.setMapOutputKeyClass(ChromosomeRegion.class);
    halvadeJob.setMapOutputValueClass(SAMRecordWritable.class);
    halvadeJob.setInputFormatClass(HalvadeTextInputFormat.class);
    halvadeJob.setOutputKeyClass(Text.class);
    if (halvadeOpts.mergeBam) {
        halvadeJob.setSortComparatorClass(SimpleChrRegionComparator.class);
        halvadeJob.setOutputValueClass(SAMRecordWritable.class);
    } else {
        halvadeJob.setPartitionerClass(ChrRgPartitioner.class);
        halvadeJob.setSortComparatorClass(ChrRgSortComparator.class);
        halvadeJob.setGroupingComparatorClass(ChrRgGroupingComparator.class);
        halvadeJob.setOutputValueClass(VariantContextWritable.class);
    }
    if (halvadeOpts.justAlign)
        halvadeJob.setNumReduceTasks(0);
    else if (halvadeOpts.mergeBam) {
        halvadeJob.setReducerClass(be.ugent.intec.halvade.hadoop.mapreduce.BamMergeReducer.class);
        halvadeJob.setNumReduceTasks(1);
    } else
        halvadeJob.setNumReduceTasks(halvadeOpts.reduces);
    if (halvadeOpts.useBamInput) {
        halvadeJob.setMapperClass(be.ugent.intec.halvade.hadoop.mapreduce.AlignedBamMapper.class);
        halvadeJob.setInputFormatClass(BAMInputFormat.class);
    }
    return runTimedJob(halvadeJob, "Halvade Job");
}
From source file: be.ugent.intec.halvade.MapReduceRunner.java
License: Open Source License
protected int runCombineJob(String halvadeOutDir, String mergeOutDir, boolean featureCount)
        throws IOException, URISyntaxException, InterruptedException, ClassNotFoundException {
    Configuration combineConf = getConf();
    if (!halvadeOpts.out.endsWith("/"))
        halvadeOpts.out += "/";
    HalvadeConf.setInputDir(combineConf, halvadeOutDir);
    HalvadeConf.setOutDir(combineConf, mergeOutDir);
    FileSystem outFs = FileSystem.get(new URI(mergeOutDir), combineConf);
    if (outFs.exists(new Path(mergeOutDir))) {
        Logger.INFO("The output directory \'" + mergeOutDir + "\' already exists.");
        Logger.INFO("ERROR: Please remove this directory before trying again.");
        System.exit(-2);
    }
    HalvadeConf.setReportAllVariant(combineConf, halvadeOpts.reportAll);
    HalvadeResourceManager.setJobResources(halvadeOpts, combineConf, HalvadeResourceManager.COMBINE, false,
            halvadeOpts.useBamInput);
    Job combineJob = Job.getInstance(combineConf, "HalvadeCombineVCF");
    combineJob.setJarByClass(be.ugent.intec.halvade.hadoop.mapreduce.VCFCombineMapper.class);
    addInputFiles(halvadeOutDir, combineConf, combineJob, featureCount ? ".count" : ".vcf");
    FileOutputFormat.setOutputPath(combineJob, new Path(mergeOutDir));
    combineJob.setMapperClass(featureCount ? be.ugent.intec.halvade.hadoop.mapreduce.HTSeqCombineMapper.class
            : be.ugent.intec.halvade.hadoop.mapreduce.VCFCombineMapper.class);
    combineJob.setMapOutputKeyClass(featureCount ? Text.class : LongWritable.class);
    combineJob.setMapOutputValueClass(featureCount ? LongWritable.class : VariantContextWritable.class);
    combineJob.setInputFormatClass(featureCount ? TextInputFormat.class : VCFInputFormat.class);
    combineJob.setNumReduceTasks(1);
    combineJob.setReducerClass(featureCount ? be.ugent.intec.halvade.hadoop.mapreduce.HTSeqCombineReducer.class
            : be.ugent.intec.halvade.hadoop.mapreduce.VCFCombineReducer.class);
    combineJob.setOutputKeyClass(Text.class);
    combineJob.setOutputValueClass(featureCount ? LongWritable.class : VariantContextWritable.class);
    return runTimedJob(combineJob, (featureCount ? "featureCounts" : "VCF") + " Combine Job");
}
From source file: be.ugent.intec.halvade.uploader.HalvadeUploader.java
License: Open Source License
private int processFiles() throws IOException, InterruptedException, URISyntaxException, Throwable {
    Timer timer = new Timer();
    timer.start();
    AWSUploader upl = null;
    FileSystem fs = null;
    // write to s3?
    boolean useAWS = false;
    if (outputDir.startsWith("s3")) {
        useAWS = true;
        String existingBucketName = outputDir.replace("s3://", "").split("/")[0];
        outputDir = outputDir.replace("s3://" + existingBucketName + "/", "");
        upl = new AWSUploader(existingBucketName, SSE, profile);
    } else {
        Configuration conf = getConf();
        fs = FileSystem.get(new URI(outputDir), conf);
        Path outpath = new Path(outputDir);
        if (fs.exists(outpath) && !fs.getFileStatus(outpath).isDirectory()) {
            Logger.DEBUG("please provide an output directory");
            return 1;
        }
    }
    FileReaderFactory factory = FileReaderFactory.getInstance(mthreads);
    if (manifest != null) {
        Logger.DEBUG("reading input files from " + manifest);
        // read from file
        BufferedReader br = new BufferedReader(new FileReader(manifest));
        String line;
        while ((line = br.readLine()) != null) {
            String[] files = line.split("\t");
            if (files.length == 2) {
                factory.addReader(files[0], files[1], false);
            } else if (files.length == 1) {
                factory.addReader(files[0], null, isInterleaved);
            }
        }
    } else if (file1 != null && file2 != null) {
        Logger.DEBUG("Paired-end read input in 2 files.");
        factory.addReader(file1, file2, false);
    } else if (file1 != null) {
        if (isInterleaved)
            Logger.DEBUG("Single-end read input in 1 files.");
        else
            Logger.DEBUG("Paired-end read input in 1 files.");
        factory.addReader(file1, null, isInterleaved);
    } else {
        Logger.DEBUG("Incorrect input, use either a manifest file or give both file1 and file2 as input.");
    }
    // start reading
    (new Thread(factory)).start();
    int bestThreads = mthreads;
    long maxFileSize = getBestFileSize();
    if (useAWS) {
        AWSInterleaveFiles[] fileThreads = new AWSInterleaveFiles[bestThreads];
        // start interleaveFile threads
        for (int t = 0; t < bestThreads; t++) {
            fileThreads[t] = new AWSInterleaveFiles(outputDir + "halvade_" + t + "_", maxFileSize, upl, t, codec);
            fileThreads[t].start();
        }
        for (int t = 0; t < bestThreads; t++)
            fileThreads[t].join();
        if (upl != null)
            upl.shutDownNow();
    } else {
        HDFSInterleaveFiles[] fileThreads = new HDFSInterleaveFiles[bestThreads];
        // start interleaveFile threads
        for (int t = 0; t < bestThreads; t++) {
            fileThreads[t] = new HDFSInterleaveFiles(outputDir + "halvade_" + t + "_", maxFileSize, fs, t, codec);
            fileThreads[t].start();
        }
        for (int t = 0; t < bestThreads; t++)
            fileThreads[t].join();
    }
    factory.finalize();
    timer.stop();
    Logger.DEBUG("Time to process data: " + timer.getFormattedCurrentTime());
    return 0;
}
From source file: be.ugent.intec.halvade.utils.ChromosomeSplitter.java
License: Open Source License
public void exportSplitter(String filename, Configuration conf) throws URISyntaxException, IOException {
    DataOutputStream dos = null;
    FileSystem hdfs = null;
    try {
        hdfs = FileSystem.get(new URI(filename), conf);
        Path file = new Path(filename);
        if (hdfs.exists(file)) {
            hdfs.delete(file, true);
        }
        OutputStream os = hdfs.create(file);
        dos = new DataOutputStream(os);
        dos.writeInt(regions.size());
        for (BedRegion region : regions) {
            dos.writeUTF(region.contig);
            dos.writeInt(region.start);
            dos.writeInt(region.end);
            dos.writeInt(region.key);
        }
    } finally {
        if (dos != null)
            dos.close();
    }
}
From source file: be.ugent.intec.halvade.utils.HalvadeFileUtils.java
License: Open Source License
/**
 * @return returns 0 if successful, -1 if the file size is incorrect and -2 if an exception occurred
 */
protected static int privateUploadFileToHDFS(TaskInputOutputContext context, FileSystem fs, String from,
        String to) {
    try {
        // check if file is present on HDFS
        Path toPath = new Path(to);
        Path fromPath = new Path(from);
        File f = new File(from);
        if (!fs.exists(toPath)) {
            fs.copyFromLocalFile(fromPath, toPath);
            context.getCounter(HalvadeCounters.FOUT_TO_HDFS).increment(f.length());
        } else {
            // check if filesize is correct
            if (fs.getFileStatus(toPath).getLen() != f.length()) {
                // incorrect filesize, remove and download again
                fs.delete(toPath, false);
                fs.copyFromLocalFile(fromPath, toPath);
                context.getCounter(HalvadeCounters.FOUT_TO_HDFS).increment(f.length());
            }
        }
        if (fs.getFileStatus(toPath).getLen() != f.length())
            return -1;
        else
            return 0;
    } catch (IOException ex) {
        Logger.DEBUG("failed to upload " + from + " to HDFS: " + ex.getLocalizedMessage());
        Logger.EXCEPTION(ex);
        return -2;
    }
}
From source file: be.ugent.intec.halvade.utils.HalvadeFileUtils.java
License: Open Source License
public static String downloadSTARIndex(TaskInputOutputContext context, String id, boolean usePass2Genome)
        throws IOException, URISyntaxException {
    Configuration conf = context.getConfiguration();
    String tmpDir = HalvadeConf.getScratchTempDir(conf);
    String refDir = HalvadeConf.getRefDirOnScratch(conf);
    if (!refDir.endsWith("/"))
        refDir = refDir + "/";
    HalvadeFileLock lock = new HalvadeFileLock(context, tmpDir, STARG_LOCK);
    String refBase = null;
    try {
        lock.getLock();
        ByteBuffer bytes = ByteBuffer.allocate(4);
        if (lock.read(bytes) > 0) {
            bytes.flip();
            long val = bytes.getInt();
            if (val == DEFAULT_LOCK_VAL)
                Logger.DEBUG("reference has been downloaded to local scratch: " + val);
            else {
                Logger.INFO("downloading missing reference index files to local scratch");
                if (usePass2Genome)
                    Logger.DEBUG("using Pass2 genome");
                String HDFSRef = usePass2Genome ? HalvadeConf.getStarDirPass2HDFS(conf)
                        : HalvadeConf.getStarDirOnHDFS(conf);
                Logger.DEBUG("downloading STAR genome from: " + HDFSRef);
                FileSystem fs = FileSystem.get(new URI(HDFSRef), conf);
                refBase = findFile(refDir, usePass2Genome ? HALVADE_STAR_SUFFIX_P2 : HALVADE_STAR_SUFFIX_P1, true);
                boolean foundExisting = (refBase != null);
                if (!foundExisting) {
                    refBase = refDir + id + "-star/";
                    // make dir
                    File makeRefDir = new File(refBase);
                    makeRefDir.mkdir();
                }
                Logger.DEBUG("STAR dir: " + refBase);
                if (!usePass2Genome) {
                    for (String suffix : STAR_REF_FILES) {
                        attemptDownloadFileFromHDFS(context, fs, HDFSRef + suffix, refBase + suffix, RETRIES);
                    }
                    for (String suffix : STAR_REF_OPTIONAL_FILES) {
                        if (fs.exists(new Path(HDFSRef + suffix)))
                            attemptDownloadFileFromHDFS(context, fs, HDFSRef + suffix, refBase + suffix, RETRIES);
                    }
                }
                Logger.INFO("FINISHED downloading the complete reference index to local scratch");
                if (!foundExisting) {
                    File f = new File(
                            refBase + (usePass2Genome ? HALVADE_STAR_SUFFIX_P2 : HALVADE_STAR_SUFFIX_P1));
                    f.createNewFile();
                }
                bytes.clear();
                bytes.putInt(DEFAULT_LOCK_VAL).flip();
                lock.forceWrite(bytes);
            }
        } else {
            Logger.INFO("downloading missing reference index files to local scratch");
            if (usePass2Genome)
                Logger.DEBUG("using Pass2 genome");
            String HDFSRef = usePass2Genome ? HalvadeConf.getStarDirPass2HDFS(conf)
                    : HalvadeConf.getStarDirOnHDFS(conf);
            Logger.DEBUG("downloading STAR genome from: " + HDFSRef);
            FileSystem fs = FileSystem.get(new URI(HDFSRef), conf);
            refBase = findFile(refDir, usePass2Genome ? HALVADE_STAR_SUFFIX_P2 : HALVADE_STAR_SUFFIX_P1, true);
            boolean foundExisting = (refBase != null);
            if (!foundExisting) {
                refBase = refDir + id + "-star/";
                // make dir
                File makeRefDir = new File(refBase);
                makeRefDir.mkdir();
            }
            Logger.DEBUG("STAR dir: " + refBase);
            if (!usePass2Genome) {
                for (String suffix : STAR_REF_FILES) {
                    attemptDownloadFileFromHDFS(context, fs, HDFSRef + suffix, refBase + suffix, RETRIES);
                }
                for (String suffix : STAR_REF_OPTIONAL_FILES) {
                    if (fs.exists(new Path(HDFSRef + suffix)))
                        attemptDownloadFileFromHDFS(context, fs, HDFSRef + suffix, refBase + suffix, RETRIES);
                }
            }
            Logger.INFO("FINISHED downloading the complete reference index to local scratch");
            if (!foundExisting) {
                File f = new File(refBase + (usePass2Genome ? HALVADE_STAR_SUFFIX_P2 : HALVADE_STAR_SUFFIX_P1));
                f.createNewFile();
            }
            bytes.clear();
            bytes.putInt(DEFAULT_LOCK_VAL).flip();
            lock.forceWrite(bytes);
        }
    } catch (InterruptedException ex) {
        Logger.EXCEPTION(ex);
    } finally {
        lock.releaseLock();
    }
    if (refBase == null)
        refBase = findFile(refDir, usePass2Genome ? HALVADE_STAR_SUFFIX_P2 : HALVADE_STAR_SUFFIX_P1, true);
    return refBase;
}
From source file: be.ugent.intec.halvade.utils.HalvadeFileUtils.java
License: Open Source License
public static String[] downloadSites(TaskInputOutputContext context, String id)
        throws IOException, URISyntaxException, InterruptedException {
    Configuration conf = context.getConfiguration();
    String tmpDir = HalvadeConf.getScratchTempDir(conf);
    String refDir = HalvadeConf.getRefDirOnScratch(conf);
    String HDFSsites[] = HalvadeConf.getKnownSitesOnHDFS(conf);
    String[] localSites = new String[HDFSsites.length];
    if (!refDir.endsWith("/"))
        refDir = refDir + "/";
    HalvadeFileLock lock = new HalvadeFileLock(context, refDir, DBSNP_LOCK);
    String refBase = null;
    try {
        lock.getLock();
        ByteBuffer bytes = ByteBuffer.allocate(4);
        if (lock.read(bytes) > 0) {
            bytes.flip();
            long val = bytes.getInt();
            if (val == DEFAULT_LOCK_VAL)
                Logger.DEBUG("dbSNP has been downloaded to local scratch: " + val);
            else {
                Logger.INFO("downloading missing dbSNP to local scratch");
                refBase = findFile(refDir, HALVADE_DBSNP_SUFFIX, true);
                boolean foundExisting = (refBase != null);
                if (!foundExisting) {
                    refBase = refDir + id + "-dbsnp/";
                    // make dir
                    File makeRefDir = new File(refBase);
                    makeRefDir.mkdir();
                }
                Logger.DEBUG("dbSNP dir: " + refBase);
                for (int i = 0; i < HDFSsites.length; i++) {
                    String fullName = HDFSsites[i];
                    String name = fullName.substring(fullName.lastIndexOf('/') + 1);
                    Logger.DEBUG("Downloading " + name);
                    FileSystem fs = FileSystem.get(new URI(fullName), conf);
                    attemptDownloadFileFromHDFS(context, fs, fullName, refBase + name, RETRIES);
                    localSites[i] = refBase + name;
                    // attempt to download .idx file
                    if (!foundExisting && fs.exists(new Path(fullName + ".idx")))
                        attemptDownloadFileFromHDFS(context, fs, fullName + ".idx", refBase + name + ".idx", RETRIES);
                }
                Logger.INFO("finished downloading the new sites to local scratch");
                if (!foundExisting) {
                    File f = new File(refBase + HALVADE_DBSNP_SUFFIX);
                    f.createNewFile();
                }
                bytes.clear();
                bytes.putInt(DEFAULT_LOCK_VAL).flip();
                lock.forceWrite(bytes);
            }
        } else {
            Logger.INFO("downloading missing dbSNP to local scratch");
            refBase = findFile(refDir, HALVADE_DBSNP_SUFFIX, true);
            boolean foundExisting = (refBase != null);
            if (!foundExisting) {
                refBase = refDir + id + "-dbsnp/";
                // make dir
                File makeRefDir = new File(refBase);
                makeRefDir.mkdir();
            }
            Logger.DEBUG("dbSNP dir: " + refBase);
            for (int i = 0; i < HDFSsites.length; i++) {
                String fullName = HDFSsites[i];
                String name = fullName.substring(fullName.lastIndexOf('/') + 1);
                Logger.DEBUG("Downloading " + name);
                FileSystem fs = FileSystem.get(new URI(fullName), conf);
                attemptDownloadFileFromHDFS(context, fs, fullName, refBase + name, RETRIES);
                localSites[i] = refBase + name;
                // attempt to download .idx file
                if (!foundExisting && fs.exists(new Path(fullName + ".idx")))
                    attemptDownloadFileFromHDFS(context, fs, fullName + ".idx", refBase + name + ".idx", RETRIES);
            }
            Logger.INFO("finished downloading the new sites to local scratch");
            if (!foundExisting) {
                File f = new File(refBase + HALVADE_DBSNP_SUFFIX);
                f.createNewFile();
            }
            bytes.clear();
            bytes.putInt(DEFAULT_LOCK_VAL).flip();
            lock.forceWrite(bytes);
        }
    } catch (InterruptedException ex) {
        Logger.EXCEPTION(ex);
    } finally {
        lock.releaseLock();
    }
    if (refBase == null) {
        refBase = findFile(refDir, HALVADE_DBSNP_SUFFIX, true);
        File dir = new File(refBase);
        File[] directoryListing = dir.listFiles();
        if (directoryListing != null) {
            int found = 0;
            for (int i = 0; i < HDFSsites.length; i++) {
                String fullName = HDFSsites[i];
                String name = fullName.substring(fullName.lastIndexOf('/') + 1);
                localSites[i] = refBase + name;
                if ((new File(localSites[i])).exists())
                    found++;
                else
                    Logger.DEBUG(name + " not found in local scratch");
            }
            if (found != HDFSsites.length) {
                throw new IOException(
                        refBase + " has different number of files: " + found + " vs " + localSites.length);
            }
        } else {
            throw new IOException(refBase + " has no files");
        }
    }
    return localSites;
}