List of usage examples for org.apache.hadoop.fs.FileStatus#getPath()
Method signature: public Path getPath()
From source file:be.uantwerpen.adrem.eclat.util.SplitByKTextInputFormatTest.java
License:Apache License
@Test public void splits_Empty_File() throws IOException { File in = createTmpFile("in_Splits_Empty_File", empty); Configuration conf = createConfiguration(); FileStatus status = EasyMock.createMock(FileStatus.class); EasyMock.expect(status.getPath()).andReturn(new Path(in.getAbsolutePath())); EasyMock.expect(status.isDir()).andReturn(false); EasyMock.replay(status);//from www.j a v a 2 s . c om List<FileSplit> splits = SplitByKTextInputFormat.getSplitsForFile(status, conf, 2); ArrayList<FileSplit> expected = newArrayList(); assertEquals(expected, splits); }
From source file:be.uantwerpen.adrem.eclat.util.SplitByKTextInputFormatTest.java
License:Apache License
@Test public void splits_Non_Empty_File_One_Split() throws IOException { File in = createTmpFile("in_Splits_Non_Empty_File_One_Split", non_Empty); Configuration conf = createConfiguration(); FileStatus status = EasyMock.createMock(FileStatus.class); EasyMock.expect(status.getPath()).andReturn(new Path(in.getAbsolutePath())); EasyMock.expect(status.isDir()).andReturn(false); EasyMock.replay(status);//w ww .j a va 2 s. c o m List<FileSplit> splits = SplitByKTextInputFormat.getSplitsForFile(status, conf, 1); List<FileSplit> expected = newArrayListWithCapacity(1); expected.add(new FileSplit(new Path(in.getAbsolutePath()), 0, 17, new String[] {})); checkSplits(expected, splits); }
From source file:be.uantwerpen.adrem.eclat.util.SplitByKTextInputFormatTest.java
License:Apache License
@Test public void splits_Non_Empty_File_Ok_Splits() throws IOException { File in = createTmpFile("in_Splits_Non_Empty_File_Ok_Splits", non_Empty); Configuration conf = createConfiguration(); FileStatus status = EasyMock.createMock(FileStatus.class); EasyMock.expect(status.getPath()).andReturn(new Path(in.getAbsolutePath())); EasyMock.expect(status.isDir()).andReturn(false); EasyMock.replay(status);//from ww w .j a v a2 s.c o m List<FileSplit> splits = SplitByKTextInputFormat.getSplitsForFile(status, conf, 2); List<FileSplit> expected = newArrayListWithCapacity(2); expected.add(new FileSplit(new Path(in.getAbsolutePath()), 0, 12, new String[] {})); expected.add(new FileSplit(new Path(in.getAbsolutePath()), 12, 5, new String[] {})); checkSplits(expected, splits); }
From source file:be.uantwerpen.adrem.eclat.util.SplitByKTextInputFormatTest.java
License:Apache License
@Test public void splits_Non_Empty_File_More_Splits_Than_Lines() throws IOException { File in = createTmpFile("in_Splits_Non_Empty_File_More_Splits_Than_Lines", non_Empty); Configuration conf = createConfiguration(); FileStatus status = EasyMock.createMock(FileStatus.class); EasyMock.expect(status.getPath()).andReturn(new Path(in.getAbsolutePath())); EasyMock.expect(status.isDir()).andReturn(false); EasyMock.replay(status);//from w ww . j a v a 2s . c o m List<FileSplit> splits = SplitByKTextInputFormat.getSplitsForFile(status, conf, 10); List<FileSplit> expected = newArrayListWithCapacity(3); expected.add(new FileSplit(new Path(in.getAbsolutePath()), 0, 5, new String[] {})); expected.add(new FileSplit(new Path(in.getAbsolutePath()), 5, 7, new String[] {})); expected.add(new FileSplit(new Path(in.getAbsolutePath()), 12, 5, new String[] {})); checkSplits(expected, splits); }
From source file:be.uantwerpen.adrem.hadoop.util.SplitByKTextInputFormat.java
License:Apache License
/** * Gets the different file splits for the data based on a given number of splits * /*w w w . ja v a 2s . c o m*/ * @param status * file status * @param conf * hadoop configuration object * @param numberOfSplits * number of splits to split the data in * @return list of file splits * @throws IOException * thrown if the file does not exist */ public static List<FileSplit> getSplitsForFile(FileStatus status, Configuration conf, int numberOfSplits) throws IOException { List<FileSplit> splits = newArrayList(); Path fileName = status.getPath(); if (status.isDir()) { throw new IOException("Not a file: " + fileName); } long totalNumberOfLines = getTotalNumberOfLines(conf, fileName); int numLinesPerSplit = (int) Math.ceil(1.0 * totalNumberOfLines / numberOfSplits); LineReader lr = null; FSDataInputStream in = null; try { in = fileName.getFileSystem(conf).open(fileName); lr = new LineReader(in, conf); Text line = new Text(); int numLines = 0; long begin = 0; long length = 0; int num = -1; while ((num = lr.readLine(line)) > 0) { numLines++; length += num; if (numLines == numLinesPerSplit) { splits.add(createFileSplit(fileName, begin, length)); begin += length; length = 0; numLines = 0; } } if (numLines != 0) { splits.add(createFileSplit(fileName, begin, length)); } } finally { if (lr != null) { lr.close(); } if (in != null) { in.close(); } } return splits; }
From source file:be.uantwerpen.adrem.hadoop.util.Tools.java
License:Apache License
/**
 * Deletes every child of {@code dir} except those whose name is listed in
 * {@code toKeep}; for kept children only their {@code _SUCCESS} marker is removed.
 *
 * @param dir    directory whose children are cleaned up
 * @param toKeep names (last path components) of children to preserve
 */
public static void cleanupSubdirsExcept(String dir, Collection<String> toKeep) {
    Path path = new Path(dir);
    try {
        for (FileStatus fs : path.getFileSystem(new Configuration()).listStatus(path)) {
            // Path.getName() yields the last path component directly — no need to
            // split the full URI string on the separator as before.
            String filename = fs.getPath().getName();
            if (toKeep.contains(filename)) {
                // Keep the directory itself but drop its _SUCCESS marker.
                cleanDirs(fs.getPath().toString() + Path.SEPARATOR + "_SUCCESS");
            } else {
                cleanDirs(fs.getPath().toString());
            }
        }
    } catch (IOException e) {
        // NOTE(review): printStackTrace silently swallows cleanup failures; kept for
        // behavior compatibility, but consider logging or propagating instead.
        e.printStackTrace();
    }
}
From source file:be.ugent.intec.halvade.MapReduceRunner.java
License:Open Source License
protected int runPass1RNAJob(Configuration pass1Conf, String tmpOutDir) throws IOException, InterruptedException, ClassNotFoundException, URISyntaxException { HalvadeConf.setIsPass2(pass1Conf, false); HalvadeResourceManager.setJobResources(halvadeOpts, pass1Conf, HalvadeResourceManager.RNA_SHMEM_PASS1, true, halvadeOpts.useBamInput);/*w ww. j a v a 2 s . co m*/ Job pass1Job = Job.getInstance(pass1Conf, "Halvade pass 1 RNA pipeline"); pass1Job.addCacheArchive(new URI(halvadeOpts.halvadeBinaries)); pass1Job.setJarByClass(be.ugent.intec.halvade.hadoop.mapreduce.HalvadeMapper.class); FileSystem fs = FileSystem.get(new URI(halvadeOpts.in), pass1Conf); try { if (fs.getFileStatus(new Path(halvadeOpts.in)).isDirectory()) { // add every file in directory FileStatus[] files = fs.listStatus(new Path(halvadeOpts.in)); for (FileStatus file : files) { if (!file.isDirectory()) { FileInputFormat.addInputPath(pass1Job, file.getPath()); } } } else { FileInputFormat.addInputPath(pass1Job, new Path(halvadeOpts.in)); } } catch (IOException | IllegalArgumentException e) { Logger.EXCEPTION(e); } FileSystem outFs = FileSystem.get(new URI(tmpOutDir), pass1Conf); boolean skipPass1 = false; if (outFs.exists(new Path(tmpOutDir))) { // check if genome already exists skipPass1 = outFs.exists(new Path(tmpOutDir + "/_SUCCESS")); if (skipPass1) Logger.DEBUG("pass1 genome already created, skipping pass 1"); else { Logger.INFO("The output directory \'" + tmpOutDir + "\' already exists."); Logger.INFO("ERROR: Please remove this directory before trying again."); System.exit(-2); } } if (!skipPass1) { FileOutputFormat.setOutputPath(pass1Job, new Path(tmpOutDir)); pass1Job.setMapperClass(be.ugent.intec.halvade.hadoop.mapreduce.StarAlignPassXMapper.class); pass1Job.setInputFormatClass(HalvadeTextInputFormat.class); pass1Job.setMapOutputKeyClass(GenomeSJ.class); pass1Job.setMapOutputValueClass(Text.class); pass1Job.setSortComparatorClass(GenomeSJSortComparator.class); 
pass1Job.setGroupingComparatorClass(GenomeSJGroupingComparator.class); pass1Job.setNumReduceTasks(1); pass1Job.setReducerClass(be.ugent.intec.halvade.hadoop.mapreduce.RebuildStarGenomeReducer.class); pass1Job.setOutputKeyClass(LongWritable.class); pass1Job.setOutputValueClass(Text.class); return runTimedJob(pass1Job, "Halvade pass 1 Job"); } else return 0; }
From source file:be.ugent.intec.halvade.MapReduceRunner.java
License:Open Source License
protected void addInputFiles(String input, Configuration conf, Job job) throws URISyntaxException, IOException { FileSystem fs = FileSystem.get(new URI(input), conf); Logger.DEBUG("adding input files from " + input); if (fs.getFileStatus(new Path(input)).isDirectory()) { // add every file in directory FileStatus[] files = fs.listStatus(new Path(input)); for (FileStatus file : files) { if (!file.isDirectory()) { FileInputFormat.addInputPath(job, file.getPath()); }//w w w . jav a 2 s. c o m } } else FileInputFormat.addInputPath(job, new Path(input)); }
From source file:be.ugent.intec.halvade.MapReduceRunner.java
License:Open Source License
protected void addInputFiles(String input, Configuration conf, Job job, String filter) throws URISyntaxException, IOException { FileSystem fs = FileSystem.get(new URI(input), conf); if (fs.getFileStatus(new Path(input)).isDirectory()) { // add every file in directory FileStatus[] files = fs.listStatus(new Path(input)); for (FileStatus file : files) { if (!file.isDirectory() && file.getPath().getName().endsWith(filter)) { FileInputFormat.addInputPath(job, file.getPath()); }/*from w w w. j a va2 s .c o m*/ } } else { FileInputFormat.addInputPath(job, new Path(input)); } }
From source file:be.ugent.intec.halvade.utils.HalvadeConf.java
License:Open Source License
/**
 * Records the HDFS locations of the known-sites inputs in the configuration:
 * stores {@code val.length} under {@code numberOfSites} and one path string per
 * entry under {@code sitesOnHDFSName + i}.
 *
 * NOTE(review): in the directory branch below, every regular file found writes to
 * the SAME key ({@code sitesOnHDFSName + i}), so only the LAST file in the listing
 * survives — this contradicts the "add all files" comment and looks like a bug.
 * Fixing it would change the key scheme the corresponding getter reads, so it is
 * flagged here rather than changed; verify against the getter.
 *
 * @param conf configuration to populate
 * @param val  HDFS paths, each either a file or a directory of files
 * @throws IOException        if a filesystem cannot be reached
 * @throws URISyntaxException if an entry in {@code val} is not a valid URI
 */
public static void setKnownSitesOnHDFS(Configuration conf, String[] val) throws IOException, URISyntaxException {
    conf.setInt(numberOfSites, val.length);
    FileSystem fs;
    for (int i = 0; i < val.length; i++) {
        // check if dir add all files!
        fs = FileSystem.get(new URI(val[i]), conf);
        if (fs.isFile(new Path(val[i]))) {
            conf.set(sitesOnHDFSName + i, val[i]);
        } else {
            FileStatus[] files = fs.listStatus(new Path(val[i]));
            for (FileStatus file : files) {
                if (!file.isDir()) {
                    // overwrites the same key on each iteration — see NOTE above
                    conf.set(sitesOnHDFSName + i, file.getPath().toString());
                }
            }
        }
    }
}