Example usage for org.apache.hadoop.fs FileStatus getPath

List of usage examples for org.apache.hadoop.fs FileStatus getPath

Introduction

On this page you can find usage examples for org.apache.hadoop.fs.FileStatus.getPath().

Prototype

public Path getPath() 
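
A minimal sketch (not taken from the examples below) of how getPath() is typically used: list a directory with FileSystem.listStatus() and print the Path of each entry. The "/tmp/data" path and the class name GetPathExample are placeholders, not part of any example on this page.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class GetPathExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Path dir = new Path("/tmp/data"); // placeholder directory
        FileSystem fs = dir.getFileSystem(conf);
        for (FileStatus status : fs.listStatus(dir)) {
            // getPath() returns the fully qualified Path of the listed entry
            System.out.println(status.getPath());
        }
    }
}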

Usage

From source file: be.uantwerpen.adrem.eclat.util.SplitByKTextInputFormatTest.java

License: Apache License

@Test
public void splits_Empty_File() throws IOException {
    File in = createTmpFile("in_Splits_Empty_File", empty);
    Configuration conf = createConfiguration();

    FileStatus status = EasyMock.createMock(FileStatus.class);
    EasyMock.expect(status.getPath()).andReturn(new Path(in.getAbsolutePath()));
    EasyMock.expect(status.isDir()).andReturn(false);
    EasyMock.replay(status);

    List<FileSplit> splits = SplitByKTextInputFormat.getSplitsForFile(status, conf, 2);

    ArrayList<FileSplit> expected = newArrayList();
    assertEquals(expected, splits);
}

From source file: be.uantwerpen.adrem.eclat.util.SplitByKTextInputFormatTest.java

License: Apache License

@Test
public void splits_Non_Empty_File_One_Split() throws IOException {
    File in = createTmpFile("in_Splits_Non_Empty_File_One_Split", non_Empty);
    Configuration conf = createConfiguration();

    FileStatus status = EasyMock.createMock(FileStatus.class);
    EasyMock.expect(status.getPath()).andReturn(new Path(in.getAbsolutePath()));
    EasyMock.expect(status.isDir()).andReturn(false);
    EasyMock.replay(status);

    List<FileSplit> splits = SplitByKTextInputFormat.getSplitsForFile(status, conf, 1);

    List<FileSplit> expected = newArrayListWithCapacity(1);
    expected.add(new FileSplit(new Path(in.getAbsolutePath()), 0, 17, new String[] {}));

    checkSplits(expected, splits);
}

From source file: be.uantwerpen.adrem.eclat.util.SplitByKTextInputFormatTest.java

License: Apache License

@Test
public void splits_Non_Empty_File_Ok_Splits() throws IOException {
    File in = createTmpFile("in_Splits_Non_Empty_File_Ok_Splits", non_Empty);
    Configuration conf = createConfiguration();

    FileStatus status = EasyMock.createMock(FileStatus.class);
    EasyMock.expect(status.getPath()).andReturn(new Path(in.getAbsolutePath()));
    EasyMock.expect(status.isDir()).andReturn(false);
    EasyMock.replay(status);

    List<FileSplit> splits = SplitByKTextInputFormat.getSplitsForFile(status, conf, 2);

    List<FileSplit> expected = newArrayListWithCapacity(2);
    expected.add(new FileSplit(new Path(in.getAbsolutePath()), 0, 12, new String[] {}));
    expected.add(new FileSplit(new Path(in.getAbsolutePath()), 12, 5, new String[] {}));

    checkSplits(expected, splits);
}

From source file: be.uantwerpen.adrem.eclat.util.SplitByKTextInputFormatTest.java

License: Apache License

@Test
public void splits_Non_Empty_File_More_Splits_Than_Lines() throws IOException {
    File in = createTmpFile("in_Splits_Non_Empty_File_More_Splits_Than_Lines", non_Empty);
    Configuration conf = createConfiguration();

    FileStatus status = EasyMock.createMock(FileStatus.class);
    EasyMock.expect(status.getPath()).andReturn(new Path(in.getAbsolutePath()));
    EasyMock.expect(status.isDir()).andReturn(false);
    EasyMock.replay(status);

    List<FileSplit> splits = SplitByKTextInputFormat.getSplitsForFile(status, conf, 10);

    List<FileSplit> expected = newArrayListWithCapacity(3);
    expected.add(new FileSplit(new Path(in.getAbsolutePath()), 0, 5, new String[] {}));
    expected.add(new FileSplit(new Path(in.getAbsolutePath()), 5, 7, new String[] {}));
    expected.add(new FileSplit(new Path(in.getAbsolutePath()), 12, 5, new String[] {}));

    checkSplits(expected, splits);
}

From source file: be.uantwerpen.adrem.hadoop.util.SplitByKTextInputFormat.java

License: Apache License

/**
 * Gets the different file splits for the data based on a given number of splits
 *
 * @param status
 *          file status
 * @param conf
 *          hadoop configuration object
 * @param numberOfSplits
 *          number of splits to split the data in
 * @return list of file splits
 * @throws IOException
 *           thrown if the file does not exist
 */
public static List<FileSplit> getSplitsForFile(FileStatus status, Configuration conf, int numberOfSplits)
        throws IOException {
    List<FileSplit> splits = newArrayList();
    Path fileName = status.getPath();
    if (status.isDir()) {
        throw new IOException("Not a file: " + fileName);
    }
    long totalNumberOfLines = getTotalNumberOfLines(conf, fileName);
    int numLinesPerSplit = (int) Math.ceil(1.0 * totalNumberOfLines / numberOfSplits);
    LineReader lr = null;
    FSDataInputStream in = null;
    try {
        in = fileName.getFileSystem(conf).open(fileName);
        lr = new LineReader(in, conf);
        Text line = new Text();
        int numLines = 0;
        long begin = 0;
        long length = 0;
        int num = -1;
        while ((num = lr.readLine(line)) > 0) {
            numLines++;
            length += num;
            if (numLines == numLinesPerSplit) {
                splits.add(createFileSplit(fileName, begin, length));
                begin += length;
                length = 0;
                numLines = 0;
            }
        }
        if (numLines != 0) {
            splits.add(createFileSplit(fileName, begin, length));
        }
    } finally {
        if (lr != null) {
            lr.close();
        }
        if (in != null) {
            in.close();
        }
    }
    return splits;
}
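
For reference, a hedged sketch of calling this method directly; the input path and split count below are placeholders and do not come from the original source:

    // Hypothetical caller: split one input file into at most 4 line-based splits.
    Configuration conf = new Configuration();
    Path input = new Path("/tmp/input.txt"); // placeholder path
    FileStatus status = input.getFileSystem(conf).getFileStatus(input);
    List<FileSplit> splits = SplitByKTextInputFormat.getSplitsForFile(status, conf, 4);
    for (FileSplit split : splits) {
        System.out.println(split.getPath() + " start=" + split.getStart() + " length=" + split.getLength());
    }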

From source file: be.uantwerpen.adrem.hadoop.util.Tools.java

License: Apache License

public static void cleanupSubdirsExcept(String dir, Collection<String> toKeep) {
    Path path = new Path(dir);
    try {
        for (FileStatus fs : path.getFileSystem(new Configuration()).listStatus(path)) {
            String[] sp = fs.getPath().toString().split(Path.SEPARATOR);
            String filename = sp[sp.length - 1];
            if (toKeep.contains(filename)) {
                cleanDirs(fs.getPath().toString() + Path.SEPARATOR + "_SUCCESS");
                continue;
            }
            cleanDirs(fs.getPath().toString());
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
}

From source file: be.ugent.intec.halvade.MapReduceRunner.java

License: Open Source License

protected int runPass1RNAJob(Configuration pass1Conf, String tmpOutDir)
        throws IOException, InterruptedException, ClassNotFoundException, URISyntaxException {
    HalvadeConf.setIsPass2(pass1Conf, false);
    HalvadeResourceManager.setJobResources(halvadeOpts, pass1Conf, HalvadeResourceManager.RNA_SHMEM_PASS1, true,
            halvadeOpts.useBamInput);
    Job pass1Job = Job.getInstance(pass1Conf, "Halvade pass 1 RNA pipeline");
    pass1Job.addCacheArchive(new URI(halvadeOpts.halvadeBinaries));
    pass1Job.setJarByClass(be.ugent.intec.halvade.hadoop.mapreduce.HalvadeMapper.class);
    FileSystem fs = FileSystem.get(new URI(halvadeOpts.in), pass1Conf);
    try {
        if (fs.getFileStatus(new Path(halvadeOpts.in)).isDirectory()) {
            // add every file in directory
            FileStatus[] files = fs.listStatus(new Path(halvadeOpts.in));
            for (FileStatus file : files) {
                if (!file.isDirectory()) {
                    FileInputFormat.addInputPath(pass1Job, file.getPath());
                }
            }
        } else {
            FileInputFormat.addInputPath(pass1Job, new Path(halvadeOpts.in));
        }
    } catch (IOException | IllegalArgumentException e) {
        Logger.EXCEPTION(e);
    }

    FileSystem outFs = FileSystem.get(new URI(tmpOutDir), pass1Conf);
    boolean skipPass1 = false;
    if (outFs.exists(new Path(tmpOutDir))) {
        // check if genome already exists
        skipPass1 = outFs.exists(new Path(tmpOutDir + "/_SUCCESS"));
        if (skipPass1)
            Logger.DEBUG("pass1 genome already created, skipping pass 1");
        else {
            Logger.INFO("The output directory \'" + tmpOutDir + "\' already exists.");
            Logger.INFO("ERROR: Please remove this directory before trying again.");
            System.exit(-2);
        }
    }
    if (!skipPass1) {
        FileOutputFormat.setOutputPath(pass1Job, new Path(tmpOutDir));
        pass1Job.setMapperClass(be.ugent.intec.halvade.hadoop.mapreduce.StarAlignPassXMapper.class);

        pass1Job.setInputFormatClass(HalvadeTextInputFormat.class);
        pass1Job.setMapOutputKeyClass(GenomeSJ.class);
        pass1Job.setMapOutputValueClass(Text.class);

        pass1Job.setSortComparatorClass(GenomeSJSortComparator.class);
        pass1Job.setGroupingComparatorClass(GenomeSJGroupingComparator.class);
        pass1Job.setNumReduceTasks(1);
        pass1Job.setReducerClass(be.ugent.intec.halvade.hadoop.mapreduce.RebuildStarGenomeReducer.class);
        pass1Job.setOutputKeyClass(LongWritable.class);
        pass1Job.setOutputValueClass(Text.class);

        return runTimedJob(pass1Job, "Halvade pass 1 Job");
    } else
        return 0;
}

From source file: be.ugent.intec.halvade.MapReduceRunner.java

License: Open Source License

protected void addInputFiles(String input, Configuration conf, Job job) throws URISyntaxException, IOException {
    FileSystem fs = FileSystem.get(new URI(input), conf);
    Logger.DEBUG("adding input files from " + input);
    if (fs.getFileStatus(new Path(input)).isDirectory()) {
        // add every file in directory
        FileStatus[] files = fs.listStatus(new Path(input));
        for (FileStatus file : files) {
            if (!file.isDirectory()) {
                FileInputFormat.addInputPath(job, file.getPath());
            }
        }
    } else
        FileInputFormat.addInputPath(job, new Path(input));
}

From source file: be.ugent.intec.halvade.MapReduceRunner.java

License: Open Source License

protected void addInputFiles(String input, Configuration conf, Job job, String filter)
        throws URISyntaxException, IOException {
    FileSystem fs = FileSystem.get(new URI(input), conf);
    if (fs.getFileStatus(new Path(input)).isDirectory()) {
        // add every file in directory
        FileStatus[] files = fs.listStatus(new Path(input));
        for (FileStatus file : files) {
            if (!file.isDirectory() && file.getPath().getName().endsWith(filter)) {
                FileInputFormat.addInputPath(job, file.getPath());
            }
        }
    } else {
        FileInputFormat.addInputPath(job, new Path(input));
    }
}

From source file: be.ugent.intec.halvade.utils.HalvadeConf.java

License: Open Source License

public static void setKnownSitesOnHDFS(Configuration conf, String[] val)
        throws IOException, URISyntaxException {
    conf.setInt(numberOfSites, val.length);
    FileSystem fs;
    for (int i = 0; i < val.length; i++) {
        // if the path is a directory, add the files it contains
        fs = FileSystem.get(new URI(val[i]), conf);
        if (fs.isFile(new Path(val[i]))) {
            conf.set(sitesOnHDFSName + i, val[i]);
        } else {
            FileStatus[] files = fs.listStatus(new Path(val[i]));
            for (FileStatus file : files) {
                if (!file.isDir()) {
                    conf.set(sitesOnHDFSName + i, file.getPath().toString());
                }
            }
        }
    }
}
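
Note that FileStatus.isDir(), used in this last example, is deprecated in Hadoop 2.x and later in favor of isFile() and isDirectory(); a hedged equivalent of the inner check would be:

    // Same check with the non-deprecated API (assumes Hadoop 2.x+):
    if (!file.isDirectory()) {
        conf.set(sitesOnHDFSName + i, file.getPath().toString());
    }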