List of usage examples for org.apache.hadoop.fs.FileStatus#getPath()
Method signature: public Path getPath()
From source file:be.uantwerpen.adrem.eclat.util.SplitByKTextInputFormatTest.java
License:Apache License
@Test public void splits_Empty_File() throws IOException { File in = createTmpFile("in_Splits_Empty_File", empty); Configuration conf = createConfiguration(); FileStatus status = EasyMock.createMock(FileStatus.class); EasyMock.expect(status.getPath()).andReturn(new Path(in.getAbsolutePath())); EasyMock.expect(status.isDir()).andReturn(false); EasyMock.replay(status);//from www.j a v a 2 s . c om List<FileSplit> splits = SplitByKTextInputFormat.getSplitsForFile(status, conf, 2); ArrayList<FileSplit> expected = newArrayList(); assertEquals(expected, splits); }
From source file:be.uantwerpen.adrem.eclat.util.SplitByKTextInputFormatTest.java
License:Apache License
@Test public void splits_Non_Empty_File_One_Split() throws IOException { File in = createTmpFile("in_Splits_Non_Empty_File_One_Split", non_Empty); Configuration conf = createConfiguration(); FileStatus status = EasyMock.createMock(FileStatus.class); EasyMock.expect(status.getPath()).andReturn(new Path(in.getAbsolutePath())); EasyMock.expect(status.isDir()).andReturn(false); EasyMock.replay(status);//w ww .j a va 2 s. c o m List<FileSplit> splits = SplitByKTextInputFormat.getSplitsForFile(status, conf, 1); List<FileSplit> expected = newArrayListWithCapacity(1); expected.add(new FileSplit(new Path(in.getAbsolutePath()), 0, 17, new String[] {})); checkSplits(expected, splits); }
From source file:be.uantwerpen.adrem.eclat.util.SplitByKTextInputFormatTest.java
License:Apache License
@Test public void splits_Non_Empty_File_Ok_Splits() throws IOException { File in = createTmpFile("in_Splits_Non_Empty_File_Ok_Splits", non_Empty); Configuration conf = createConfiguration(); FileStatus status = EasyMock.createMock(FileStatus.class); EasyMock.expect(status.getPath()).andReturn(new Path(in.getAbsolutePath())); EasyMock.expect(status.isDir()).andReturn(false); EasyMock.replay(status);//from ww w .j a v a2 s.c o m List<FileSplit> splits = SplitByKTextInputFormat.getSplitsForFile(status, conf, 2); List<FileSplit> expected = newArrayListWithCapacity(2); expected.add(new FileSplit(new Path(in.getAbsolutePath()), 0, 12, new String[] {})); expected.add(new FileSplit(new Path(in.getAbsolutePath()), 12, 5, new String[] {})); checkSplits(expected, splits); }
From source file:be.uantwerpen.adrem.eclat.util.SplitByKTextInputFormatTest.java
License:Apache License
@Test public void splits_Non_Empty_File_More_Splits_Than_Lines() throws IOException { File in = createTmpFile("in_Splits_Non_Empty_File_More_Splits_Than_Lines", non_Empty); Configuration conf = createConfiguration(); FileStatus status = EasyMock.createMock(FileStatus.class); EasyMock.expect(status.getPath()).andReturn(new Path(in.getAbsolutePath())); EasyMock.expect(status.isDir()).andReturn(false); EasyMock.replay(status);//from w ww . j a v a 2s . c o m List<FileSplit> splits = SplitByKTextInputFormat.getSplitsForFile(status, conf, 10); List<FileSplit> expected = newArrayListWithCapacity(3); expected.add(new FileSplit(new Path(in.getAbsolutePath()), 0, 5, new String[] {})); expected.add(new FileSplit(new Path(in.getAbsolutePath()), 5, 7, new String[] {})); expected.add(new FileSplit(new Path(in.getAbsolutePath()), 12, 5, new String[] {})); checkSplits(expected, splits); }
From source file:be.uantwerpen.adrem.hadoop.util.SplitByKTextInputFormat.java
License:Apache License
/** * Gets the different file splits for the data based on a given number of splits * /*w w w . ja v a 2s . c o m*/ * @param status * file status * @param conf * hadoop configuration object * @param numberOfSplits * number of splits to split the data in * @return list of file splits * @throws IOException * thrown if the file does not exist */ public static List<FileSplit> getSplitsForFile(FileStatus status, Configuration conf, int numberOfSplits) throws IOException { List<FileSplit> splits = newArrayList(); Path fileName = status.getPath(); if (status.isDir()) { throw new IOException("Not a file: " + fileName); } long totalNumberOfLines = getTotalNumberOfLines(conf, fileName); int numLinesPerSplit = (int) Math.ceil(1.0 * totalNumberOfLines / numberOfSplits); LineReader lr = null; FSDataInputStream in = null; try { in = fileName.getFileSystem(conf).open(fileName); lr = new LineReader(in, conf); Text line = new Text(); int numLines = 0; long begin = 0; long length = 0; int num = -1; while ((num = lr.readLine(line)) > 0) { numLines++; length += num; if (numLines == numLinesPerSplit) { splits.add(createFileSplit(fileName, begin, length)); begin += length; length = 0; numLines = 0; } } if (numLines != 0) { splits.add(createFileSplit(fileName, begin, length)); } } finally { if (lr != null) { lr.close(); } if (in != null) { in.close(); } } return splits; }
From source file:be.uantwerpen.adrem.hadoop.util.Tools.java
License:Apache License
/**
 * Deletes every child of {@code dir} except those whose name is listed in
 * {@code toKeep}; for kept children only their {@code _SUCCESS} marker is removed.
 *
 * @param dir    directory whose children are cleaned up
 * @param toKeep names (last path components) of children to preserve
 */
public static void cleanupSubdirsExcept(String dir, Collection<String> toKeep) {
    Path path = new Path(dir);
    try {
        for (FileStatus fs : path.getFileSystem(new Configuration()).listStatus(path)) {
            // Path.getName() yields the last path component directly — no need to
            // split the full URI string on the separator as before.
            String filename = fs.getPath().getName();
            if (toKeep.contains(filename)) {
                // Keep the directory itself but drop its _SUCCESS marker.
                cleanDirs(fs.getPath().toString() + Path.SEPARATOR + "_SUCCESS");
            } else {
                cleanDirs(fs.getPath().toString());
            }
        }
    } catch (IOException e) {
        // NOTE(review): printStackTrace silently swallows cleanup failures; kept for
        // behavior compatibility, but consider logging or propagating instead.
        e.printStackTrace();
    }
}
From source file:be.ugent.intec.halvade.MapReduceRunner.java
License:Open Source License
protected int runPass1RNAJob(Configuration pass1Conf, String tmpOutDir) throws IOException, InterruptedException, ClassNotFoundException, URISyntaxException { HalvadeConf.setIsPass2(pass1Conf, false); HalvadeResourceManager.setJobResources(halvadeOpts, pass1Conf, HalvadeResourceManager.RNA_SHMEM_PASS1, true, halvadeOpts.useBamInput);/*w ww. j a v a 2 s . co m*/ Job pass1Job = Job.getInstance(pass1Conf, "Halvade pass 1 RNA pipeline"); pass1Job.addCacheArchive(new URI(halvadeOpts.halvadeBinaries)); pass1Job.setJarByClass(be.ugent.intec.halvade.hadoop.mapreduce.HalvadeMapper.class); FileSystem fs = FileSystem.get(new URI(halvadeOpts.in), pass1Conf); try { if (fs.getFileStatus(new Path(halvadeOpts.in)).isDirectory()) { // add every file in directory FileStatus[] files = fs.listStatus(new Path(halvadeOpts.in)); for (FileStatus file : files) { if (!file.isDirectory()) { FileInputFormat.addInputPath(pass1Job, file.getPath()); } } } else { FileInputFormat.addInputPath(pass1Job, new Path(halvadeOpts.in)); } } catch (IOException | IllegalArgumentException e) { Logger.EXCEPTION(e); } FileSystem outFs = FileSystem.get(new URI(tmpOutDir), pass1Conf); boolean skipPass1 = false; if (outFs.exists(new Path(tmpOutDir))) { // check if genome already exists skipPass1 = outFs.exists(new Path(tmpOutDir + "/_SUCCESS")); if (skipPass1) Logger.DEBUG("pass1 genome already created, skipping pass 1"); else { Logger.INFO("The output directory \'" + tmpOutDir + "\' already exists."); Logger.INFO("ERROR: Please remove this directory before trying again."); System.exit(-2); } } if (!skipPass1) { FileOutputFormat.setOutputPath(pass1Job, new Path(tmpOutDir)); pass1Job.setMapperClass(be.ugent.intec.halvade.hadoop.mapreduce.StarAlignPassXMapper.class); pass1Job.setInputFormatClass(HalvadeTextInputFormat.class); pass1Job.setMapOutputKeyClass(GenomeSJ.class); pass1Job.setMapOutputValueClass(Text.class); pass1Job.setSortComparatorClass(GenomeSJSortComparator.class); 
pass1Job.setGroupingComparatorClass(GenomeSJGroupingComparator.class); pass1Job.setNumReduceTasks(1); pass1Job.setReducerClass(be.ugent.intec.halvade.hadoop.mapreduce.RebuildStarGenomeReducer.class); pass1Job.setOutputKeyClass(LongWritable.class); pass1Job.setOutputValueClass(Text.class); return runTimedJob(pass1Job, "Halvade pass 1 Job"); } else return 0; }
From source file:be.ugent.intec.halvade.MapReduceRunner.java
License:Open Source License
protected void addInputFiles(String input, Configuration conf, Job job) throws URISyntaxException, IOException { FileSystem fs = FileSystem.get(new URI(input), conf); Logger.DEBUG("adding input files from " + input); if (fs.getFileStatus(new Path(input)).isDirectory()) { // add every file in directory FileStatus[] files = fs.listStatus(new Path(input)); for (FileStatus file : files) { if (!file.isDirectory()) { FileInputFormat.addInputPath(job, file.getPath()); }//w w w . jav a 2 s. c o m } } else FileInputFormat.addInputPath(job, new Path(input)); }
From source file:be.ugent.intec.halvade.MapReduceRunner.java
License:Open Source License
protected void addInputFiles(String input, Configuration conf, Job job, String filter) throws URISyntaxException, IOException { FileSystem fs = FileSystem.get(new URI(input), conf); if (fs.getFileStatus(new Path(input)).isDirectory()) { // add every file in directory FileStatus[] files = fs.listStatus(new Path(input)); for (FileStatus file : files) { if (!file.isDirectory() && file.getPath().getName().endsWith(filter)) { FileInputFormat.addInputPath(job, file.getPath()); }/*from w w w. j a va2 s .c o m*/ } } else { FileInputFormat.addInputPath(job, new Path(input)); } }
From source file:be.ugent.intec.halvade.utils.HalvadeConf.java
License:Open Source License
/**
 * Records the HDFS locations of the known-sites inputs in the configuration:
 * stores {@code val.length} under {@code numberOfSites} and one path string per
 * entry under {@code sitesOnHDFSName + i}.
 *
 * NOTE(review): in the directory branch below, every regular file found writes to
 * the SAME key ({@code sitesOnHDFSName + i}), so only the LAST file in the listing
 * survives — this contradicts the "add all files" comment and looks like a bug.
 * Fixing it would change the key scheme the corresponding getter reads, so it is
 * flagged here rather than changed; verify against the getter.
 *
 * @param conf configuration to populate
 * @param val  HDFS paths, each either a file or a directory of files
 * @throws IOException        if a filesystem cannot be reached
 * @throws URISyntaxException if an entry in {@code val} is not a valid URI
 */
public static void setKnownSitesOnHDFS(Configuration conf, String[] val) throws IOException, URISyntaxException {
    conf.setInt(numberOfSites, val.length);
    FileSystem fs;
    for (int i = 0; i < val.length; i++) {
        // check if dir add all files!
        fs = FileSystem.get(new URI(val[i]), conf);
        if (fs.isFile(new Path(val[i]))) {
            conf.set(sitesOnHDFSName + i, val[i]);
        } else {
            FileStatus[] files = fs.listStatus(new Path(val[i]));
            for (FileStatus file : files) {
                if (!file.isDir()) {
                    // overwrites the same key on each iteration — see NOTE above
                    conf.set(sitesOnHDFSName + i, file.getPath().toString());
                }
            }
        }
    }
}