List of usage examples for org.apache.hadoop.fs.Path.getFileSystem
public FileSystem getFileSystem(Configuration conf) throws IOException
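Before the project examples below, here is a minimal self-contained sketch of the call itself. This is an illustration rather than an excerpt from any source file; the path string is a placeholder. getFileSystem resolves whichever FileSystem implementation (local, HDFS, ...) the path's scheme and the given configuration point to.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class GetFileSystemSketch {
  public static void main(String[] args) throws IOException {
    Configuration conf = new Configuration();      // picks up core-site.xml / hdfs-site.xml if present
    Path path = new Path("/tmp/example-output");   // placeholder path
    FileSystem fs = path.getFileSystem(conf);      // the FileSystem owning this path's scheme
    System.out.println(path + " exists: " + fs.exists(path));
  }
}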
From source file:be.uantwerpen.adrem.bigfim.BigFIMDriver.java
License:Apache License
private boolean canStartMining(FIMOptions opt) throws IOException {
  Path path = new Path(opt.outputDir + separator + "pg");
  return path.getFileSystem(new Configuration()).exists(path);
}
From source file:be.uantwerpen.adrem.bigfim.BigFIMDriver.java
License:Apache License
protected int startAprioriPhase(FIMOptions opt)
    throws IOException, InterruptedException, ClassNotFoundException, URISyntaxException {
  long nrLines = -1;
  int i = 1;
  boolean run = true;
  while (run) {
    if (i == 1) {
      // Phase 1 runs once on the whole dataset.
      String outputDir = opt.outputDir + separator + "ap" + i;
      String info = "Apriori Phase " + i;
      run = runAprioriOncPhaseOnce(opt, nrLines, i, info, outputDir, null);
    } else {
      // Later phases run once per trie-group file produced by the previous phase.
      Path path = new Path(opt.outputDir + separator + "tg" + (i - 1));
      for (FileStatus status : path.getFileSystem(new Configuration()).listStatus(path)) {
        String cacheFile = status.getPath().toString();
        String trieGroupCount = cacheFile.substring(cacheFile.lastIndexOf('/'), cacheFile.length());
        trieGroupCount = trieGroupCount.split("-")[1];
        String outputDir = opt.outputDir + separator + "ap" + i + "-trieGroup" + trieGroupCount;
        System.out.println("CacheFile " + cacheFile);
        String info = "Apriori Phase " + i + ", Trie Group " + trieGroupCount;
        run = runAprioriOncPhaseOnce(opt, nrLines, i, info, outputDir, cacheFile);
      }
    }
    i++;
  }
  return i - 1;
}
From source file:be.uantwerpen.adrem.bigfim.BigFIMDriver.java
License:Apache License
private void startCreatePrefixGroups(FIMOptions opt, int phase)
    throws IOException, ClassNotFoundException, InterruptedException, URISyntaxException {
  // Launch one "Create Prefix Groups" job per trie-group file from the given phase.
  Path path = new Path(opt.outputDir + separator + "tg" + phase);
  for (FileStatus status : path.getFileSystem(new Configuration()).listStatus(path)) {
    String cacheFile = status.getPath().toString();
    String trieGroupCount = cacheFile.substring(cacheFile.lastIndexOf('/'), cacheFile.length());
    trieGroupCount = trieGroupCount.split("-")[1];
    String outputFile = opt.outputDir + separator + "pg-trieGroup" + trieGroupCount;
    System.out.println("[CreatePrefixGroups]: input: " + opt.inputFile + ", output: " + opt.outputDir
        + ", cache: " + cacheFile);
    Job job = prepareJob(new Path(opt.inputFile), new Path(outputFile), SplitByKTextInputFormat.class,
        ComputeTidListMapper.class, Text.class, IntArrayWritable.class, ComputeTidListReducer.class,
        IntArrayWritable.class, IntMatrixWritable.class, SequenceFileOutputFormat.class);
    job.setJobName("Create Prefix Groups");
    job.setJarByClass(BigFIMDriver.class);
    job.setNumReduceTasks(1);
    Configuration conf = job.getConfiguration();
    setConfigurationValues(conf, opt);
    conf.setInt(PREFIX_LENGTH_KEY, phase);
    addCacheFile(new URI(cacheFile.replace(" ", "%20")), job.getConfiguration());
    runJob(job, "Prefix Creation");
  }
}
From source file:be.uantwerpen.adrem.bigfim.ComputeTidListReducer.java
License:Apache License
private void getPgStartIndex(Configuration conf) {
  try {
    Path path = new Path(basePGDir);
    FileSystem fs = path.getFileSystem(new Configuration());
    if (!fs.exists(path)) {
      pgStartIndex = 0;
      return;
    }
    // pgStartIndex counts the existing bucket files in the prefix-group directory.
    int largestIx = 0;
    for (FileStatus file : fs.listStatus(path)) {
      String tmp = file.getPath().toString();
      if (!tmp.contains("bucket")) {
        continue;
      }
      tmp = tmp.substring(tmp.lastIndexOf('/'), tmp.length());
      int ix = Integer.parseInt(tmp.split("-")[1]);
      largestIx = Math.max(largestIx, ix);
      pgStartIndex += 1;
    }
  } catch (IOException e) {
    e.printStackTrace();
  }
}
From source file:be.uantwerpen.adrem.disteclat.PrefixComputerReducer.java
License:Apache License
private void createShortFIsFile(Context context) throws IOException {
  Path path = new Path(createPath(getJobAbsoluteOutputDir(context), OShortFIs, OShortFIs));
  FileSystem fs = path.getFileSystem(context.getConfiguration());
  shortFIsOut = new PrintStream(fs.create(path));
}
From source file:be.uantwerpen.adrem.hadoop.util.SplitByKTextInputFormat.java
License:Apache License
/**
 * Splits the file into the given number of file splits, by line count.
 *
 * @param status
 *          file status
 * @param conf
 *          Hadoop configuration object
 * @param numberOfSplits
 *          number of splits to split the data in
 * @return list of file splits
 * @throws IOException
 *           thrown if the path is a directory or the file cannot be read
 */
public static List<FileSplit> getSplitsForFile(FileStatus status, Configuration conf, int numberOfSplits)
    throws IOException {
  List<FileSplit> splits = newArrayList();
  Path fileName = status.getPath();
  if (status.isDir()) {
    throw new IOException("Not a file: " + fileName);
  }
  long totalNumberOfLines = getTotalNumberOfLines(conf, fileName);
  int numLinesPerSplit = (int) Math.ceil(1.0 * totalNumberOfLines / numberOfSplits);
  LineReader lr = null;
  FSDataInputStream in = null;
  try {
    in = fileName.getFileSystem(conf).open(fileName);
    lr = new LineReader(in, conf);
    Text line = new Text();
    int numLines = 0;
    long begin = 0;
    long length = 0;
    int num = -1;
    while ((num = lr.readLine(line)) > 0) {
      numLines++;
      length += num;
      if (numLines == numLinesPerSplit) {
        splits.add(createFileSplit(fileName, begin, length));
        begin += length;
        length = 0;
        numLines = 0;
      }
    }
    if (numLines != 0) {
      splits.add(createFileSplit(fileName, begin, length));
    }
  } finally {
    if (lr != null) {
      lr.close();
    }
    if (in != null) {
      in.close();
    }
  }
  return splits;
}
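For context, a hypothetical call site for getSplitsForFile; the input path and split count are placeholders, and the helper class above is assumed to be on the classpath:

import java.util.List;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;

public class SplitsUsageSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Path input = new Path("/data/input.txt");  // placeholder input file
    FileStatus status = input.getFileSystem(conf).getFileStatus(input);
    List<FileSplit> splits = SplitByKTextInputFormat.getSplitsForFile(status, conf, 4);
    System.out.println("Created " + splits.size() + " splits");
  }
}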
From source file:be.uantwerpen.adrem.hadoop.util.SplitByKTextInputFormat.java
License:Apache License
/**
 * Gets the total number of lines in the file. If NUMBER_OF_LINES_KEY is set in the configuration, that value is
 * returned instead of scanning the file.
 *
 * @param conf
 *          Hadoop configuration object
 * @param fileName
 *          name of the file to count
 * @return the number of lines in the file
 * @throws IOException
 */
public static long getTotalNumberOfLines(Configuration conf, Path fileName) throws IOException {
  long nrLines = conf.getLong(NUMBER_OF_LINES_KEY, -1);
  if (nrLines != -1) {
    return nrLines;
  }
  try {
    FSDataInputStream in = fileName.getFileSystem(conf).open(fileName);
    LineReader lr = new LineReader(in, conf);
    Text text = new Text();
    nrLines = 0;
    while (lr.readLine(text) > 0) {
      nrLines++;
    }
    in.close();
    return nrLines;
  } catch (IOException e) {
    e.printStackTrace();
  }
  return 0;
}
From source file:be.uantwerpen.adrem.hadoop.util.Tools.java
License:Apache License
/**
 * Cleans the Hadoop file system by deleting the specified files if they exist.
 *
 * @param files
 *          the files to delete
 */
public static void cleanDirs(String... files) {
  System.out.println("[Cleaning]: Cleaning HDFS");
  Configuration conf = new Configuration();
  for (String filename : files) {
    System.out.println("[Cleaning]: Trying to delete " + filename);
    Path path = new Path(filename);
    try {
      FileSystem fs = path.getFileSystem(conf);
      if (fs.exists(path)) {
        if (fs.delete(path, true)) {
          System.out.println("[Cleaning]: Deleted " + filename);
        } else {
          System.out.println("[Cleaning]: Error while deleting " + filename);
        }
      } else {
        System.out.println("[Cleaning]: " + filename + " does not exist on HDFS");
      }
    } catch (IOException e) {
      e.printStackTrace();
    }
  }
}
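A hypothetical invocation of the helper above; the directory names are placeholders:

Tools.cleanDirs("/user/me/out/ap1", "/user/me/out/pg");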
From source file:be.uantwerpen.adrem.hadoop.util.Tools.java
License:Apache License
public static String getJobAbsoluteOutputDir(@SuppressWarnings("rawtypes") Context context) {
  try {
    Path path = new Path(context.getConfiguration().get(OUTPUT_DIR_KEY));
    FileSystem fs = path.getFileSystem(context.getConfiguration());
    return fs.getFileStatus(path).getPath().toString();
  } catch (IOException e) {
    // ignored; fall through to the empty default
  }
  return "";
}
From source file:be.uantwerpen.adrem.hadoop.util.Tools.java
License:Apache License
public static void cleanupSubdirsExcept(String dir, Collection<String> toKeep) {
  Path path = new Path(dir);
  try {
    for (FileStatus fs : path.getFileSystem(new Configuration()).listStatus(path)) {
      String[] sp = fs.getPath().toString().split(Path.SEPARATOR);
      String filename = sp[sp.length - 1];
      if (toKeep.contains(filename)) {
        cleanDirs(fs.getPath().toString() + Path.SEPARATOR + "_SUCCESS");
        continue;
      }
      cleanDirs(fs.getPath().toString());
    }
  } catch (IOException e) {
    e.printStackTrace();
  }
}