List of usage examples for org.apache.hadoop.fs.Path#toUri()
public URI toUri()
From source file:com.mozilla.grouperfish.transforms.coclustering.pig.storage.MahoutVectorStorage.java
License:Apache License
@SuppressWarnings({ "rawtypes", "unchecked", "finally" }) @Override/*from w ww .jav a 2 s .c o m*/ public void prepareToWrite(RecordWriter writer) throws IOException { if (dimensionPath != null) { Path p = new Path(dimensionPath); FileSystem fs = FileSystem.get(p.toUri(), new Configuration()); for (FileStatus status : fs.listStatus(p)) { Path currPath = status.getPath(); if (!status.isDir() && !currPath.getName().startsWith("_")) { BufferedReader reader = null; try { reader = new BufferedReader(new InputStreamReader(fs.open(currPath))); String line = reader.readLine(); this.dimensions = Integer.parseInt(line); } catch (NumberFormatException nfe) { LOG.error("Unexpected input for dimensions", nfe); throw new IOException(); } finally { if (reader != null) { reader.close(); } // TODO: SMELLY: Why loop if we always cancel after the first file? break; } } } } this.writer = writer; }
From source file:com.mozilla.grouperfish.transforms.coclustering.text.Dictionary.java
License:Apache License
public static Map<Integer, String> loadInvertedIndexWithKeys(Path dictionaryPath) throws IOException { Map<Integer, String> index = null; FileSystem fs = null;//w ww .ja va 2 s. c o m try { fs = FileSystem.get(dictionaryPath.toUri(), new Configuration()); index = loadInvertedIndexWithKeys(fs, dictionaryPath); } finally { if (fs != null) { fs.close(); } } return index; }
From source file:com.mozilla.hadoop.Backup.java
License:Apache License
/** * @param args/*from w w w.j a v a2 s .c o m*/ * @return * @throws IOException * @throws ParseException */ public Job initJob(String[] args) throws IOException, ParseException { Path inputPath = null; Path loadPath = null; String outputPath = null; boolean useSpecifiedPaths = false; for (int idx = 0; idx < args.length; idx++) { if ("-f".equals(args[idx])) { useSpecifiedPaths = true; loadPath = new Path(args[++idx]); } else if (idx == args.length - 1) { outputPath = args[idx]; } else { inputPath = new Path(args[idx]); } } Path mrOutputPath = new Path(NAME + "-results"); conf.setBoolean("mapred.map.tasks.speculative.execution", false); conf.set("backup.input.path", inputPath.toString()); conf.set("backup.output.path", outputPath); FileSystem inputFs = null; FileSystem outputFs = null; Path[] inputSources = null; try { inputFs = FileSystem.get(inputPath.toUri(), new Configuration()); outputFs = FileSystem.get(getConf()); if (useSpecifiedPaths) { inputSources = createInputSources(loadPaths(outputFs, loadPath), outputFs); } else { inputSources = createInputSources(getPaths(inputFs, inputPath, 0, 2), outputFs); } } finally { checkAndClose(inputFs); checkAndClose(outputFs); } Job job = new Job(getConf()); job.setJobName(NAME); job.setJarByClass(Backup.class); job.setMapperClass(BackupMapper.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(Text.class); job.setNumReduceTasks(0); job.setInputFormatClass(TextInputFormat.class); for (Path source : inputSources) { System.out.println("Adding input path: " + source.toString()); FileInputFormat.addInputPath(job, source); } FileOutputFormat.setOutputPath(job, mrOutputPath); return job; }
From source file:com.mozilla.hadoop.fs.SequenceFileDirectoryReader.java
License:Apache License
public SequenceFileDirectoryReader(Path inputPath) throws IOException { fs = FileSystem.get(inputPath.toUri(), conf); paths = new ArrayList<Path>(); for (FileStatus status : fs.listStatus(inputPath)) { Path p = status.getPath(); if (!status.isDir() && !p.getName().startsWith("_")) { paths.add(p);// w ww . j a v a2s. co m } } pathIter = paths.iterator(); }
From source file:com.mozilla.hadoop.fs.TextFileDirectoryReader.java
License:Apache License
public TextFileDirectoryReader(Path inputPath) throws IOException { fs = FileSystem.get(inputPath.toUri(), conf); paths = new ArrayList<Path>(); for (FileStatus status : fs.listStatus(inputPath)) { Path p = status.getPath(); if (!status.isDir() && !p.getName().startsWith("_")) { paths.add(p);//from w w w. j a va 2 s . c o m } } pathIter = paths.iterator(); }
From source file:com.nagarro.nteg.utils.HDFSDirectoryFilesDataReader.java
License:Apache License
/**
 * Creates a reader bound to the given directory.
 *
 * <p>The file system is resolved from the directory's URI with a default
 * configuration. If the path's status does not report a directory, the file
 * system is closed and the construction is rejected.
 *
 * @param dirPathName name of the directory to read from
 * @param batchSize   batch size, passed through to the superclass
 * @throws IOException              if the file system or the path status
 *                                  cannot be obtained
 * @throws IllegalArgumentException if the path is not reported as a directory
 */
public HDFSDirectoryFilesDataReader(final String dirPathName, final int batchSize) throws IOException {
    super(dirPathName, batchSize);

    final Path directory = new Path(dirPathName);
    hdfs = FileSystem.get(directory.toUri(), new Configuration());

    final boolean isDirectory = hdfs.getFileStatus(directory).isDirectory();
    if (!isDirectory) {
        // NOTE(review): FileSystem.get() may return a shared cached instance;
        // confirm that closing it here is intended.
        hdfs.close();
        final String reason = dirPathName
                + " is not a directory or program doesn't have sufficient permissions to access it";
        throw new IllegalArgumentException(reason);
    }
}
From source file:com.nearinfinity.blur.analysis.BlurAnalyzer.java
License:Apache License
public static BlurAnalyzer create(Path path) throws IOException { FileSystem fileSystem = FileSystem.get(path.toUri(), new Configuration()); FSDataInputStream inputStream = fileSystem.open(path); try {// www . j a v a 2s . c om return create(inputStream); } finally { inputStream.close(); } }
From source file:com.nearinfinity.blur.mapreduce.BlurReducer.java
License:Apache License
/**
 * Recursively deletes the given path using the file system resolved from the
 * path's URI and this instance's configuration.
 *
 * @param directoryPath path to delete
 * @throws IOException if the file system cannot be obtained or the delete fails
 */
protected void remove(Path directoryPath) throws IOException {
    final boolean recursive = true;
    FileSystem.get(directoryPath.toUri(), _configuration).delete(directoryPath, recursive);
}
From source file:com.nearinfinity.blur.mapreduce.BlurTask.java
License:Apache License
/**
 * Determines how many reducers to use for the table.
 *
 * <p>Starts from the shard count in the table descriptor. If the table path
 * already exists and contains directories whose name begins with
 * {@code BlurConstants.SHARD_PREFIX}, the number of those directories is
 * returned instead (with a warning when it differs from the descriptor).
 *
 * @param configuration configuration used to resolve the file system
 * @return the descriptor's shard count, or the number of existing shard
 *         directories when at least one exists
 * @throws RuntimeException wrapping any {@link IOException} from the file system
 */
public int getNumReducers(Configuration configuration) {
    Path tablePath = new Path(_tableDescriptor.tableUri);
    try {
        int requested = _tableDescriptor.shardCount;
        FileSystem fs = FileSystem.get(tablePath.toUri(), configuration);
        if (!fs.exists(tablePath)) {
            return requested;
        }

        int existing = 0;
        for (FileStatus entry : fs.listStatus(tablePath)) {
            if (!entry.isDir()) {
                continue;
            }
            if (entry.getPath().getName().startsWith(BlurConstants.SHARD_PREFIX)) {
                existing++;
            }
        }

        if (existing == 0) {
            return requested;
        }
        if (existing != requested) {
            LOG.warn("Asked for " + requested + " reducers, but existing table " + _tableDescriptor.name + " has "
                    + existing + " shards. Using " + existing + " reducers");
        }
        return existing;
    } catch (IOException e) {
        throw new RuntimeException("Unable to connect to filesystem", e);
    }
}
From source file:com.nearinfinity.blur.utils.BlurUtil.java
License:Apache License
public static void setupFileSystem(String uri, int shardCount) throws IOException { Path tablePath = new Path(uri); FileSystem fileSystem = FileSystem.get(tablePath.toUri(), new Configuration()); if (createPath(fileSystem, tablePath)) { LOG.info("Table uri existed."); validateShardCount(shardCount, fileSystem, tablePath); }//from w w w .j a va2 s . c o m for (int i = 0; i < shardCount; i++) { String shardName = BlurUtil.getShardName(SHARD_PREFIX, i); Path shardPath = new Path(tablePath, shardName); createPath(fileSystem, shardPath); } }