List of usage examples for org.apache.hadoop.fs.Path.getFileSystem
public FileSystem getFileSystem(Configuration conf) throws IOException
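Before the per-project examples below, here is a minimal, self-contained sketch of the pattern they all share: resolve a FileSystem from a Path and a Configuration, then perform I/O against it. The path URI and the listing logic are illustrative assumptions, not taken from any of the source files listed on this page.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class GetFileSystemExample {
    public static void main(String[] args) throws Exception {
        // Hypothetical path; the scheme (hdfs://, file://, s3a://, ...) selects the FileSystem implementation.
        Path dir = new Path("hdfs://namenode:8020/user/example/data");
        Configuration conf = new Configuration();

        // getFileSystem(conf) resolves the FileSystem for this path's scheme and authority,
        // falling back to fs.defaultFS from the Configuration when the path has no scheme.
        FileSystem fs = dir.getFileSystem(conf);

        // Use the resolved FileSystem for ordinary operations.
        if (fs.exists(dir)) {
            for (FileStatus status : fs.listStatus(dir)) {
                System.out.println(status.getPath() + " (" + status.getLen() + " bytes)");
            }
        }
    }
}

Resolving the FileSystem from the Path, rather than calling FileSystem.get(conf) directly, is what lets the examples below work uniformly across HDFS, the local filesystem, and any other scheme the cluster configuration supports.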
From source file:com.google.mr4c.sources.HDFSFileSource.java
License:Open Source License
public static HDFSFileSource create(Path dir, boolean flat) throws IOException {
    return create(dir.getFileSystem(new Configuration()), dir, flat);
}
From source file:com.google.mr4c.sources.MapFileSource.java
License:Open Source License
public MapFileSource(Path dir) throws IOException {
    this(dir.getFileSystem(new Configuration()), dir);
}
From source file:com.grantingersoll.intell.clustering.KMeansClusteringEngine.java
License:Apache License
private static Map<Integer, List<String>> readPoints(Path pointsPathDir, Configuration conf) throws IOException {
    Map<Integer, List<String>> result = new TreeMap<Integer, List<String>>();
    FileSystem fs = pointsPathDir.getFileSystem(conf);
    FileStatus[] children = fs.listStatus(pointsPathDir, new PathFilter() {
        public boolean accept(Path path) {
            String name = path.getName();
            return !(name.endsWith(".crc") || name.startsWith("_"));
        }
    });
    for (FileStatus file : children) {
        Path path = file.getPath();
        SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, conf);
        try {
            IntWritable key = reader.getKeyClass().asSubclass(IntWritable.class).newInstance();
            WeightedVectorWritable value = reader.getValueClass().asSubclass(WeightedVectorWritable.class)
                    .newInstance();
            while (reader.next(key, value)) {
                // key is the clusterId, value is a list of points
                List<String> pointList = result.get(key.get());
                if (pointList == null) {
                    pointList = new ArrayList<String>();
                    result.put(key.get(), pointList);
                }
                // We know we are dealing with named vectors, b/c we generated them from the id field
                String name = ((NamedVector) value.getVector()).getName();
                pointList.add(name);
            }
        } catch (InstantiationException e) {
            log.error("Exception", e);
        } catch (IllegalAccessException e) {
            log.error("Exception", e);
        }
    }
    return result;
}
From source file:com.gruter.hadoop.customShell.CustomShell.java
License:Apache License
/**
 * Return the {@link FileSystem} specified by src and the conf.
 * If the {@link FileSystem} supports checksums, set verifyChecksum.
 */
private FileSystem getSrcFileSystem(Path src, boolean verifyChecksum) throws IOException {
    FileSystem srcFs = src.getFileSystem(getConf());
    srcFs.setVerifyChecksum(verifyChecksum);
    return srcFs;
}
From source file:com.gruter.hadoop.customShell.CustomShell.java
License:Apache License
void text(String srcf) throws IOException {
    Path srcPattern = new Path(srcf);
    new DelayedExceptionThrowing() {
        @Override
        void process(Path p, FileSystem srcFs) throws IOException {
            if (srcFs.isDirectory(p)) {
                throw new IOException("Source must be a file.");
            }
            printToStdout(forMagic(p, srcFs));
        }
    }.globAndProcess(srcPattern, srcPattern.getFileSystem(getConf()));
}
From source file:com.gruter.hadoop.customShell.CustomShell.java
License:Apache License
/**
 * Get a listing of all files that match the file pattern <i>srcf</i>.
 * @param srcf a file pattern specifying source files
 * @param recursive whether to list files in subdirectories
 * @throws IOException
 * @see org.apache.hadoop.fs.FileSystem#globStatus(Path)
 */
private int ls(String srcf, boolean recursive) throws IOException {
    Path srcPath = new Path(srcf);
    FileSystem srcFs = srcPath.getFileSystem(this.getConf());
    FileStatus[] srcs = srcFs.globStatus(srcPath);
    if (srcs == null || srcs.length == 0) {
        throw new FileNotFoundException("Cannot access " + srcf + ": No such file or directory.");
    }
    boolean printHeader = (srcs.length == 1);
    int numOfErrors = 0;
    for (int i = 0; i < srcs.length; i++) {
        numOfErrors += ls(srcs[i], srcFs, recursive, printHeader);
    }
    return numOfErrors == 0 ? 0 : -1;
}
From source file:com.gruter.hadoop.customShell.CustomShell.java
License:Apache License
int runCmdHandler(CmdHandler handler, String[] args, int startIndex, boolean recursive) throws IOException {
    int errors = 0;
    for (int i = startIndex; i < args.length; i++) {
        Path srcPath = new Path(args[i]);
        FileSystem srcFs = srcPath.getFileSystem(getConf());
        Path[] paths = FileUtil.stat2Paths(srcFs.globStatus(srcPath), srcPath);
        // if nothing matches the given glob pattern, increment the error count
        if (paths.length == 0) {
            System.err.println(handler.getName() + ": could not get status for '" + args[i] + "'");
            errors++;
        }
        for (Path path : paths) {
            try {
                FileStatus file = srcFs.getFileStatus(path);
                if (file == null) {
                    System.err.println(handler.getName() + ": could not get status for '" + path + "'");
                    errors++;
                } else {
                    errors += runCmdHandler(handler, file, srcFs, recursive);
                }
            } catch (IOException e) {
                String msg = (e.getMessage() != null ? e.getLocalizedMessage()
                        : (e.getCause().getMessage() != null ? e.getCause().getLocalizedMessage() : "null"));
                System.err.println(
                        handler.getName() + ": could not get status for '" + path + "': " + msg.split("\n")[0]);
                errors++;
            }
        }
    }
    return (errors > 0 || handler.getErrorCode() != 0) ? 1 : 0;
}
From source file:com.hadoop.mapred.DeprecatedLzoLineRecordReader.java
License:Open Source License
DeprecatedLzoLineRecordReader(Configuration conf, FileSplit split) throws IOException {
    start = split.getStart();
    end = start + split.getLength();
    final Path file = split.getPath();

    FileSystem fs = file.getFileSystem(conf);
    codecFactory = new CompressionCodecFactory(conf);
    final CompressionCodec codec = codecFactory.getCodec(file);
    if (codec == null) {
        throw new IOException("No LZO codec found, cannot run.");
    }

    // Open the file and seek to the next split.
    fileIn = fs.open(file);
    // Create input stream and read the file header.
    in = new LineReader(codec.createInputStream(fileIn), conf);
    if (start != 0) {
        fileIn.seek(start);
        // Read and ignore the first line.
        in.readLine(new Text());
        start = fileIn.getPos();
    }
    pos = start;
}
From source file:com.hadoop.mapred.DeprecatedLzoTextInputFormat.java
License:Open Source License
@Override
protected FileStatus[] listStatus(JobConf conf) throws IOException {
    List<FileStatus> files = new ArrayList<FileStatus>(Arrays.asList(super.listStatus(conf)));
    boolean ignoreNonLzo = LzoInputFormatCommon.getIgnoreNonLzoProperty(conf);

    Iterator<FileStatus> it = files.iterator();
    while (it.hasNext()) {
        FileStatus fileStatus = it.next();
        Path file = fileStatus.getPath();

        if (!LzoInputFormatCommon.isLzoFile(file.toString())) {
            // Get rid of non-LZO files, unless the conf explicitly tells us to keep them.
            // However, always skip over files that end with ".lzo.index", since
            // they are not part of the input.
            if (ignoreNonLzo || LzoInputFormatCommon.isLzoIndexFile(file.toString())) {
                it.remove();
            }
        } else {
            FileSystem fs = file.getFileSystem(conf);
            LzoIndex index = LzoIndex.readIndex(fs, file);
            indexes.put(file, index);
        }
    }

    return files.toArray(new FileStatus[] {});
}
From source file:com.hadoop.mapred.DeprecatedLzoTextInputFormat.java
License:Open Source License
@Override
public InputSplit[] getSplits(JobConf conf, int numSplits) throws IOException {
    FileSplit[] splits = (FileSplit[]) super.getSplits(conf, numSplits);
    // Find new starts/ends of the filesplit that align with the LZO blocks.
    List<FileSplit> result = new ArrayList<FileSplit>();

    for (FileSplit fileSplit : splits) {
        Path file = fileSplit.getPath();
        FileSystem fs = file.getFileSystem(conf);

        if (!LzoInputFormatCommon.isLzoFile(file.toString())) {
            // non-LZO file, keep the input split as is.
            result.add(fileSplit);
            continue;
        }

        // LZO file, try to split if the .index file was found
        LzoIndex index = indexes.get(file);
        if (index == null) {
            throw new IOException("Index not found for " + file);
        }
        if (index.isEmpty()) {
            // Empty index, keep it as is.
            result.add(fileSplit);
            continue;
        }

        long start = fileSplit.getStart();
        long end = start + fileSplit.getLength();

        long lzoStart = index.alignSliceStartToIndex(start, end);
        long lzoEnd = index.alignSliceEndToIndex(end, fs.getFileStatus(file).getLen());

        if (lzoStart != LzoIndex.NOT_FOUND && lzoEnd != LzoIndex.NOT_FOUND) {
            result.add(new FileSplit(file, lzoStart, lzoEnd - lzoStart, fileSplit.getLocations()));
        }
    }

    return result.toArray(new FileSplit[result.size()]);
}