List of usage examples for org.apache.hadoop.fs.FileStatus.getPath()
public Path getPath()
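getPath() returns the fully qualified Path that a FileStatus describes; the usual pattern, seen throughout the examples below, is to call FileSystem.listStatus() or getFileStatus() and then recover each concrete file or directory with getPath(). As a minimal, self-contained sketch of that pattern (the directory /tmp/input and the default Configuration are illustrative assumptions, not taken from any example on this page):

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class GetPathExample {
    public static void main(String[] args) throws IOException {
        // Hypothetical input directory; replace with a real path.
        Path dir = new Path("/tmp/input");
        Configuration conf = new Configuration();
        FileSystem fs = dir.getFileSystem(conf);

        // Each FileStatus describes one entry; getPath() recovers its Path.
        for (FileStatus status : fs.listStatus(dir)) {
            Path p = status.getPath();
            System.out.println((status.isDirectory() ? "dir  " : "file ") + p.toUri().getPath());
        }
    }
}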
From source file:com.grantingersoll.intell.clustering.KMeansClusteringEngine.java
License:Apache License
private static Map<Integer, List<String>> readPoints(Path pointsPathDir, Configuration conf) throws IOException {
    Map<Integer, List<String>> result = new TreeMap<Integer, List<String>>();
    FileSystem fs = pointsPathDir.getFileSystem(conf);
    FileStatus[] children = fs.listStatus(pointsPathDir, new PathFilter() {
        public boolean accept(Path path) {
            String name = path.getName();
            return !(name.endsWith(".crc") || name.startsWith("_"));
        }
    });
    for (FileStatus file : children) {
        Path path = file.getPath();
        SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, conf);
        try {
            IntWritable key = reader.getKeyClass().asSubclass(IntWritable.class).newInstance();
            WeightedVectorWritable value = reader.getValueClass().asSubclass(WeightedVectorWritable.class)
                    .newInstance();
            while (reader.next(key, value)) {
                // key is the clusterId, value is a list of points
                List<String> pointList = result.get(key.get());
                if (pointList == null) {
                    pointList = new ArrayList<String>();
                    result.put(key.get(), pointList);
                }
                // We know we are dealing with named vectors, b/c we generated from the id field
                String name = ((NamedVector) value.getVector()).getName();
                pointList.add(name);
            }
        } catch (InstantiationException e) {
            log.error("Exception", e);
        } catch (IllegalAccessException e) {
            log.error("Exception", e);
        }
    }
    return result;
}
From source file:com.griddynamics.jagger.storage.fs.HdfsStorage.java
License:Open Source License
@Override
public Set<String> getFileNameList(String path) throws IOException {
    FileStatus[] listStatus = hdfsClient.getFileSystem().listStatus(new Path(path));
    if (listStatus == null)
        return Collections.emptySet();
    return new HashSet<String>(Collections2.<FileStatus, String>transform(Arrays.<FileStatus>asList(listStatus),
            new Function<FileStatus, String>() {
                @Override
                public String apply(FileStatus input) {
                    return input.getPath().toString();
                }
            }));
}
From source file:com.gruter.hadoop.customShell.CustomShell.java
License:Apache License
private int ls(FileStatus src, FileSystem srcFs, boolean recursive, boolean printHeader) throws IOException {
    final String cmd = recursive ? "lsr" : "ls";
    final FileStatus[] items = shellListStatus(cmd, srcFs, src);
    if (items == null) {
        return 1;
    } else {
        int numOfErrors = 0;
        if (!recursive && printHeader) {
            if (items.length != 0) {
                System.out.println("Found " + items.length + " items");
            }
        }

        int maxReplication = 3, maxLen = 10, maxOwner = 0, maxGroup = 0;

        for (int i = 0; i < items.length; i++) {
            FileStatus stat = items[i];
            int replication = String.valueOf(stat.getReplication()).length();
            int len = String.valueOf(stat.getLen()).length();
            int owner = String.valueOf(stat.getOwner()).length();
            int group = String.valueOf(stat.getGroup()).length();

            if (replication > maxReplication)
                maxReplication = replication;
            if (len > maxLen)
                maxLen = len;
            if (owner > maxOwner)
                maxOwner = owner;
            if (group > maxGroup)
                maxGroup = group;
        }

        for (int i = 0; i < items.length; i++) {
            FileStatus stat = items[i];
            Path cur = stat.getPath();
            String mdate = dateForm.format(new Date(stat.getModificationTime()));

            System.out.print((stat.isDir() ? "d" : "-") + stat.getPermission() + " ");
            System.out.printf("%" + maxReplication + "s ", (!stat.isDir() ? stat.getReplication() : "-"));
            if (maxOwner > 0)
                System.out.printf("%-" + maxOwner + "s ", stat.getOwner());
            if (maxGroup > 0)
                System.out.printf("%-" + maxGroup + "s ", stat.getGroup());
            System.out.printf("%" + maxLen + "d ", stat.getLen());
            System.out.print(mdate + " ");
            System.out.println(cur.toUri().getPath());

            if (recursive && stat.isDir()) {
                numOfErrors += ls(stat, srcFs, recursive, printHeader);
            }
        }
        return numOfErrors;
    }
}
From source file:com.gruter.hadoop.customShell.CustomShell.java
License:Apache License
/** helper returns listStatus() */
private static FileStatus[] shellListStatus(String cmd, FileSystem srcFs, FileStatus src) {
    if (!src.isDir()) {
        FileStatus[] files = { src };
        return files;
    }
    Path path = src.getPath();
    try {
        FileStatus[] files = srcFs.listStatus(path);
        if (files == null) {
            System.err.println(cmd + ": could not get listing for '" + path + "'");
        }
        return files;
    } catch (IOException e) {
        System.err.println(
                cmd + ": could not get listing for '" + path + "' : " + e.getMessage().split("\n")[0]);
    }
    return null;
}
From source file:com.hadoop.compression.lzo.LzoIndexer.java
License:Open Source License
/**
 * Lzo index a given path, calling recursively to index directories when encountered.
 * Files are only indexed if they end in .lzo and have no existing .lzo.index file.
 *
 * @param lzoPath The base path to index.
 * @param nestingLevel For pretty printing, the nesting level.
 * @throws IOException
 */
private void indexInternal(Path lzoPath, int nestingLevel) throws IOException {
    FileSystem fs = FileSystem.get(URI.create(lzoPath.toString()), conf_);
    FileStatus fileStatus = fs.getFileStatus(lzoPath);

    // Recursively walk
    if (fileStatus.isDir()) {
        LOG.info(getNesting(nestingLevel) + "LZO Indexing directory " + lzoPath + "...");
        FileStatus[] statuses = fs.listStatus(lzoPath);
        for (FileStatus childStatus : statuses) {
            indexInternal(childStatus.getPath(), nestingLevel + 1);
        }
    } else if (lzoPath.toString().endsWith(LZO_EXTENSION)) {
        Path lzoIndexPath = new Path(lzoPath.toString() + LzoIndex.LZO_INDEX_SUFFIX);
        if (fs.exists(lzoIndexPath)) {
            LOG.info(getNesting(nestingLevel) + "[SKIP] LZO index file already exists for " + lzoPath + "\n");
        } else {
            long startTime = System.currentTimeMillis();
            long fileSize = fileStatus.getLen();

            LOG.info(getNesting(nestingLevel) + "[INDEX] LZO Indexing file " + lzoPath + ", size "
                    + df_.format(fileSize / (1024.0 * 1024.0 * 1024.0)) + " GB...");
            if (indexSingleFile(fs, lzoPath)) {
                long indexSize = fs.getFileStatus(lzoIndexPath).getLen();
                double elapsed = (System.currentTimeMillis() - startTime) / 1000.0;
                LOG.info(getNesting(nestingLevel) + "Completed LZO Indexing in " + df_.format(elapsed)
                        + " seconds (" + df_.format(fileSize / (1024.0 * 1024.0 * elapsed))
                        + " MB/s). Index size is " + df_.format(indexSize / 1024.0) + " KB.\n");
            }
        }
    }
}
From source file:com.hadoop.mapred.DeprecatedLzoTextInputFormat.java
License:Open Source License
@Override
protected FileStatus[] listStatus(JobConf conf) throws IOException {
    List<FileStatus> files = new ArrayList<FileStatus>(Arrays.asList(super.listStatus(conf)));

    boolean ignoreNonLzo = LzoInputFormatCommon.getIgnoreNonLzoProperty(conf);

    Iterator<FileStatus> it = files.iterator();
    while (it.hasNext()) {
        FileStatus fileStatus = it.next();
        Path file = fileStatus.getPath();

        if (!LzoInputFormatCommon.isLzoFile(file.toString())) {
            // Get rid of non-LZO files, unless the conf explicitly tells us to
            // keep them.
            // However, always skip over files that end with ".lzo.index", since
            // they are not part of the input.
            if (ignoreNonLzo || LzoInputFormatCommon.isLzoIndexFile(file.toString())) {
                it.remove();
            }
        } else {
            FileSystem fs = file.getFileSystem(conf);
            LzoIndex index = LzoIndex.readIndex(fs, file);
            indexes.put(file, index);
        }
    }

    return files.toArray(new FileStatus[] {});
}
From source file:com.hadoop.mapreduce.FourMcInputFormat.java
License:BSD License
@Override
protected List<FileStatus> listStatus(JobContext job) throws IOException {
    List<FileStatus> files = super.listStatus(job);
    List<FileStatus> results = new ArrayList<FileStatus>();
    Configuration conf = HadoopUtils.getConfiguration(job);
    boolean recursive = conf.getBoolean("mapred.input.dir.recursive", false);

    Iterator<FileStatus> it = files.iterator();
    while (it.hasNext()) {
        FileStatus fileStatus = it.next();
        FileSystem fs = fileStatus.getPath().getFileSystem(conf);
        addInputPath(results, fs, fileStatus, recursive);
    }

    LOG.debug("Total 4mc input paths to process: " + results.size());
    return results;
}
From source file:com.hadoop.mapreduce.FourMcInputFormat.java
License:BSD License
protected void addInputPath(List<FileStatus> results, FileSystem fs, FileStatus pathStat, boolean recursive)
        throws IOException {
    Path path = pathStat.getPath();
    if (pathStat.isDir()) {
        if (recursive) {
            for (FileStatus stat : fs.listStatus(path, hiddenPathFilter)) {
                addInputPath(results, fs, stat, recursive);
            }
        }
    } else if (visible4mcFilter.accept(path)) {
        results.add(pathStat);
    }
}
From source file:com.hadoop.mapreduce.FourMzInputFormat.java
License:BSD License
@Override
protected List<FileStatus> listStatus(JobContext job) throws IOException {
    List<FileStatus> files = super.listStatus(job);
    List<FileStatus> results = new ArrayList<FileStatus>();
    Configuration conf = HadoopUtils.getConfiguration(job);
    boolean recursive = conf.getBoolean("mapred.input.dir.recursive", false);

    Iterator<FileStatus> it = files.iterator();
    while (it.hasNext()) {
        FileStatus fileStatus = it.next();
        FileSystem fs = fileStatus.getPath().getFileSystem(conf);
        addInputPath(results, fs, fileStatus, recursive);
    }

    LOG.debug("Total 4mz input paths to process: " + results.size());
    return results;
}
From source file:com.hadoop.mapreduce.LzoTextInputFormat.java
License:Open Source License
@Override
protected List<FileStatus> listStatus(JobContext job) throws IOException {
    List<FileStatus> files = super.listStatus(job);

    FileSystem fs = FileSystem.get(job.getConfiguration());
    String fileExtension = new LzopCodec().getDefaultExtension();

    for (Iterator<FileStatus> iterator = files.iterator(); iterator.hasNext();) {
        FileStatus fileStatus = (FileStatus) iterator.next();
        Path file = fileStatus.getPath();

        if (!file.toString().endsWith(fileExtension)) {
            // get rid of non lzo files
            iterator.remove();
        } else {
            // read the index file
            LzoIndex index = readIndex(file, fs);
            indexes.put(file, index);
        }
    }

    return files;
}