List of usage examples for org.apache.hadoop.fs.FileStatus.getPath()
public Path getPath()
From source file:com.blm.orc.OrcInputFormat.java
License:Apache License
/**
 * Checks whether every candidate file can be opened by the ORC reader.
 *
 * <p>When {@code Utilities.isVectorMode(conf)} is true the whole check is delegated to a new
 * {@link VectorizedOrcInputFormat}.
 *
 * @param fs file system handed to the ORC reader options
 * @param conf configuration used for the vector-mode test and the reader options
 * @param files candidate files to probe
 * @return {@code false} when {@code files} is empty or when creating a reader for any file
 *     throws an {@link IOException}; {@code true} otherwise
 * @throws IOException if the delegated vectorized validation throws
 */
@Override
public boolean validateInput(FileSystem fs, HiveConf conf, ArrayList<FileStatus> files)
    throws IOException {
  if (Utilities.isVectorMode(conf)) {
    VectorizedOrcInputFormat vectorized = new VectorizedOrcInputFormat();
    return vectorized.validateInput(fs, conf, files);
  }

  if (files.isEmpty()) {
    return false;
  }

  for (FileStatus candidate : files) {
    try {
      // Only the side effect matters: a reader that cannot be created means "not ORC".
      OrcFile.createReader(candidate.getPath(), OrcFile.readerOptions(conf).filesystem(fs));
    } catch (IOException unreadable) {
      return false;
    }
  }
  return true;
}
From source file:com.blm.orc.OrcInputFormat.java
License:Apache License
static Path findOriginalBucket(FileSystem fs, Path directory, int bucket) throws IOException { for (FileStatus stat : fs.listStatus(directory)) { String name = stat.getPath().getName(); String numberPart = name.substring(0, name.indexOf('_')); if (org.apache.commons.lang3.StringUtils.isNumeric(numberPart) && Integer.parseInt(numberPart) == bucket) { return stat.getPath(); }// w ww. j av a2s . co m } throw new IllegalArgumentException("Can't find bucket " + bucket + " in " + directory); }
From source file:com.blm.orc.VectorizedOrcInputFormat.java
License:Apache License
/**
 * Checks whether every candidate file can be opened by the ORC reader.
 *
 * @param fs file system handed to the ORC reader options
 * @param conf configuration used to build the reader options
 * @param files candidate files to probe
 * @return {@code false} when {@code files} is empty or when creating a reader for any file
 *     throws an {@link IOException}; {@code true} otherwise
 * @throws IOException declared by the signature; reader failures are reported as {@code false}
 */
@Override
public boolean validateInput(FileSystem fs, HiveConf conf, ArrayList<FileStatus> files)
    throws IOException {
  if (files.isEmpty()) {
    return false;
  }

  for (FileStatus candidate : files) {
    try {
      // Only the side effect matters: a reader that cannot be created means "not ORC".
      OrcFile.createReader(candidate.getPath(), OrcFile.readerOptions(conf).filesystem(fs));
    } catch (IOException unreadable) {
      return false;
    }
  }
  return true;
}
From source file:com.btoddb.chronicle.apps.AvroTools.java
License:Open Source License
private void go(String srcDir) throws URISyntaxException, IOException { hdfsFs = FileSystem.get(new URI(srcDir), hdfsConfig); System.out.println();// w w w . j av a2s . co m System.out.println("Processing files from " + srcDir); System.out.println(); logger.debug("Searching for files in {}", srcDir); Path path = new Path(srcDir); if (!hdfsFs.exists(path)) { System.out.println("The path does not exist - cannot continue : " + path.toString()); return; } FileStatus[] statuses = hdfsFs.listStatus(path, new PathFilter() { @Override public boolean accept(Path path) { String name = path.getName(); return !name.startsWith("_") && name.endsWith(".avro"); } }); for (FileStatus fs : statuses) { try { Path inPath = fs.getPath(); long fileSize = hdfsFs.getFileStatus(inPath).getLen(); System.out.println(String.format("Processing file, %s (%d)", inPath.toString(), fileSize)); testFileAndFix(inPath); } catch (Exception e) { // don't care about the cause, the test should be able to read all files it cares about e.printStackTrace(); } } }
From source file:com.ceph.rados.fs.hdfs.RadosFileSystem.java
License:Apache License
/**
 * Deletes the file or directory at {@code path}.
 *
 * <p>For a file, its inode is removed from the store and then each of its blocks. For a
 * directory, every listed child is deleted first and the directory inode last.
 *
 * @param path path to delete; made absolute before use
 * @param recursive whether a non-empty directory may be deleted
 * @return {@code false} if no inode exists for the path, if listing the directory reports
 *     {@link FileNotFoundException}, or if deleting any child returns {@code false};
 *     {@code true} otherwise
 * @throws IOException if the directory is not empty and {@code recursive} is {@code false},
 *     or if a store operation fails
 */
@Override
public boolean delete(Path path, boolean recursive) throws IOException {
  Path absolutePath = makeAbsolute(path);
  INode inode = store.retrieveINode(absolutePath);
  if (inode == null) {
    return false;
  }

  if (inode.isFile()) {
    store.deleteINode(absolutePath);
    for (Block block : inode.getBlocks()) {
      store.deleteBlock(block);
    }
    return true;
  }

  // Directory case.
  FileStatus[] children;
  try {
    children = listStatus(absolutePath);
  } catch (FileNotFoundException missing) {
    return false;
  }

  if (!recursive && children.length != 0) {
    throw new IOException("Directory " + path.toString() + " is not empty.");
  }

  for (FileStatus child : children) {
    boolean removed = delete(child.getPath(), recursive);
    if (!removed) {
      return false;
    }
  }

  store.deleteINode(absolutePath);
  return true;
}
From source file:com.cg.mapreduce.fpgrowth.mahout.fpm.PFPGrowth.java
License:Apache License
/** * Read the Frequent Patterns generated from Text * /*from w w w .j a v a 2 s. c o m*/ * @return List of TopK patterns for each string frequent feature */ public static List<Pair<String, TopKStringPatterns>> readFrequentPattern(Parameters params) throws IOException { Configuration conf = new Configuration(); Path frequentPatternsPath = new Path(params.get(OUTPUT), FREQUENT_PATTERNS); FileSystem fs = FileSystem.get(frequentPatternsPath.toUri(), conf); FileStatus[] outputFiles = fs.globStatus(new Path(frequentPatternsPath, FILE_PATTERN)); List<Pair<String, TopKStringPatterns>> ret = Lists.newArrayList(); for (FileStatus fileStatus : outputFiles) { ret.addAll(FPGrowth.readFrequentPattern(conf, fileStatus.getPath())); } return ret; }
From source file:com.chinamobile.bcbsp.bspcontroller.HDFSOperator.java
License:Apache License
/** * list all directory of th dir//from w ww. j a va 2 s. c o m * @param dir * file dircetory path * @return * directory list * @throws IOException * exceptions during handle BSP file */ public FileStatus[] listDirAll(String dir) throws IOException { // Path path = new Path(dir); // FileStatus[] status = fs.listStatus(path); for (FileStatus s : bspfs.listStatus(new BSPHdfsImpl().newPath(dir))) { System.out.println(s.getPath()); } return bspfs.listStatus(new BSPHdfsImpl().newPath(dir)); }
From source file:com.chinamobile.bcbsp.io.BSPFileInputFormat.java
License:Apache License
/** * Generate the list of files and make them into FileSplits. * * @param job/*from w ww .ja v a 2s . co m*/ * The current BSPJob job * @return input splits */ @Override public List<InputSplit> getSplits(BSPJob job) throws IOException { List<InputSplit> splits = new ArrayList<InputSplit>(); for (FileStatus file : listStatus(job)) { Path path = file.getPath(); FileSystem fs = path.getFileSystem(job.getConf()); long length = file.getLen(); BlockLocation[] blkLocations = fs.getFileBlockLocations(file, 0, length); if ((length != 0) && isSplitable(job, path)) { long blockSize = file.getBlockSize(); long splitSize = 0L; if (job.getInt(Constants.USER_BC_BSP_JOB_SPLIT_FACTOR, 1) == 1) { if (job.getSplitSize() == 0L) { splitSize = blockSize; } else { splitSize = job.getSplitSize(); } } else { if (job.getSplitSize() == 0L) { splitSize = blockSize * job.getInt(Constants.USER_BC_BSP_JOB_SPLIT_FACTOR, 1); } else { splitSize = job.getSplitSize() * job.getInt(Constants.USER_BC_BSP_JOB_SPLIT_FACTOR, 1); } } LOG.info("[Split Size] " + (splitSize / (1024 * 1024)) + " MB"); long bytesRemaining = length; while (((double) bytesRemaining) / splitSize > SPLIT_SLOP) { int blkIndex = getBlockIndex(blkLocations, length - bytesRemaining); splits.add(new FileSplit(path, length - bytesRemaining, splitSize, blkLocations[blkIndex].getHosts())); bytesRemaining -= splitSize; } if (bytesRemaining != 0) { splits.add(new FileSplit(path, length - bytesRemaining, bytesRemaining, blkLocations[blkLocations.length - 1].getHosts())); } } else if (length != 0) { splits.add(new FileSplit(path, 0, length, blkLocations[0].getHosts())); } else { // Create empty hosts array for zero length files splits.add(new FileSplit(path, 0, length, new String[0])); } } LOG.info("[Split Number] " + splits.size()); return splits; }
From source file:com.chinamobile.bcbsp.io.BSPFileInputFormat.java
License:Apache License
/** * List input directories. Subclasses may override to, e.g., select only files * matching a regular expression./*from w ww.j a va 2 s .c o m*/ * * @param job * the job to list input paths for * @return array of FileStatus objects * @throws IOException * if zero items. */ protected List<FileStatus> listStatus(BSPJob job) throws IOException { List<FileStatus> result = new ArrayList<FileStatus>(); Path[] dirs = getInputPaths(job); if (dirs.length == 0) { throw new IOException("No input paths specified in job"); } List<IOException> errors = new ArrayList<IOException>(); // creates a MultiPathFilter with the hiddenFileFilter and the // user provided one (if any). List<PathFilter> filters = new ArrayList<PathFilter>(); filters.add(HIDDEN_FILE_FILTER); PathFilter inputFilter = new MultiPathFilter(filters); for (int i = 0; i < dirs.length; ++i) { Path p = dirs[i]; FileSystem fs = p.getFileSystem(job.getConf()); FileStatus[] matches = fs.globStatus(p, inputFilter); if (matches == null) { errors.add(new IOException("Input path does not exist: " + p)); } else if (matches.length == 0) { errors.add(new IOException("Input Pattern " + p + " matches 0 files")); } else { for (FileStatus globStat : matches) { if (globStat.isDir()) { for (FileStatus stat : fs.listStatus(globStat.getPath(), inputFilter)) { result.add(stat); } } else { result.add(globStat); } } } } if (!errors.isEmpty()) { throw new InvalidInputException(errors); } LOG.info("Total input paths to process : " + result.size()); return result; }
From source file:com.cloudera.cdk.data.filesystem.FileSystemDataset.java
License:Apache License
/**
 * Returns one dataset per non-hidden entry found directly under this dataset's directory.
 *
 * <p>Each returned dataset reuses this dataset's name and file system. Its descriptor is a copy
 * of this dataset's descriptor, relocated to the entry's qualified path and given the
 * sub-partition strategy obtained from {@code Accessor}.
 *
 * @return the partition datasets, in listing order
 * @throws IllegalStateException if this dataset's descriptor is not partitioned
 * @throws DatasetException if the directory cannot be listed
 */
@Override
@Deprecated
public Iterable<Dataset<E>> getPartitions() {
  Preconditions.checkState(descriptor.isPartitioned(),
      "Attempt to get partitions on a non-partitioned dataset (name:%s)", name);

  FileStatus[] fileStatuses;
  try {
    fileStatuses = fileSystem.listStatus(directory, PathFilters.notHidden());
  } catch (IOException e) {
    throw new DatasetException("Unable to list partition directory for directory " + directory, e);
  }

  List<Dataset<E>> partitions = Lists.newArrayList();
  for (FileStatus stat : fileStatuses) {
    Path p = fileSystem.makeQualified(stat.getPath());
    PartitionKey key = fromDirectoryName(p);
    PartitionStrategy subPartitionStrategy =
        Accessor.getDefault().getSubpartitionStrategy(partitionStrategy, 1);

    DatasetDescriptor partitionDescriptor = new DatasetDescriptor.Builder(descriptor)
        .location(p)
        .partitionStrategy(subPartitionStrategy)
        .build();

    Builder builder = new FileSystemDataset.Builder()
        .name(name)
        .fileSystem(fileSystem)
        .descriptor(partitionDescriptor)
        .partitionKey(key);

    partitions.add(builder.<E>build());
  }
  return partitions;
}