List of usage examples for org.apache.hadoop.fs.FileStatus#getPath()
public Path getPath()
From source file:com.cloudera.cdk.data.filesystem.FileSystemDataset.java
License:Apache License
@Deprecated void accumulateDatafilePaths(Path directory, List<Path> paths) throws IOException { for (FileStatus status : fileSystem.listStatus(directory, PathFilters.notHidden())) { if (status.isDirectory()) { accumulateDatafilePaths(status.getPath(), paths); } else {//from w w w.j a v a 2s. c o m paths.add(status.getPath()); } } }
From source file:com.cloudera.cdk.data.filesystem.FileSystemMetadataProvider.java
License:Apache License
@Override public List<String> list() { List<String> datasets = Lists.newArrayList(); try {//from w w w.java 2 s .c o m FileStatus[] entries = rootFileSystem.listStatus(rootDirectory, PathFilters.notHidden()); for (FileStatus entry : entries) { // assumes that all unhidden directories under the root are data sets if (entry.isDirectory() && rootFileSystem.exists(new Path(entry.getPath(), ".metadata"))) { // may want to add a check: !RESERVED_NAMES.contains(name) datasets.add(entry.getPath().getName()); } else { continue; } } } catch (FileNotFoundException ex) { // the repo hasn't created any files yet return datasets; } catch (IOException ex) { throw new MetadataProviderException("Could not list data sets", ex); } return datasets; }
From source file:com.cloudera.cdk.data.filesystem.PathIterator.java
License:Apache License
private boolean advance() { while (true) { if (directories.hasNext()) { final Path directory = directories.next(); try { final FileStatus[] stats = fs.listStatus(directory, PathFilters.notHidden()); final List<Path> nextFileSet = Lists.newArrayListWithCapacity(stats.length); for (FileStatus stat : stats) { if (stat.isFile()) { nextFileSet.add(stat.getPath()); }//from ww w. j ava 2 s . c o m } if (nextFileSet.size() > 0) { this.files = nextFileSet.iterator(); return true; } } catch (IOException ex) { throw new DatasetException("Cannot list files in " + directory, ex); } } else { return false; } } }
From source file:com.cloudera.crunch.type.avro.AvroInputFormat.java
License:Apache License
@Override protected List<FileStatus> listStatus(JobContext job) throws IOException { List<FileStatus> result = new ArrayList<FileStatus>(); for (FileStatus file : super.listStatus(job)) { if (file.getPath().getName().endsWith(org.apache.avro.mapred.AvroOutputFormat.EXT)) { result.add(file);//from w w w.j ava 2 s .c om } } return result; }
From source file:com.cloudera.data.filesystem.FileSystemDataset.java
License:Apache License
@Override public Iterable<Dataset> getPartitions() { Preconditions.checkState(descriptor.isPartitioned(), "Attempt to get partitions on a non-partitioned dataset (name:%s)", name); List<Dataset> partitions = Lists.newArrayList(); FileStatus[] fileStatuses;//from w w w.ja v a 2 s.c om try { fileStatuses = fileSystem.listStatus(directory, PathFilters.notHidden()); } catch (IOException e) { throw new DatasetException("Unable to list partition directory for directory " + directory, e); } for (FileStatus stat : fileStatuses) { Path p = stat.getPath(); PartitionKey key = fromDirectoryName(p); Builder builder = new FileSystemDataset.Builder().name(name).fileSystem(fileSystem) .descriptor(new DatasetDescriptor.Builder().schema(schema).format(descriptor.getFormat()) .partitionStrategy(Accessor.getDefault().getSubpartitionStrategy(partitionStrategy, 1)) .get()) .directory(p).partitionKey(key); partitions.add(builder.get()); } return partitions; }
From source file:com.cloudera.data.filesystem.FileSystemDataset.java
License:Apache License
/**
 * Walks {@code directory} depth-first and appends the path of every non-directory entry to
 * {@code paths}.
 *
 * <p>Only entries accepted by {@code PathFilters.notHidden()} are considered at each level.
 *
 * @param directory the directory to list
 * @param paths accumulator the discovered paths are appended to
 * @throws IOException if listing a directory fails
 */
private void accumulateDatafilePaths(Path directory, List<Path> paths) throws IOException {
  for (FileStatus entry : fileSystem.listStatus(directory, PathFilters.notHidden())) {
    final Path entryPath = entry.getPath();
    if (entry.isDirectory()) {
      accumulateDatafilePaths(entryPath, paths);
      continue;
    }
    paths.add(entryPath);
  }
}
From source file:com.cloudera.flume.handlers.hive.MarkerStore.java
License:Apache License
private boolean runElasticSearchMarkerQueries() { boolean success = true; FileSystem hdfs;//ww w .j a va2 s . com FSDataInputStream in; dstPath = new Path(elasticsearchMarkerFolder); LOG.info("DSTPATH: " + dstPath); try { hdfs = dstPath.getFileSystem(conf); if (hdfs.exists(dstPath)) { FileStatus[] fileListing = hdfs.listStatus(dstPath); for (FileStatus fs : fileListing) { if (!fs.isDir()) { LOG.info("File marker path: " + fs.getPath()); in = hdfs.open(fs.getPath()); byte[] fileData = new byte[(int) fs.getLen()]; in.readFully(fileData); in.close(); LOG.info("cleaning markerfile @: " + fs.getPath().toString()); cleanMarkerFile(fs.getPath().toString()); sendESQuery(elasticsearchUrl, new String(fileData)); } } } } catch (Exception e) { success = false; } return success; }
From source file:com.cloudera.flume.handlers.hive.MarkerStore.java
License:Apache License
private boolean runHiveMarkerQueries() { boolean queryStatus = true; FileSystem hdfs;//from www . j av a 2s . c om FSDataInputStream in; dstPath = new Path(hiveMarkerFolder); LOG.info("DSTPATH: " + dstPath); try { hdfs = dstPath.getFileSystem(conf); if (hdfs.exists(dstPath)) { FileStatus[] fileListing = hdfs.listStatus(dstPath); for (FileStatus fs : fileListing) { if (!fs.isDir()) { LOG.info("File marker path: " + fs.getPath()); in = hdfs.open(fs.getPath()); byte[] fileData = new byte[(int) fs.getLen()]; in.readFully(fileData); String[] splitTab = new String(fileData).split("\t"); if (splitTab.length == 2) { dstPath = new Path(splitTab[0]); FileSystem hiveFile = dstPath.getFileSystem(conf); if (hiveFile.exists(dstPath)) { LOG.info("marker file data: " + splitTab[1]); if (runHiveQuery(splitTab[1])) { LOG.info("Marker query is successful"); in.close(); cleanMarkerFile(fs.getPath().toString()); } else { LOG.info("Error running marker query, marker point not deleted"); queryStatus = false; } } else { LOG.info("marker points to invalid hive file location, deleting the marker"); in.close(); cleanMarkerFile(fs.getPath().toString()); } } //in.close(); } } } hdfs.close(); } catch (IOException e) { LOG.error("ERROR running runMarkerQueries:" + e.getMessage()); } return queryStatus; }
From source file:com.cloudera.flume.handlers.hive.MarkerStore.java
License:Apache License
public boolean mergeFiles(String folder, Path file, String hiveOutputLocation) { FileSystem hdfs;// ww w . ja v a2 s .c om FSDataInputStream in; FSDataOutputStream out; List<Path> fileCollection = new ArrayList<Path>(); dstPath = new Path(folder); LOG.info("mergeFiles DSTPATH: " + dstPath); try { hdfs = dstPath.getFileSystem(conf); if (hdfs.exists(dstPath)) { FileStatus[] fileListing = hdfs.listStatus(dstPath); LOG.error("Creating file @: " + hiveOutputLocation); out = hdfs.create(new Path(hiveOutputLocation)); in = hdfs.open(file); byte[] fileData = new byte[(int) hdfs.getFileStatus(file).getLen()]; in.readFully(fileData); out.write(fileData); for (FileStatus fs : fileListing) { if (!fs.isDir()) { LOG.info("mergeFiles File marker path: " + fs.getPath()); fileCollection.add(fs.getPath()); in = hdfs.open(fs.getPath()); fileData = new byte[(int) fs.getLen()]; in.readFully(fileData); out.write(fileData); } } out.close(); } hdfs.close(); LOG.error("Written file: " + hiveOutputLocation); //lets start the purge process, delete all files except the merged file hdfs = dstPath.getFileSystem(conf); for (Path p : fileCollection) { if (hdfs.delete(p, false)) { LOG.error("Successfully deleted: " + p); } else { LOG.error("Error deleting file: " + p); } } } catch (IOException e) { LOG.error("ERROR running runMarkerQueries:" + e.getMessage()); } LOG.error("mergeFiles Done merging files"); return false; }
From source file:com.cloudera.hadoop.hdfs.nfs.nfs4.attrs.FileHandleHandler.java
License:Apache License
/**
 * Produces the file-handle attribute for {@code fileStatus}.
 *
 * <p>The handle is obtained from {@code server.getFileHandle} for the status's path, and its
 * bytes are stored in a new {@code FileHandle}. {@code session} and {@code fs} are not used.
 *
 * @param server handler used to look up the file handle for the path
 * @param session unused by this implementation
 * @param fs unused by this implementation
 * @param fileStatus status whose path identifies the file
 * @return a new {@code FileHandle} carrying the bytes of the server's handle
 * @throws NFS4Exception declared by the signature; presumably raised by the handle lookup
 */
@Override
public FileHandle get(NFS4Handler server, Session session, FileSystem fs, FileStatus fileStatus)
    throws NFS4Exception {
  final FileHandle attribute = new FileHandle();
  attribute.set(
      server
          .getFileHandle(fileStatus.getPath())
          .getBytes());
  return attribute;
}