Example usage for org.apache.hadoop.fs FileStatus getPath

List of usage examples for org.apache.hadoop.fs FileStatus getPath

Introduction

On this page you can find usage examples for org.apache.hadoop.fs FileStatus getPath, drawn from open source projects.

Prototype

public Path getPath() 
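
FileStatus.getPath() returns the Path of the file or directory that the status object describes. As a quick orientation before the project examples, here is a minimal self-contained sketch of the typical pattern; the class name and command-line argument are illustrative, not taken from any of the sources below:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class GetPathExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Path dir = new Path(args[0]); // directory to list, e.g. an HDFS URI
        FileSystem fs = dir.getFileSystem(conf);
        // listStatus() returns one FileStatus per directory entry; getPath()
        // recovers the Path that each status object describes.
        for (FileStatus status : fs.listStatus(dir)) {
            System.out.println(status.getPath() + (status.isDirectory() ? " [dir]" : ""));
        }
    }
}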

Usage

From source file: com.cloudera.cdk.data.filesystem.FileSystemDataset.java

License: Apache License

// Recursively collects the paths of all non-hidden data files under the given directory.
@Deprecated
void accumulateDatafilePaths(Path directory, List<Path> paths) throws IOException {

    for (FileStatus status : fileSystem.listStatus(directory, PathFilters.notHidden())) {

        if (status.isDirectory()) {
            accumulateDatafilePaths(status.getPath(), paths);
        } else {
            paths.add(status.getPath());
        }
    }
}
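
As an aside: on Hadoop 2.x and later, the same recursive traversal can be written without explicit recursion using FileSystem.listFiles(path, true), which descends into subdirectories and yields only files. A minimal sketch of an equivalent helper; note that, unlike the example above, it does not skip hidden files:

import java.io.IOException;
import java.util.List;

import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;

// Collects every file path under "directory", recursing into subdirectories.
void accumulateWithListFiles(FileSystem fs, Path directory, List<Path> paths) throws IOException {
    RemoteIterator<LocatedFileStatus> it = fs.listFiles(directory, true);
    while (it.hasNext()) {
        paths.add(it.next().getPath()); // LocatedFileStatus extends FileStatus
    }
}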

From source file: com.cloudera.cdk.data.filesystem.FileSystemMetadataProvider.java

License: Apache License

// Lists dataset names: every non-hidden directory under the root that contains a ".metadata" entry.
@Override
public List<String> list() {
    List<String> datasets = Lists.newArrayList();
    try {
        FileStatus[] entries = rootFileSystem.listStatus(rootDirectory, PathFilters.notHidden());
        for (FileStatus entry : entries) {
            // assumes that all unhidden directories under the root are data sets
            if (entry.isDirectory() && rootFileSystem.exists(new Path(entry.getPath(), ".metadata"))) {
                // may want to add a check: !RESERVED_NAMES.contains(name)
                datasets.add(entry.getPath().getName());
            }
        }
    } catch (FileNotFoundException ex) {
        // the repo hasn't created any files yet
        return datasets;
    } catch (IOException ex) {
        throw new MetadataProviderException("Could not list data sets", ex);
    }
    return datasets;
}

From source file: com.cloudera.cdk.data.filesystem.PathIterator.java

License: Apache License

// Advances to the next directory containing at least one non-hidden file,
// exposing those files through the iterator; returns false when no
// directories remain.
private boolean advance() {
    while (true) {
        if (directories.hasNext()) {
            final Path directory = directories.next();
            try {
                final FileStatus[] stats = fs.listStatus(directory, PathFilters.notHidden());
                final List<Path> nextFileSet = Lists.newArrayListWithCapacity(stats.length);
                for (FileStatus stat : stats) {
                    if (stat.isFile()) {
                        nextFileSet.add(stat.getPath());
                    }
                }
                if (nextFileSet.size() > 0) {
                    this.files = nextFileSet.iterator();
                    return true;
                }
            } catch (IOException ex) {
                throw new DatasetException("Cannot list files in " + directory, ex);
            }
        } else {
            return false;
        }
    }
}

From source file: com.cloudera.crunch.type.avro.AvroInputFormat.java

License: Apache License

// Restricts the job's input files to those ending with the Avro extension (".avro").
@Override
protected List<FileStatus> listStatus(JobContext job) throws IOException {
    List<FileStatus> result = new ArrayList<FileStatus>();
    for (FileStatus file : super.listStatus(job)) {
        if (file.getPath().getName().endsWith(org.apache.avro.mapred.AvroOutputFormat.EXT)) {
            result.add(file);
        }
    }
    return result;
}
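
The same extension check can also be pushed into the listing call itself: FileSystem.listStatus accepts an org.apache.hadoop.fs.PathFilter whose accept method sees each candidate Path. A short sketch, with fs and inputDir standing in for a FileSystem handle and an input directory:

// Filter at listing time instead of post-filtering the FileStatus array.
FileStatus[] avroFiles = fs.listStatus(inputDir, new PathFilter() {
    @Override
    public boolean accept(Path path) {
        return path.getName().endsWith(".avro");
    }
});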

From source file: com.cloudera.data.filesystem.FileSystemDataset.java

License: Apache License

// Builds one sub-Dataset per non-hidden partition directory, deriving each
// partition key from the directory name.
@Override
public Iterable<Dataset> getPartitions() {
    Preconditions.checkState(descriptor.isPartitioned(),
            "Attempt to get partitions on a non-partitioned dataset (name:%s)", name);

    List<Dataset> partitions = Lists.newArrayList();

    FileStatus[] fileStatuses;

    try {
        fileStatuses = fileSystem.listStatus(directory, PathFilters.notHidden());
    } catch (IOException e) {
        throw new DatasetException("Unable to list partition directory for directory " + directory, e);
    }

    for (FileStatus stat : fileStatuses) {
        Path p = stat.getPath();
        PartitionKey key = fromDirectoryName(p);
        Builder builder = new FileSystemDataset.Builder().name(name).fileSystem(fileSystem)
                .descriptor(new DatasetDescriptor.Builder().schema(schema).format(descriptor.getFormat())
                        .partitionStrategy(Accessor.getDefault().getSubpartitionStrategy(partitionStrategy, 1))
                        .get())
                .directory(p).partitionKey(key);

        partitions.add(builder.get());
    }

    return partitions;
}

From source file: com.cloudera.data.filesystem.FileSystemDataset.java

License: Apache License

// Recursively collects the paths of all non-hidden data files under the given directory.
private void accumulateDatafilePaths(Path directory, List<Path> paths) throws IOException {

    for (FileStatus status : fileSystem.listStatus(directory, PathFilters.notHidden())) {

        if (status.isDirectory()) {
            accumulateDatafilePaths(status.getPath(), paths);
        } else {
            paths.add(status.getPath());
        }
    }
}

From source file: com.cloudera.flume.handlers.hive.MarkerStore.java

License: Apache License

// Replays each marker file under the Elasticsearch marker folder: reads its
// contents, deletes the marker, and submits the recorded query to Elasticsearch.
private boolean runElasticSearchMarkerQueries() {
    boolean success = true;
    FileSystem hdfs;
    FSDataInputStream in;
    dstPath = new Path(elasticsearchMarkerFolder);
    LOG.info("DSTPATH: " + dstPath);
    try {
        hdfs = dstPath.getFileSystem(conf);
        if (hdfs.exists(dstPath)) {
            FileStatus[] fileListing = hdfs.listStatus(dstPath);
            for (FileStatus fs : fileListing) {
                if (!fs.isDir()) {
                    LOG.info("File marker path: " + fs.getPath());
                    in = hdfs.open(fs.getPath());
                    byte[] fileData = new byte[(int) fs.getLen()];
                    in.readFully(fileData);
                    in.close();
                    LOG.info("cleaning markerfile @: " + fs.getPath().toString());
                    cleanMarkerFile(fs.getPath().toString());
                    sendESQuery(elasticsearchUrl, new String(fileData));

                }
            }
        }
    } catch (Exception e) {
        LOG.error("ERROR running runElasticSearchMarkerQueries: " + e.getMessage());
        success = false;
    }
    return success;
}

From source file: com.cloudera.flume.handlers.hive.MarkerStore.java

License: Apache License

// Replays each Hive marker file (a tab-separated target path and query): runs the
// recorded query when the target exists, deleting the marker on success or when
// the target path is invalid.
private boolean runHiveMarkerQueries() {
    boolean queryStatus = true;
    FileSystem hdfs;
    FSDataInputStream in;
    dstPath = new Path(hiveMarkerFolder);
    LOG.info("DSTPATH: " + dstPath);
    try {
        hdfs = dstPath.getFileSystem(conf);
        if (hdfs.exists(dstPath)) {
            FileStatus[] fileListing = hdfs.listStatus(dstPath);
            for (FileStatus fs : fileListing) {
                if (!fs.isDir()) {
                    LOG.info("File marker path: " + fs.getPath());
                    in = hdfs.open(fs.getPath());
                    byte[] fileData = new byte[(int) fs.getLen()];
                    in.readFully(fileData);
                    String[] splitTab = new String(fileData).split("\t");
                    if (splitTab.length == 2) {
                        dstPath = new Path(splitTab[0]);
                        FileSystem hiveFile = dstPath.getFileSystem(conf);
                        if (hiveFile.exists(dstPath)) {
                            LOG.info("marker file data: " + splitTab[1]);
                            if (runHiveQuery(splitTab[1])) {
                                LOG.info("Marker query is successful");
                                in.close();
                                cleanMarkerFile(fs.getPath().toString());
                            } else {
                                LOG.info("Error running marker query, marker point not deleted");
                                queryStatus = false;
                            }

                        } else {
                            LOG.info("marker points to invalid hive file location, deleting the marker");
                            in.close();
                            cleanMarkerFile(fs.getPath().toString());
                        }
                    }
                    //in.close();
                }
            }
        }
        hdfs.close();
    } catch (IOException e) {
        queryStatus = false;
        LOG.error("ERROR running runHiveMarkerQueries: " + e.getMessage());
    }

    return queryStatus;
}

From source file: com.cloudera.flume.handlers.hive.MarkerStore.java

License: Apache License

// Merges the given file plus every file under "folder" into a single file at
// hiveOutputLocation, then deletes the merged inputs.
public boolean mergeFiles(String folder, Path file, String hiveOutputLocation) {
    boolean success = true;
    FileSystem hdfs;
    FSDataInputStream in;
    FSDataOutputStream out;
    List<Path> fileCollection = new ArrayList<Path>();
    dstPath = new Path(folder);
    LOG.info("mergeFiles DSTPATH: " + dstPath);
    try {
        hdfs = dstPath.getFileSystem(conf);

        if (hdfs.exists(dstPath)) {
            FileStatus[] fileListing = hdfs.listStatus(dstPath);
            LOG.error("Creating file @: " + hiveOutputLocation);
            out = hdfs.create(new Path(hiveOutputLocation));

            in = hdfs.open(file);
            byte[] fileData = new byte[(int) hdfs.getFileStatus(file).getLen()];
            in.readFully(fileData);
            out.write(fileData);

            for (FileStatus fs : fileListing) {
                if (!fs.isDir()) {
                    LOG.info("mergeFiles File marker path: " + fs.getPath());
                    fileCollection.add(fs.getPath());
                    in = hdfs.open(fs.getPath());
                    fileData = new byte[(int) fs.getLen()];
                    in.readFully(fileData);
                    out.write(fileData);
                }
            }
            out.close();
        }

        hdfs.close();
        LOG.error("Written file: " + hiveOutputLocation);

        // Start the purge: delete all source files except the merged output.
        hdfs = dstPath.getFileSystem(conf);
        for (Path p : fileCollection) {
            if (hdfs.delete(p, false)) {
                LOG.error("Successfully deleted: " + p);
            } else {
                LOG.error("Error deleting file: " + p);
            }
        }

    } catch (IOException e) {
        success = false;
        LOG.error("ERROR running mergeFiles: " + e.getMessage());
    }
    LOG.error("mergeFiles Done merging files");
    return false;
}
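
For the merge-and-purge pattern above, Hadoop 2.x also shipped FileUtil.copyMerge, which concatenates every file under a source directory into a single target file and can delete the sources afterwards. A hedged sketch reusing the hdfs and conf names from the example (the utility was removed in Hadoop 3, so this applies to 2.x only):

import org.apache.hadoop.fs.FileUtil;

// Concatenate all files under "folder" into hiveOutputLocation, deleting the
// sources once the merge succeeds.
boolean merged = FileUtil.copyMerge(hdfs, new Path(folder), hdfs, new Path(hiveOutputLocation),
        true,  // deleteSource
        conf,
        null); // addString: optional text appended after each input file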

From source file: com.cloudera.hadoop.hdfs.nfs.nfs4.attrs.FileHandleHandler.java

License: Apache License

// Looks up the NFS4 filehandle for the file's Path and wraps its bytes in a FileHandle attribute.
@Override
public FileHandle get(NFS4Handler server, Session session, FileSystem fs, FileStatus fileStatus)
        throws NFS4Exception {
    FileHandle files = new FileHandle();
    files.set(server.getFileHandle(fileStatus.getPath()).getBytes());
    return files;
}