Example usage for org.apache.hadoop.fs FileStatus getPath

Introduction

On this page you can find example usages of org.apache.hadoop.fs.FileStatus.getPath().

Prototype

public Path getPath()

Source Link

Usage

From source file:com.cloudera.cdk.data.filesystem.FileSystemDataset.java

License:Apache License

/**
 * Walks {@code directory} recursively and appends every non-directory entry to {@code paths}.
 *
 * <p>Only entries accepted by {@code PathFilters.notHidden()} are visited.
 *
 * @param directory the directory to list
 * @param paths     the list that receives the path of each non-directory entry found
 * @throws IOException if listing a directory fails
 */
@Deprecated
void accumulateDatafilePaths(Path directory, List<Path> paths) throws IOException {
    final FileStatus[] children = fileSystem.listStatus(directory, PathFilters.notHidden());
    for (FileStatus child : children) {
        if (!child.isDirectory()) {
            // Leaf entry: record it and move on to the next sibling.
            paths.add(child.getPath());
            continue;
        }
        // Sub-directory: descend into it.
        accumulateDatafilePaths(child.getPath(), paths);
    }
}

From source file:com.cloudera.cdk.data.filesystem.FileSystemMetadataProvider.java

License:Apache License

/**
 * Returns the names of the directories directly under {@code rootDirectory} that contain a
 * {@code .metadata} entry.
 *
 * <p>Only entries accepted by {@code PathFilters.notHidden()} are considered.
 *
 * @return the matching directory names; whatever was collected so far if a
 *         {@link FileNotFoundException} is raised while listing
 * @throws MetadataProviderException if listing fails with any other {@link IOException}
 */
@Override
public List<String> list() {
    final List<String> names = Lists.newArrayList();
    try {
        for (FileStatus child : rootFileSystem.listStatus(rootDirectory, PathFilters.notHidden())) {
            if (!child.isDirectory()) {
                continue;
            }
            // assumes that all unhidden directories under the root are data sets
            // (may want to add a check: !RESERVED_NAMES.contains(name))
            final Path metadataPath = new Path(child.getPath(), ".metadata");
            if (rootFileSystem.exists(metadataPath)) {
                names.add(child.getPath().getName());
            }
        }
    } catch (FileNotFoundException ignored) {
        // the repo hasn't created any files yet: fall through and return what we have
    } catch (IOException ex) {
        throw new MetadataProviderException("Could not list data sets", ex);
    }
    return names;
}

From source file:com.cloudera.cdk.data.filesystem.PathIterator.java

License:Apache License

/**
 * Moves to the next directory that contains at least one file and points {@code files} at an
 * iterator over that directory's files.
 *
 * <p>Directories whose listing (filtered by {@code PathFilters.notHidden()}) contains no files
 * are skipped.
 *
 * @return {@code true} if {@code files} was replaced with a non-empty iterator, {@code false}
 *         once {@code directories} is exhausted
 * @throws DatasetException if a directory cannot be listed
 */
private boolean advance() {
    while (directories.hasNext()) {
        final Path current = directories.next();
        try {
            final FileStatus[] listing = fs.listStatus(current, PathFilters.notHidden());
            final List<Path> found = Lists.newArrayListWithCapacity(listing.length);
            for (FileStatus entry : listing) {
                if (entry.isFile()) {
                    found.add(entry.getPath());
                }
            }
            if (!found.isEmpty()) {
                this.files = found.iterator();
                return true;
            }
            // No files here: keep looking in the next directory.
        } catch (IOException ex) {
            throw new DatasetException("Cannot list files in " + current, ex);
        }
    }
    return false;
}

From source file:com.cloudera.crunch.type.avro.AvroInputFormat.java

License:Apache License

/**
 * Restricts the parent's input listing to files whose name ends with
 * {@code AvroOutputFormat.EXT}.
 *
 * @param job the job context passed through to {@code super.listStatus}
 * @return a new list holding only the entries with the Avro extension
 * @throws IOException if the parent listing fails
 */
@Override
protected List<FileStatus> listStatus(JobContext job) throws IOException {
    final List<FileStatus> avroFiles = new ArrayList<FileStatus>();
    for (FileStatus candidate : super.listStatus(job)) {
        final String fileName = candidate.getPath().getName();
        if (!fileName.endsWith(org.apache.avro.mapred.AvroOutputFormat.EXT)) {
            continue;
        }
        avroFiles.add(candidate);
    }
    return avroFiles;
}

From source file:com.cloudera.data.filesystem.FileSystemDataset.java

License:Apache License

/**
 * Builds one {@code Dataset} per entry found directly under this dataset's directory.
 *
 * <p>Entries are listed with {@code PathFilters.notHidden()}. Each resulting dataset keeps this
 * dataset's name, file system, schema and format, uses the listed entry as its directory, and
 * takes its partition key from {@code fromDirectoryName}.
 *
 * @return the partitions, in listing order
 * @throws IllegalStateException if the descriptor is not partitioned
 * @throws DatasetException      if the directory cannot be listed
 */
@Override
public Iterable<Dataset> getPartitions() {
    Preconditions.checkState(descriptor.isPartitioned(),
            "Attempt to get partitions on a non-partitioned dataset (name:%s)", name);

    final List<Dataset> result = Lists.newArrayList();

    final FileStatus[] children;
    try {
        children = fileSystem.listStatus(directory, PathFilters.notHidden());
    } catch (IOException e) {
        throw new DatasetException("Unable to list partition directory for directory " + directory, e);
    }

    for (FileStatus child : children) {
        final Path partitionDir = child.getPath();
        final PartitionKey partitionKey = fromDirectoryName(partitionDir);

        // A fresh descriptor is built for every partition, using the sub-partition strategy
        // obtained from the Accessor.
        final DatasetDescriptor partitionDescriptor = new DatasetDescriptor.Builder()
                .schema(schema)
                .format(descriptor.getFormat())
                .partitionStrategy(Accessor.getDefault().getSubpartitionStrategy(partitionStrategy, 1))
                .get();

        Builder partitionBuilder = new FileSystemDataset.Builder();
        partitionBuilder = partitionBuilder.name(name).fileSystem(fileSystem);
        partitionBuilder = partitionBuilder.descriptor(partitionDescriptor);
        partitionBuilder = partitionBuilder.directory(partitionDir).partitionKey(partitionKey);

        result.add(partitionBuilder.get());
    }

    return result;
}

From source file:com.cloudera.data.filesystem.FileSystemDataset.java

License:Apache License

/**
 * Collects, depth-first, the path of every non-directory entry below {@code directory}.
 *
 * <p>Listings are filtered with {@code PathFilters.notHidden()}.
 *
 * @param directory the directory to scan
 * @param paths     the accumulator the discovered paths are appended to
 * @throws IOException if a directory cannot be listed
 */
private void accumulateDatafilePaths(Path directory, List<Path> paths) throws IOException {
    final FileStatus[] entries = fileSystem.listStatus(directory, PathFilters.notHidden());
    for (int i = 0; i < entries.length; i++) {
        final FileStatus entry = entries[i];
        if (entry.isDirectory()) {
            // Recurse into the sub-directory with the same accumulator.
            accumulateDatafilePaths(entry.getPath(), paths);
            continue;
        }
        paths.add(entry.getPath());
    }
}

From source file:com.cloudera.flume.handlers.hive.MarkerStore.java

License:Apache License

/**
 * Processes every marker file directly under {@code elasticsearchMarkerFolder}: reads the whole
 * file, removes it via {@code cleanMarkerFile}, and passes its content to {@code sendESQuery}.
 *
 * <p>Sub-directories are skipped. As a side effect the {@code dstPath} field is set to the
 * marker folder.
 *
 * @return {@code true} if every marker was processed without an exception, {@code false} as
 *         soon as any step throws (remaining markers are then left untouched)
 */
private boolean runElasticSearchMarkerQueries() {
    boolean success = true;
    dstPath = new Path(elasticsearchMarkerFolder);
    LOG.info("DSTPATH: " + dstPath);
    try {
        FileSystem hdfs = dstPath.getFileSystem(conf);
        if (hdfs.exists(dstPath)) {
            FileStatus[] fileListing = hdfs.listStatus(dstPath);
            for (FileStatus fs : fileListing) {
                if (fs.isDir()) {
                    continue;
                }
                LOG.info("File marker path: " + fs.getPath());

                byte[] fileData = new byte[(int) fs.getLen()];
                FSDataInputStream in = hdfs.open(fs.getPath());
                try {
                    in.readFully(fileData);
                } finally {
                    // Always release the stream, even when the read fails.
                    in.close();
                }

                // NOTE(review): the marker is removed before the query is sent, so a failing
                // sendESQuery loses the marker - confirm this ordering is intended.
                LOG.info("cleaning markerfile @: " + fs.getPath().toString());
                cleanMarkerFile(fs.getPath().toString());
                sendESQuery(elasticsearchUrl, new String(fileData));
            }
        }
    } catch (Exception e) {
        // Record why the run failed instead of silently reporting false.
        LOG.error("ERROR running runElasticSearchMarkerQueries: " + e.getMessage(), e);
        success = false;
    }
    return success;
}

From source file:com.cloudera.flume.handlers.hive.MarkerStore.java

License:Apache License

/**
 * Processes every marker file directly under {@code hiveMarkerFolder}.
 *
 * <p>A marker is expected to hold two tab-separated fields: a path and a query. If the path
 * exists, the query is passed to {@code runHiveQuery} and the marker is removed via
 * {@code cleanMarkerFile} when the query succeeds. If the path does not exist, the marker is
 * removed without running anything. Markers that do not split into exactly two fields are
 * skipped and left in place, as are sub-directories.
 *
 * <p>Side effects: the {@code dstPath} field is set to the marker folder and then to the path
 * read from each well-formed marker; the marker folder's file system is closed after a
 * successful pass.
 *
 * @return {@code false} if at least one {@code runHiveQuery} call reported failure,
 *         {@code true} otherwise
 */
private boolean runHiveMarkerQueries() {
    boolean queryStatus = true;
    dstPath = new Path(hiveMarkerFolder);
    LOG.info("DSTPATH: " + dstPath);
    try {
        FileSystem hdfs = dstPath.getFileSystem(conf);
        if (hdfs.exists(dstPath)) {
            FileStatus[] fileListing = hdfs.listStatus(dstPath);
            for (FileStatus fs : fileListing) {
                if (fs.isDir()) {
                    continue;
                }
                LOG.info("File marker path: " + fs.getPath());

                // The whole marker is read up front, so the stream can be closed right away;
                // this covers the malformed-marker and failed-query paths.
                byte[] fileData = new byte[(int) fs.getLen()];
                FSDataInputStream in = hdfs.open(fs.getPath());
                try {
                    in.readFully(fileData);
                } finally {
                    in.close();
                }

                String[] splitTab = new String(fileData).split("\t");
                if (splitTab.length != 2) {
                    // Not a "<path>\t<query>" marker: leave it alone.
                    continue;
                }

                dstPath = new Path(splitTab[0]);
                FileSystem hiveFile = dstPath.getFileSystem(conf);
                if (hiveFile.exists(dstPath)) {
                    LOG.info("marker file data: " + splitTab[1]);
                    if (runHiveQuery(splitTab[1])) {
                        LOG.info("Marker query is successful");
                        cleanMarkerFile(fs.getPath().toString());
                    } else {
                        LOG.info("Error running marker query, marker point not deleted");
                        queryStatus = false;
                    }
                } else {
                    LOG.info("marker points to invalid hive file location, deleting the marker");
                    cleanMarkerFile(fs.getPath().toString());
                }
            }
        }
        // NOTE(review): getFileSystem may hand out a shared, cached instance; closing it can
        // affect other users of the same FileSystem - confirm this close is intended.
        hdfs.close();
    } catch (IOException e) {
        // NOTE(review): queryStatus is left unchanged here, so an I/O failure can still be
        // reported as success - confirm against callers before changing.
        LOG.error("ERROR running runMarkerQueries:" + e.getMessage(), e);
    }

    return queryStatus;
}

From source file:com.cloudera.flume.handlers.hive.MarkerStore.java

License:Apache License

/**
 * Concatenates {@code file} and every file directly under {@code folder} into a new file at
 * {@code hiveOutputLocation}, then deletes the files that were read from {@code folder}.
 *
 * <p>The content of {@code file} is written first, followed by the folder's files in listing
 * order; sub-directories are skipped. Nothing is written if {@code folder} does not exist.
 * I/O failures are logged and not rethrown; a failure during the merge skips the delete phase.
 * As a side effect the {@code dstPath} field is set to {@code folder}.
 *
 * @param folder             the directory whose files are merged and afterwards deleted
 * @param file               a file whose content is placed at the start of the merged output
 * @param hiveOutputLocation the path of the merged file to create
 * @return always {@code false}
 */
public boolean mergeFiles(String folder, Path file, String hiveOutputLocation) {
    List<Path> fileCollection = new ArrayList<Path>();
    dstPath = new Path(folder);
    LOG.info("mergeFiles DSTPATH: " + dstPath);
    try {
        FileSystem hdfs = dstPath.getFileSystem(conf);

        if (hdfs.exists(dstPath)) {
            FileStatus[] fileListing = hdfs.listStatus(dstPath);
            LOG.info("Creating file @: " + hiveOutputLocation);
            FSDataOutputStream out = hdfs.create(new Path(hiveOutputLocation));
            try {
                out.write(readFileForMerge(hdfs, file, hdfs.getFileStatus(file).getLen()));

                for (FileStatus fs : fileListing) {
                    if (fs.isDir()) {
                        continue;
                    }
                    LOG.info("mergeFiles File marker path: " + fs.getPath());
                    fileCollection.add(fs.getPath());
                    out.write(readFileForMerge(hdfs, fs.getPath(), fs.getLen()));
                }
            } finally {
                // Release the output stream even when a read or write fails.
                out.close();
            }
        }

        // NOTE(review): getFileSystem may hand out a shared, cached instance; closing it can
        // affect other users of the same FileSystem - confirm this close is intended.
        hdfs.close();
        LOG.info("Written file: " + hiveOutputLocation);

        //lets start the purge process, delete all files except the merged file
        hdfs = dstPath.getFileSystem(conf);
        for (Path p : fileCollection) {
            if (hdfs.delete(p, false)) {
                LOG.info("Successfully deleted: " + p);
            } else {
                LOG.error("Error deleting file: " + p);
            }
        }

    } catch (IOException e) {
        LOG.error("ERROR running mergeFiles:" + e.getMessage(), e);
    }
    LOG.info("mergeFiles Done merging files");
    // NOTE(review): the result is false even after a successful merge - confirm whether any
    // caller relies on this before changing it.
    return false;
}

/** Reads {@code length} bytes from the start of {@code path} and always closes the stream. */
private byte[] readFileForMerge(FileSystem hdfs, Path path, long length) throws IOException {
    byte[] fileData = new byte[(int) length];
    FSDataInputStream in = hdfs.open(path);
    try {
        in.readFully(fileData);
    } finally {
        in.close();
    }
    return fileData;
}

From source file:com.cloudera.hadoop.hdfs.nfs.nfs4.attrs.FileHandleHandler.java

License:Apache License

/**
 * Produces the file-handle attribute for {@code fileStatus}.
 *
 * <p>The handle is looked up on {@code server} by the status's path and its bytes are copied
 * into a new {@code FileHandle}. {@code session} and {@code fs} are not used here.
 *
 * @param server     the handler asked for the file handle
 * @param session    unused
 * @param fs         unused
 * @param fileStatus the status whose path identifies the file
 * @return a new {@code FileHandle} populated with the handle bytes
 * @throws NFS4Exception declared by the signature; presumably raised by the handle lookup -
 *                       confirm against {@code NFS4Handler.getFileHandle}
 */
@Override
public FileHandle get(NFS4Handler server, Session session, FileSystem fs, FileStatus fileStatus)
        throws NFS4Exception {
    final FileHandle handleAttribute = new FileHandle();
    handleAttribute.set(
            server.getFileHandle(fileStatus.getPath())
                    .getBytes());
    return handleAttribute;
}