Example usage for org.apache.hadoop.fs FileStatus getPath

List of usage examples for org.apache.hadoop.fs FileStatus getPath

Introduction

On this page you can find example usages of org.apache.hadoop.fs.FileStatus.getPath().

Prototype

public Path getPath() 

Source Link

Usage

From source file:com.grantingersoll.intell.clustering.KMeansClusteringEngine.java

License:Apache License

/**
 * Reads the clustered points stored as sequence files directly under {@code pointsPathDir}
 * and groups the point names by cluster id.
 *
 * <p>Entries whose name ends with {@code .crc} or starts with {@code _} are skipped.
 * Each remaining file is read as a sequence file with {@link IntWritable} keys (the cluster id)
 * and {@link WeightedVectorWritable} values whose vector is expected to be a {@link NamedVector}.
 *
 * @param pointsPathDir directory containing the clustered-points sequence files
 * @param conf          Hadoop configuration used to resolve the file system and open readers
 * @return a map, sorted by cluster id, from cluster id to the names of the points in that cluster
 * @throws IOException if listing the directory, reading a file, or closing a reader fails
 */
private static Map<Integer, List<String>> readPoints(Path pointsPathDir, Configuration conf)
        throws IOException {
    Map<Integer, List<String>> result = new TreeMap<Integer, List<String>>();

    FileSystem fs = pointsPathDir.getFileSystem(conf);
    FileStatus[] children = fs.listStatus(pointsPathDir, new PathFilter() {
        public boolean accept(Path path) {
            String name = path.getName();
            return !(name.endsWith(".crc") || name.startsWith("_"));
        }
    });

    for (FileStatus file : children) {
        Path path = file.getPath();
        SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, conf);
        try {
            IntWritable key = reader.getKeyClass().asSubclass(IntWritable.class).newInstance();
            WeightedVectorWritable value = reader.getValueClass().asSubclass(WeightedVectorWritable.class)
                    .newInstance();
            while (reader.next(key, value)) {
                // key is the clusterId, value is one weighted point belonging to that cluster
                List<String> pointList = result.get(key.get());
                if (pointList == null) {
                    pointList = new ArrayList<String>();
                    result.put(key.get(), pointList);
                }
                //We know we are dealing with named vectors, b/c we generated from the id field
                String name = ((NamedVector) value.getVector()).getName();
                pointList.add(name);
            }
        } catch (InstantiationException e) {
            // Best effort: log and move on to the next file.
            log.error("Exception", e);
        } catch (IllegalAccessException e) {
            log.error("Exception", e);
        } finally {
            // Always release the underlying stream, even when reading fails part-way.
            reader.close();
        }
    }

    return result;
}

From source file:com.griddynamics.jagger.storage.fs.HdfsStorage.java

License:Open Source License

/**
 * Lists the entries found at {@code path} and returns their full path strings.
 *
 * @param path location to list
 * @return the string form of every listed entry's path, or an empty set when the
 *         file system reports no listing ({@code null})
 * @throws IOException if the listing fails
 */
@Override
public Set<String> getFileNameList(String path) throws IOException {
    final FileStatus[] statuses = hdfsClient.getFileSystem().listStatus(new Path(path));
    if (statuses == null) {
        return Collections.emptySet();
    }

    // Same initial capacity the HashSet(Collection) constructor would pick, so the
    // resulting set has the same internal layout as a copy-constructed one.
    final Set<String> names = new HashSet<String>(Math.max((int) (statuses.length / .75f) + 1, 16));
    for (FileStatus status : statuses) {
        names.add(status.getPath().toString());
    }
    return (HashSet<String>) names;
}

From source file:com.gruter.hadoop.customShell.CustomShell.java

License:Apache License

/**
 * Prints a listing of {@code src} to standard output, one line per entry, with the
 * replication, owner, group and length columns padded to the widest value seen.
 *
 * @param src         the entry to list
 * @param srcFs       file system that {@code src} belongs to
 * @param recursive   when true, descend into every directory entry (command name "lsr")
 * @param printHeader when true and not recursive, print a "Found N items" line for non-empty listings
 * @return 1 when the listing could not be obtained; otherwise the sum of the values
 *         returned by the recursive calls
 * @throws IOException declared for callers; propagated from recursive calls
 */
private int ls(FileStatus src, FileSystem srcFs, boolean recursive, boolean printHeader) throws IOException {
    final String cmd = recursive ? "lsr" : "ls";
    final FileStatus[] items = shellListStatus(cmd, srcFs, src);
    if (items == null) {
        return 1;
    }

    if (!recursive && printHeader && items.length != 0) {
        System.out.println("Found " + items.length + " items");
    }

    // First pass: work out the column widths, starting from the minimum widths.
    int maxReplication = 3;
    int maxLen = 10;
    int maxOwner = 0;
    int maxGroup = 0;
    for (FileStatus item : items) {
        maxReplication = Math.max(maxReplication, String.valueOf(item.getReplication()).length());
        maxLen = Math.max(maxLen, String.valueOf(item.getLen()).length());
        maxOwner = Math.max(maxOwner, String.valueOf(item.getOwner()).length());
        maxGroup = Math.max(maxGroup, String.valueOf(item.getGroup()).length());
    }

    // Second pass: print each entry and recurse into directories when requested.
    int numOfErrors = 0;
    for (FileStatus item : items) {
        final Path cur = item.getPath();
        final String mdate = dateForm.format(new Date(item.getModificationTime()));

        final String typeFlag = item.isDir() ? "d" : "-";
        System.out.print(typeFlag + item.getPermission() + " ");
        // Directories have no meaningful replication, so show a dash instead.
        System.out.printf("%" + maxReplication + "s ", (item.isDir() ? "-" : item.getReplication()));
        if (maxOwner > 0) {
            System.out.printf("%-" + maxOwner + "s ", item.getOwner());
        }
        if (maxGroup > 0) {
            System.out.printf("%-" + maxGroup + "s ", item.getGroup());
        }
        System.out.printf("%" + maxLen + "d ", item.getLen());
        System.out.print(mdate + " ");
        System.out.println(cur.toUri().getPath());

        if (recursive && item.isDir()) {
            numOfErrors += ls(item, srcFs, recursive, printHeader);
        }
    }
    return numOfErrors;
}

From source file:com.gruter.hadoop.customShell.CustomShell.java

License:Apache License

/**
 * Helper that returns the listing for {@code src}.
 *
 * <p>A non-directory is returned as a single-element array containing {@code src} itself.
 * For a directory, the result of {@code srcFs.listStatus} is returned; failures are
 * reported on standard error rather than thrown.
 *
 * @param cmd   command name used as the prefix of error messages
 * @param srcFs file system used to list the directory
 * @param src   the entry to list
 * @return the listing, or {@code null} when it could not be obtained
 */
private static FileStatus[] shellListStatus(String cmd, FileSystem srcFs, FileStatus src) {
    if (!src.isDir()) {
        FileStatus[] files = { src };
        return files;
    }
    Path path = src.getPath();
    try {
        FileStatus[] files = srcFs.listStatus(path);
        if (files == null) {
            System.err.println(cmd + ": could not get listing for '" + path + "'");
        }
        return files;
    } catch (IOException e) {
        // Report only the first line of the message. getMessage() may be null, and
        // split("\n")[0] would fail on a message that is just a newline, so cut manually.
        String message = e.getMessage();
        String firstLine;
        if (message == null) {
            firstLine = e.toString();
        } else {
            int newline = message.indexOf('\n');
            firstLine = newline < 0 ? message : message.substring(0, newline);
        }
        System.err.println(cmd + ": could not get listing for '" + path + "' : " + firstLine);
    }
    return null;
}

From source file:com.hadoop.compression.lzo.LzoIndexer.java

License:Open Source License

/**
 * Builds LZO indexes under the given path, descending into directories recursively.
 * A file is indexed only when its name ends with the LZO extension and it does not
 * already have an index file next to it; every other file is ignored.
 *
 * @param lzoPath The base path to index.
 * @param nestingLevel For pretty printing, the nesting level.
 * @throws IOException
 */
private void indexInternal(Path lzoPath, int nestingLevel) throws IOException {
    final FileSystem fs = FileSystem.get(URI.create(lzoPath.toString()), conf_);
    final FileStatus fileStatus = fs.getFileStatus(lzoPath);

    // Directory: recurse into every child one nesting level deeper.
    if (fileStatus.isDir()) {
        LOG.info(getNesting(nestingLevel) + "LZO Indexing directory " + lzoPath + "...");
        for (FileStatus childStatus : fs.listStatus(lzoPath)) {
            indexInternal(childStatus.getPath(), nestingLevel + 1);
        }
        return;
    }

    // Plain file without the LZO extension: nothing to do.
    if (!lzoPath.toString().endsWith(LZO_EXTENSION)) {
        return;
    }

    final Path lzoIndexPath = new Path(lzoPath.toString() + LzoIndex.LZO_INDEX_SUFFIX);
    if (fs.exists(lzoIndexPath)) {
        LOG.info(getNesting(nestingLevel) + "[SKIP] LZO index file already exists for " + lzoPath + "\n");
        return;
    }

    final long startTime = System.currentTimeMillis();
    final long fileSize = fileStatus.getLen();

    LOG.info(getNesting(nestingLevel) + "[INDEX] LZO Indexing file " + lzoPath + ", size "
            + df_.format(fileSize / (1024.0 * 1024.0 * 1024.0)) + " GB...");
    if (!indexSingleFile(fs, lzoPath)) {
        return;
    }

    // Report throughput and the size of the freshly written index.
    final long indexSize = fs.getFileStatus(lzoIndexPath).getLen();
    final double elapsed = (System.currentTimeMillis() - startTime) / 1000.0;
    LOG.info(getNesting(nestingLevel) + "Completed LZO Indexing in " + df_.format(elapsed)
            + " seconds (" + df_.format(fileSize / (1024.0 * 1024.0 * elapsed))
            + " MB/s).  Index size is " + df_.format(indexSize / 1024.0) + " KB.\n");
}

From source file:com.hadoop.mapred.DeprecatedLzoTextInputFormat.java

License:Open Source License

/**
 * Filters the parent's input listing for LZO processing and loads the index of every LZO file.
 *
 * <p>LZO files are kept and their index is stored in {@code indexes}. Non-LZO files are
 * removed when the configuration asks to ignore them; LZO index files are always removed
 * because they are not part of the input.
 *
 * @param conf job configuration
 * @return the remaining input files
 * @throws IOException if listing the inputs or reading an index fails
 */
@Override
protected FileStatus[] listStatus(JobConf conf) throws IOException {
    final List<FileStatus> files = new ArrayList<FileStatus>(Arrays.asList(super.listStatus(conf)));
    final boolean ignoreNonLzo = LzoInputFormatCommon.getIgnoreNonLzoProperty(conf);

    for (Iterator<FileStatus> it = files.iterator(); it.hasNext();) {
        final Path file = it.next().getPath();

        if (LzoInputFormatCommon.isLzoFile(file.toString())) {
            // LZO input: remember its index for later use.
            final FileSystem fs = file.getFileSystem(conf);
            indexes.put(file, LzoIndex.readIndex(fs, file));
        } else if (ignoreNonLzo || LzoInputFormatCommon.isLzoIndexFile(file.toString())) {
            // Drop non-LZO files only when configured to; ".lzo.index" files are never input.
            it.remove();
        }
    }

    return files.toArray(new FileStatus[0]);
}

From source file:com.hadoop.mapreduce.FourMcInputFormat.java

License:BSD License

/**
 * Expands the parent's input listing into the 4mc files to process, delegating the
 * per-entry decision to {@code addInputPath}.
 *
 * @param job job context supplying the configuration
 * @return the collected input files
 * @throws IOException if listing fails
 */
@Override
protected List<FileStatus> listStatus(JobContext job) throws IOException {
    final List<FileStatus> candidates = super.listStatus(job);
    final List<FileStatus> results = new ArrayList<FileStatus>();
    final Configuration conf = HadoopUtils.getConfiguration(job);
    final boolean recursive = conf.getBoolean("mapred.input.dir.recursive", false);

    for (FileStatus candidate : candidates) {
        // Each entry may live on a different file system, so resolve it per path.
        final FileSystem fs = candidate.getPath().getFileSystem(conf);
        addInputPath(results, fs, candidate, recursive);
    }

    LOG.debug("Total 4mc input paths to process: " + results.size());
    return results;
}

From source file:com.hadoop.mapreduce.FourMcInputFormat.java

License:BSD License

/**
 * Adds {@code pathStat} to {@code results} when it is a file accepted by
 * {@code visible4mcFilter}; when it is a directory and {@code recursive} is set,
 * applies itself to every child accepted by {@code hiddenPathFilter}.
 *
 * @param results   accumulator for accepted files
 * @param fs        file system used to list directories
 * @param pathStat  the entry to examine
 * @param recursive whether directories should be descended into
 * @throws IOException if listing a directory fails
 */
protected void addInputPath(List<FileStatus> results, FileSystem fs, FileStatus pathStat, boolean recursive)
        throws IOException {
    final Path path = pathStat.getPath();

    if (!pathStat.isDir()) {
        if (visible4mcFilter.accept(path)) {
            results.add(pathStat);
        }
        return;
    }

    // Directories contribute nothing unless recursion was requested.
    if (!recursive) {
        return;
    }
    final FileStatus[] children = fs.listStatus(path, hiddenPathFilter);
    for (FileStatus child : children) {
        addInputPath(results, fs, child, recursive);
    }
}

From source file:com.hadoop.mapreduce.FourMzInputFormat.java

License:BSD License

/**
 * Expands the parent's input listing into the 4mz files to process, delegating the
 * per-entry decision to {@code addInputPath}.
 *
 * @param job job context supplying the configuration
 * @return the collected input files
 * @throws IOException if listing fails
 */
@Override
protected List<FileStatus> listStatus(JobContext job) throws IOException {
    final List<FileStatus> candidates = super.listStatus(job);
    final List<FileStatus> results = new ArrayList<FileStatus>();
    final Configuration conf = HadoopUtils.getConfiguration(job);
    final boolean recursive = conf.getBoolean("mapred.input.dir.recursive", false);

    for (FileStatus candidate : candidates) {
        // Each entry may live on a different file system, so resolve it per path.
        final FileSystem fs = candidate.getPath().getFileSystem(conf);
        addInputPath(results, fs, candidate, recursive);
    }

    LOG.debug("Total 4mz input paths to process: " + results.size());
    return results;
}

From source file:com.hadoop.mapreduce.LzoTextInputFormat.java

License:Open Source License

/**
 * Filters the parent's input listing down to LZO files and loads the index of each one
 * into {@code indexes}.
 *
 * <p>Files whose path does not end with the LZOP codec's default extension are removed
 * from the listing.
 *
 * @param job job context supplying the configuration
 * @return the remaining (LZO) input files
 * @throws IOException if listing the inputs or reading an index fails
 */
@Override
protected List<FileStatus> listStatus(JobContext job) throws IOException {
    List<FileStatus> files = super.listStatus(job);

    String fileExtension = new LzopCodec().getDefaultExtension();

    for (Iterator<FileStatus> iterator = files.iterator(); iterator.hasNext();) {
        FileStatus fileStatus = iterator.next();
        Path file = fileStatus.getPath();

        if (!file.toString().endsWith(fileExtension)) {
            //get rid of non lzo files
            iterator.remove();
        } else {
            // Resolve the file system from the file's own path: the input may not live on
            // the default file system configured for the job.
            FileSystem fs = file.getFileSystem(job.getConfiguration());
            //read the index file
            LzoIndex index = readIndex(file, fs);
            indexes.put(file, index);
        }
    }

    return files;
}