Example usage of org.apache.hadoop.fs.FileStatus.getPath()

Introduction

On this page you can find example usages of org.apache.hadoop.fs.FileStatus.getPath().

Prototype

public Path getPath()

Source Link

Usage

From source file:com.grantingersoll.intell.clustering.KMeansClusteringEngine.java

License:Apache License

/**
 * Reads every sequence file directly under {@code pointsPathDir} and groups the
 * names of the contained vectors by their integer key.
 *
 * <p>Entries whose name ends with {@code .crc} or starts with {@code _} are skipped.
 * Each record's key is used as the map key (the cluster id, per the original
 * author's comment) and the name of the record's {@link NamedVector} is appended
 * to that key's list.
 *
 * @param pointsPathDir directory containing the sequence files to read
 * @param conf          Hadoop configuration used to resolve the file system and open readers
 * @return a sorted map from key to the vector names read for that key; empty when
 *         the directory listing yields nothing
 * @throws IOException if listing, reading, or closing a sequence file fails
 */
private static Map<Integer, List<String>> readPoints(Path pointsPathDir, Configuration conf)
        throws IOException {
    Map<Integer, List<String>> result = new TreeMap<Integer, List<String>>();

    FileSystem fs = pointsPathDir.getFileSystem(conf);
    FileStatus[] children = fs.listStatus(pointsPathDir, new PathFilter() {
        public boolean accept(Path path) {
            String name = path.getName();
            return !(name.endsWith(".crc") || name.startsWith("_"));
        }
    });
    // listStatus may hand back null instead of an empty array; treat that as "no points".
    if (children == null) {
        return result;
    }

    for (FileStatus file : children) {
        Path path = file.getPath();
        SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, conf);
        try {
            IntWritable key = reader.getKeyClass().asSubclass(IntWritable.class).newInstance();
            WeightedVectorWritable value = reader.getValueClass().asSubclass(WeightedVectorWritable.class)
                    .newInstance();
            while (reader.next(key, value)) {
                // key is the clusterId, value is a single weighted point of that cluster
                List<String> pointList = result.get(key.get());
                if (pointList == null) {
                    pointList = new ArrayList<String>();
                    result.put(key.get(), pointList);
                }
                // We know we are dealing with named vectors, b/c we generated from the id field
                String name = ((NamedVector) value.getVector()).getName();
                pointList.add(name);
            }
        } catch (InstantiationException e) {
            log.error("Exception", e);
        } catch (IllegalAccessException e) {
            log.error("Exception", e);
        } finally {
            // Always release the underlying stream, even when reading fails part-way.
            reader.close();
        }
    }

    return result;
}

From source file:com.griddynamics.jagger.storage.fs.HdfsStorage.java

License:Open Source License

/**
 * Lists the entries found at {@code path} and returns each entry's full path as a string.
 *
 * @param path location to list
 * @return the string form of every listed entry's path, or an empty set when the
 *         listing comes back as {@code null}
 * @throws IOException if the underlying file system listing fails
 */
@Override
public Set<String> getFileNameList(String path) throws IOException {
    FileStatus[] entries = hdfsClient.getFileSystem().listStatus(new Path(path));
    if (entries == null) {
        return Collections.emptySet();
    }

    Set<String> names = new HashSet<String>();
    for (FileStatus entry : entries) {
        names.add(entry.getPath().toString());
    }
    return names;
}

From source file:com.gruter.hadoop.customShell.CustomShell.java

License:Apache License

/**
 * Prints a listing of {@code src} to standard output, one line per entry, with
 * columns padded to the widest value found in the listing.
 *
 * @param src         the file or directory to list
 * @param srcFs       file system that {@code src} belongs to
 * @param recursive   when {@code true}, descend into every directory entry
 * @param printHeader when {@code true} (and not recursive), print a "Found N items"
 *                    line before a non-empty listing
 * @return {@code 1} if the listing could not be obtained; otherwise the sum of the
 *         values returned by the recursive calls
 * @throws IOException propagated from the recursive listing
 */
private int ls(FileStatus src, FileSystem srcFs, boolean recursive, boolean printHeader) throws IOException {
    final String cmd = recursive ? "lsr" : "ls";
    final FileStatus[] items = shellListStatus(cmd, srcFs, src);
    if (items == null) {
        return 1;
    }

    if (!recursive && printHeader && items.length != 0) {
        System.out.println("Found " + items.length + " items");
    }

    // First pass: work out how wide each column has to be.
    int maxReplication = 3;
    int maxLen = 10;
    int maxOwner = 0;
    int maxGroup = 0;
    for (FileStatus item : items) {
        maxReplication = Math.max(maxReplication, String.valueOf(item.getReplication()).length());
        maxLen = Math.max(maxLen, String.valueOf(item.getLen()).length());
        maxOwner = Math.max(maxOwner, String.valueOf(item.getOwner()).length());
        maxGroup = Math.max(maxGroup, String.valueOf(item.getGroup()).length());
    }

    // Second pass: print each entry, recursing into directories when requested.
    int numOfErrors = 0;
    for (FileStatus item : items) {
        final Path cur = item.getPath();
        final String mdate = dateForm.format(new Date(item.getModificationTime()));
        final boolean directory = item.isDir();

        String typeFlag = directory ? "d" : "-";
        System.out.print(typeFlag + item.getPermission() + " ");

        // Directories show "-" in the replication column.
        Object replicationCell = directory ? "-" : item.getReplication();
        System.out.printf("%" + maxReplication + "s ", replicationCell);

        if (maxOwner > 0) {
            System.out.printf("%-" + maxOwner + "s ", item.getOwner());
        }
        if (maxGroup > 0) {
            System.out.printf("%-" + maxGroup + "s ", item.getGroup());
        }
        System.out.printf("%" + maxLen + "d ", item.getLen());
        System.out.print(mdate + " ");
        System.out.println(cur.toUri().getPath());

        if (recursive && directory) {
            numOfErrors += ls(item, srcFs, recursive, printHeader);
        }
    }
    return numOfErrors;
}

From source file:com.gruter.hadoop.customShell.CustomShell.java

License:Apache License

/**
 * Helper that returns the listing for {@code src}.
 *
 * <p>A non-directory is returned as a single-element array containing {@code src}
 * itself. For a directory the file system is asked for its children; failures are
 * reported on standard error rather than thrown.
 *
 * @param cmd   command name used as the prefix of any error message
 * @param srcFs file system to query
 * @param src   the file or directory to list
 * @return the listing, or {@code null} if it could not be obtained
 */
private static FileStatus[] shellListStatus(String cmd, FileSystem srcFs, FileStatus src) {
    if (!src.isDir()) {
        return new FileStatus[] { src };
    }
    Path path = src.getPath();
    try {
        FileStatus[] files = srcFs.listStatus(path);
        if (files == null) {
            System.err.println(cmd + ": could not get listing for '" + path + "'");
        }
        return files;
    } catch (IOException e) {
        // Report only the first line of the message. An exception may carry no message
        // at all, so fall back to its string form instead of dereferencing null.
        String message = e.getMessage();
        String firstLine;
        if (message == null) {
            firstLine = e.toString();
        } else {
            int newline = message.indexOf('\n');
            firstLine = (newline < 0) ? message : message.substring(0, newline);
        }
        System.err.println(cmd + ": could not get listing for '" + path + "' : " + firstLine);
        return null;
    }
}

From source file:com.hadoop.compression.lzo.LzoIndexer.java

License:Open Source License

/**
 * Lzo index a given path, calling recursively to index directories when encountered.
 * Files are only indexed if they end in .lzo and have no existing .lzo.index file.
 *
 * @param lzoPath The base path to index.
 * @param nestingLevel For pretty printing, the nesting level.
 * @throws IOException if the file system cannot be resolved or queried
 */
private void indexInternal(Path lzoPath, int nestingLevel) throws IOException {
    // Resolve the file system from the Path itself. Round-tripping through
    // URI.create(lzoPath.toString()) fails for paths containing characters that are
    // illegal in a URI (e.g. spaces), because Path.toString() is not escaped.
    FileSystem fs = lzoPath.getFileSystem(conf_);
    FileStatus fileStatus = fs.getFileStatus(lzoPath);

    // Recursively walk
    if (fileStatus.isDir()) {
        LOG.info(getNesting(nestingLevel) + "LZO Indexing directory " + lzoPath + "...");
        FileStatus[] statuses = fs.listStatus(lzoPath);
        for (FileStatus childStatus : statuses) {
            indexInternal(childStatus.getPath(), nestingLevel + 1);
        }
    } else if (lzoPath.toString().endsWith(LZO_EXTENSION)) {
        Path lzoIndexPath = new Path(lzoPath.toString() + LzoIndex.LZO_INDEX_SUFFIX);
        if (fs.exists(lzoIndexPath)) {
            LOG.info(getNesting(nestingLevel) + "[SKIP] LZO index file already exists for " + lzoPath + "\n");
        } else {
            long startTime = System.currentTimeMillis();
            long fileSize = fileStatus.getLen();

            LOG.info(getNesting(nestingLevel) + "[INDEX] LZO Indexing file " + lzoPath + ", size "
                    + df_.format(fileSize / (1024.0 * 1024.0 * 1024.0)) + " GB...");
            if (indexSingleFile(fs, lzoPath)) {
                // Report throughput and the size of the index that was just written.
                long indexSize = fs.getFileStatus(lzoIndexPath).getLen();
                double elapsed = (System.currentTimeMillis() - startTime) / 1000.0;
                LOG.info(getNesting(nestingLevel) + "Completed LZO Indexing in " + df_.format(elapsed)
                        + " seconds (" + df_.format(fileSize / (1024.0 * 1024.0 * elapsed))
                        + " MB/s).  Index size is " + df_.format(indexSize / 1024.0) + " KB.\n");
            }
        }
    }
}

From source file:com.hadoop.mapred.DeprecatedLzoTextInputFormat.java

License:Open Source License

/**
 * Filters the parent's input listing and loads an index for every LZO file kept.
 *
 * <p>LZO files stay in the listing and have their index read and cached in
 * {@code indexes}. Other files are dropped when the configuration asks to ignore
 * non-LZO input, and LZO index files are always dropped.
 *
 * @param conf job configuration
 * @return the filtered listing
 * @throws IOException if listing the input or reading an index fails
 */
@Override
protected FileStatus[] listStatus(JobConf conf) throws IOException {
    List<FileStatus> candidates = new ArrayList<FileStatus>(Arrays.asList(super.listStatus(conf)));
    final boolean ignoreNonLzo = LzoInputFormatCommon.getIgnoreNonLzoProperty(conf);

    for (Iterator<FileStatus> cursor = candidates.iterator(); cursor.hasNext();) {
        Path file = cursor.next().getPath();

        if (LzoInputFormatCommon.isLzoFile(file.toString())) {
            // Real LZO input: keep it and remember its index.
            FileSystem fs = file.getFileSystem(conf);
            indexes.put(file, LzoIndex.readIndex(fs, file));
            continue;
        }

        // Non-LZO files are removed only when the conf says so, but ".lzo.index"
        // files are never part of the input and are always removed.
        if (ignoreNonLzo || LzoInputFormatCommon.isLzoIndexFile(file.toString())) {
            cursor.remove();
        }
    }

    return candidates.toArray(new FileStatus[0]);
}

From source file:com.hadoop.mapreduce.FourMcInputFormat.java

License:BSD License

/**
 * Expands the parent's input listing into the final list of input files by passing
 * every entry through {@code addInputPath}.
 *
 * @param job the job context
 * @return the collected input files
 * @throws IOException if listing or file system resolution fails
 */
@Override
protected List<FileStatus> listStatus(JobContext job) throws IOException {
    List<FileStatus> listed = super.listStatus(job);
    List<FileStatus> results = new ArrayList<FileStatus>();
    Configuration conf = HadoopUtils.getConfiguration(job);
    boolean recursive = conf.getBoolean("mapred.input.dir.recursive", false);

    for (FileStatus entry : listed) {
        // Each entry may live on a different file system, so resolve it per path.
        FileSystem entryFs = entry.getPath().getFileSystem(conf);
        addInputPath(results, entryFs, entry, recursive);
    }

    LOG.debug("Total 4mc input paths to process: " + results.size());
    return results;
}

From source file:com.hadoop.mapreduce.FourMcInputFormat.java

License:BSD License

/**
 * Adds {@code pathStat} to {@code results} if it is a file accepted by
 * {@code visible4mcFilter}; if it is a directory and {@code recursive} is set,
 * descends into its children (filtered by {@code hiddenPathFilter}) instead.
 *
 * @param results   accumulator for accepted files
 * @param fs        file system used to list directories
 * @param pathStat  the file or directory to consider
 * @param recursive whether directories should be descended into
 * @throws IOException if listing a directory fails
 */
protected void addInputPath(List<FileStatus> results, FileSystem fs, FileStatus pathStat, boolean recursive)
        throws IOException {
    Path path = pathStat.getPath();

    if (!pathStat.isDir()) {
        if (visible4mcFilter.accept(path)) {
            results.add(pathStat);
        }
        return;
    }

    // Directories contribute nothing unless recursion was requested.
    if (!recursive) {
        return;
    }
    for (FileStatus child : fs.listStatus(path, hiddenPathFilter)) {
        addInputPath(results, fs, child, recursive);
    }
}

From source file:com.hadoop.mapreduce.FourMzInputFormat.java

License:BSD License

/**
 * Expands the parent's input listing into the final list of input files by passing
 * every entry through {@code addInputPath}.
 *
 * @param job the job context
 * @return the collected input files
 * @throws IOException if listing or file system resolution fails
 */
@Override
protected List<FileStatus> listStatus(JobContext job) throws IOException {
    List<FileStatus> listed = super.listStatus(job);
    List<FileStatus> results = new ArrayList<FileStatus>();
    Configuration conf = HadoopUtils.getConfiguration(job);
    boolean recursive = conf.getBoolean("mapred.input.dir.recursive", false);

    for (FileStatus entry : listed) {
        // Each entry may live on a different file system, so resolve it per path.
        FileSystem entryFs = entry.getPath().getFileSystem(conf);
        addInputPath(results, entryFs, entry, recursive);
    }

    LOG.debug("Total 4mz input paths to process: " + results.size());
    return results;
}

From source file:com.hadoop.mapreduce.LzoTextInputFormat.java

License:Open Source License

/**
 * Filters the parent's input listing down to LZO files and loads the index for
 * each one into {@code indexes}.
 *
 * <p>Files whose path does not end with the LZOP codec's default extension are
 * removed from the listing.
 *
 * @param job the job context
 * @return the listing with non-LZO files removed
 * @throws IOException if listing the input or reading an index fails
 */
@Override
protected List<FileStatus> listStatus(JobContext job) throws IOException {
    List<FileStatus> files = super.listStatus(job);

    String fileExtension = new LzopCodec().getDefaultExtension();

    for (Iterator<FileStatus> iterator = files.iterator(); iterator.hasNext();) {
        FileStatus fileStatus = iterator.next();
        Path file = fileStatus.getPath();

        if (!file.toString().endsWith(fileExtension)) {
            // get rid of non lzo files
            iterator.remove();
        } else {
            // Read the index through the file system that owns this path. Using the
            // job's default file system breaks inputs located on a different one.
            FileSystem fs = file.getFileSystem(job.getConfiguration());
            LzoIndex index = readIndex(file, fs);
            indexes.put(file, index);
        }
    }

    return files;
}