Example usage for org.apache.hadoop.fs FileSystem globStatus

Introduction

This page collects example usages of org.apache.hadoop.fs.FileSystem.globStatus, drawn from open-source projects.

Prototype

public FileStatus[] globStatus(Path pathPattern) throws IOException 

Document

Return all the files that match pathPattern and are not checksum files.
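
Note that globStatus may also return null: when pathPattern contains no glob characters and the path does not exist, the result is null rather than an empty array, so callers should guard the return value. Below is a minimal, self-contained sketch of calling globStatus directly; the configuration and the /data/logs/*.gz pattern are illustrative assumptions, not taken from the usage examples that follow.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class GlobStatusExample {
    public static void main(String[] args) throws IOException {
        FileSystem fs = FileSystem.get(new Configuration());

        // Expand the glob into the statuses of all matching, non-checksum files.
        FileStatus[] matches = fs.globStatus(new Path("/data/logs/*.gz"));

        // null: the pattern has no glob and the path does not exist;
        // empty array: the glob simply matched nothing.
        if (matches != null) {
            for (FileStatus status : matches) {
                System.out.println(status.getPath() + "\t" + status.getLen());
            }
        }
    }
}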

Usage

From source file:org.springframework.data.hadoop.fs.FsShell.java

License:Apache License

public Map<Path, Long> du(final boolean summary, String... strings) {
    if (ObjectUtils.isEmpty(strings)) {
        strings = new String[] { "." };
    }

    final int BORDER = 2;

    Map<Path, Long> results = new PrettyPrintMap<Path, Long>(strings.length, new MapPrinter<Path, Long>() {

        @Override
        public String toString(Path path, Long size) throws Exception {
            if (summary) {
                // compare the string form of the path; a Path never equals a String
                return ("".equals(path.toString()) ? "." : path) + "\t" + size;
            }
            return String.format("%-" + (10 + BORDER) + "d", size) + path;
        }
    });

    try {
        for (String src : strings) {
            Path srcPath = new Path(src);
            FileSystem srcFs = getFS(srcPath);
            FileStatus[] fileStatus = srcFs.globStatus(srcPath);
            if (summary) {
                for (FileStatus status : fileStatus) {
                    results.put(status.getPath(), srcFs.getContentSummary(status.getPath()).getLength());
                }
            } else {
                FileStatus[] items = srcFs.listStatus(FileUtil.stat2Paths(fileStatus, srcPath));
                if (ObjectUtils.isEmpty(items) && (!srcFs.exists(srcPath))) {
                    throw new HadoopException("Cannot access " + src + ": No such file or directory.");
                }
                for (FileStatus status : items) {
                    Long size = (status.isDir() ? srcFs.getContentSummary(status.getPath()).getLength()
                            : status.getLen());
                    results.put(status.getPath(), size);
                }
            }
        }
    } catch (IOException ex) {
        throw new HadoopException("Cannot inspect resources " + ex.getMessage(), ex);
    }

    return Collections.unmodifiableMap(results);
}
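
Nearly every example on this page pairs globStatus with FileUtil.stat2Paths, which flattens the matched statuses back into plain Path objects. A minimal sketch of the idiom, assuming an fs FileSystem handle as in the surrounding examples (the pattern itself is hypothetical):

Path pattern = new Path("/user/demo/*.txt"); // hypothetical pattern
// stat2Paths(stats, path) falls back to the original path when stats is
// null (a non-glob path that does not exist), so the loop still runs once.
for (Path p : FileUtil.stat2Paths(fs.globStatus(pattern), pattern)) {
    System.out.println(p);
}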

From source file:org.springframework.data.hadoop.fs.FsShell.java

License:Apache License

public void getmerge(String src, String localdst, boolean addnl) {
    Path srcPath = new Path(src);
    Path dst = new Path(localdst);
    try {
        FileSystem srcFs = getFS(srcPath);
        Path[] srcs = FileUtil.stat2Paths(srcFs.globStatus(srcPath), srcPath);
        for (int i = 0; i < srcs.length; i++) {
            // addnl controls whether a newline is appended after each merged file
            FileUtil.copyMerge(srcFs, srcs[i], FileSystem.getLocal(configuration), dst, false, configuration,
                    (addnl ? "\n" : null));
        }
    } catch (IOException ex) {
        throw new HadoopException("Cannot getmerge " + ex.getMessage(), ex);
    }
}

From source file:org.springframework.data.hadoop.fs.FsShell.java

License:Apache License

public Collection<FileStatus> ls(boolean recursive, String... match) {

    Collection<FileStatus> results = new PrettyPrintList<FileStatus>(new ListPrinter<FileStatus>() {
        @Override
        public String toString(FileStatus stat) throws Exception {
            final SimpleDateFormat df = new SimpleDateFormat("yyyy-MM-dd HH:mm");
            int maxReplication = 3, maxLen = 10, maxOwner = 10, maxGroup = 10;

            StringBuilder sb = new StringBuilder();
            sb.append((stat.isDir() ? "d" : "-") + stat.getPermission() + " ");
            sb.append(
                    String.format("%" + maxReplication + "s ", (!stat.isDir() ? stat.getReplication() : "-")));
            sb.append(String.format("%-" + maxOwner + "s ", stat.getOwner()));
            sb.append(String.format("%-" + maxGroup + "s ", stat.getGroup()));
            sb.append(String.format("%" + maxLen + "d ", stat.getLen()));
            sb.append(df.format(new Date(stat.getModificationTime())) + " ");
            sb.append(stat.getPath().toUri().getPath());
            return sb.toString();
        }
    });

    try {
        for (String src : match) {
            Path srcPath = new Path(src);

            FileSystem srcFs = getFS(srcPath);
            FileStatus[] srcs = srcFs.globStatus(srcPath);
            if (!ObjectUtils.isEmpty(srcs)) {
                for (FileStatus status : srcs) {
                    ls(status, srcFs, recursive, results);
                }
            } else {
                throw new IllegalArgumentException("Cannot access " + srcPath + ": No such file or directory.");
            }
        }

        return Collections.unmodifiableCollection(results);

    } catch (IOException ex) {
        throw new HadoopException("Cannot list resources " + ex.getMessage(), ex);
    }
}

From source file:org.springframework.data.hadoop.fs.FsShell.java

License:Apache License

public void mv(String src, String src2, String... dst) {
    Object[] va = parseVarargs(src, src2, dst);
    @SuppressWarnings({ "unchecked" })
    List<Path> sources = (List<Path>) va[0];
    Path dstPath = (Path) va[1];

    try {
        FileSystem dstFs = getFS(dstPath);
        boolean isDstDir = !dstFs.isFile(dstPath);

        if (sources.size() > 1 && !isDstDir) {
            throw new IllegalArgumentException("Destination must be a dir when moving multiple files");
        }

        for (Path srcPath : sources) {
            FileSystem srcFs = getFS(srcPath);
            URI srcURI = srcFs.getUri();
            URI dstURI = dstFs.getUri();
            if (srcURI.compareTo(dstURI) != 0) {
                throw new IllegalArgumentException("src and destination filesystems do not match.");
            }
            Path[] srcs = FileUtil.stat2Paths(srcFs.globStatus(srcPath), srcPath);
            if (srcs.length > 1 && !isDstDir) {
                throw new IllegalArgumentException(
                        "When moving multiple files, destination should be a directory.");
            }
            for (Path s : srcs) {
                if (!srcFs.rename(s, dstPath)) {
                    FileStatus srcFstatus = null;
                    FileStatus dstFstatus = null;
                    try {
                        srcFstatus = srcFs.getFileStatus(s);
                    } catch (FileNotFoundException e) {
                        // ignore
                    }
                    try {
                        dstFstatus = dstFs.getFileStatus(dstPath);
                    } catch (IOException e) {
                        // ignore; dstFstatus simply stays null
                    }
                    if ((srcFstatus != null) && (dstFstatus != null)) {
                        if (srcFstatus.isDir() && !dstFstatus.isDir()) {
                            throw new IllegalArgumentException(
                                    "cannot overwrite non-directory " + dstPath + " with directory " + s);
                        }
                    }
                    throw new HadoopException("Failed to rename " + s + " to " + dstPath);
                }
            }
        }
    } catch (IOException ex) {
        throw new HadoopException("Cannot rename resources " + ex.getMessage(), ex);
    }
}

From source file:org.springframework.data.hadoop.fs.FsShell.java

License:Apache License

public void rm(boolean recursive, boolean skipTrash, String... uris) {
    for (String uri : uris) {
        try {
            Path src = new Path(uri);
            FileSystem srcFs = getFS(src);

            for (Path p : FileUtil.stat2Paths(srcFs.globStatus(src), src)) {
                FileStatus status = srcFs.getFileStatus(p);
                if (status.isDir() && !recursive) {
                    throw new IllegalStateException(
                            "Cannot remove directory \"" + src + "\" without recursive deletion");
                }
                if (!skipTrash) {
                    try {
                        Trash trashTmp = new Trash(srcFs, configuration);
                        trashTmp.moveToTrash(p);
                    } catch (IOException ex) {
                        throw new HadoopException("Cannot move to Trash resource " + p, ex);
                    }
                }
                srcFs.delete(p, recursive);
            }
        } catch (IOException ex) {
            throw new HadoopException("Cannot delete (all) resources " + ex.getMessage(), ex);
        }
    }
}

From source file:org.springframework.data.hadoop.fs.FsShell.java

License:Apache License

public void setrep(long secondsToWait, boolean recursive, short replication, String... uris) {
    Assert.isTrue(replication >= 1, "Replication must be >=1");

    List<Path> waitList = (secondsToWait >= 0 ? new ArrayList<Path>() : null);

    try {
        for (String uri : uris) {
            Path srcPath = new Path(uri);
            FileSystem srcFs = getFS(srcPath);
            Path[] srcs = FileUtil.stat2Paths(srcFs.globStatus(srcPath), srcPath);
            for (Path src : srcs) {
                setrep(replication, recursive, srcFs, src, waitList);
            }
        }

        if (waitList != null) {
            boolean waitUntilDone = (secondsToWait == 0);
            long timeLeft = TimeUnit.SECONDS.toMillis(secondsToWait);

            for (Path path : waitList) {
                FileSystem srcFs = getFS(path);
                FileStatus status = srcFs.getFileStatus(path);
                long len = status.getLen();

                boolean done = false;

                while (!done) {
                    BlockLocation[] locations = srcFs.getFileBlockLocations(status, 0, len);
                    int i = 0;
                    for (; i < locations.length && locations[i].getHosts().length == replication; i++) {
                    }
                    done = (i == locations.length);

                    if (!done && (waitUntilDone || timeLeft > 5000)) {
                        try {
                            // sleep for 10s between replication checks
                            Thread.sleep(10000);
                        } catch (InterruptedException e) {
                            return;
                        }
                        // count the sleep against the remaining wait budget
                        timeLeft -= 10000;
                    } else if (!done) {
                        // wait budget exhausted; stop polling this file
                        break;
                    }
                }
            }
        }
    } catch (IOException ex) {
        throw new HadoopException("Cannot set replication " + ex.getMessage(), ex);
    }
}

From source file:org.springframework.data.hadoop.fs.FsShell.java

License:Apache License

public Collection<String> text(String... uris) {
    Collection<String> texts = new PrettyPrintList<String>(new ListPrinter<String>() {

        @Override
        public String toString(String e) throws Exception {
            return e + "\n";
        }
    });

    for (String uri : uris) {

        InputStream in = null;
        FSDataInputStream i = null;

        try {
            Path srcPat = new Path(uri);
            FileSystem srcFs = getFS(srcPat);

            for (Path src : FileUtil.stat2Paths(srcFs.globStatus(srcPat), srcPat)) {
                Assert.isTrue(srcFs.isFile(src), "Source must be a file");
                i = srcFs.open(src);
                switch (i.readShort()) {
                case 0x1f8b: // gzip magic number (RFC 1952)
                    i.seek(0);
                    in = new GZIPInputStream(i);
                    break;
                case 0x5345: // 'S' 'E' - start of a SequenceFile header
                    if (i.readByte() == 'Q') {
                        i.close();
                        in = new TextRecordInputStream(src, srcFs, configuration);
                    } else {
                        // not a SequenceFile; rewind and read as plain text
                        i.seek(0);
                        in = i;
                    }
                    break;
                default:
                    // plain text; rewind past the two bytes read by readShort()
                    i.seek(0);
                    in = i;
                    break;
                }
                texts.add(getContent(in));
            }
        } catch (IOException ex) {
            throw new HadoopException("Cannot read " + uri + ";" + ex.getMessage(), ex);
        } finally {
            IOUtils.closeStream(in);
            IOUtils.closeStream(i);
        }
    }
    return texts;
}
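
The text example sniffs the leading bytes to pick a decoder: 0x1f8b is the gzip magic number from RFC 1952, and 0x5345 followed by 'Q' spells "SEQ", the start of a SequenceFile header. As an alternative sketch (not part of FsShell), Hadoop's CompressionCodecFactory from org.apache.hadoop.io.compress can choose a codec by file extension instead; the fs, conf, and src variables are assumed from the example above.

CompressionCodecFactory factory = new CompressionCodecFactory(conf);
CompressionCodec codec = factory.getCodec(src); // null if no codec matches the extension
InputStream in = (codec == null) ? fs.open(src) : codec.createInputStream(fs.open(src));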

From source file:org.springframework.data.hadoop.impala.hdfs.FsShellCommands.java

License:Apache License

@CliCommand(value = PREFIX + "rm", help = "Remove files in the HDFS")
public void rm(@CliOption(key = {
        "" }, mandatory = false, specifiedDefaultValue = ".", unspecifiedDefaultValue = ".", help = "path to be deleted") final String path,
        @CliOption(key = {
                "skipTrash" }, mandatory = false, specifiedDefaultValue = "true", unspecifiedDefaultValue = "false", help = "whether to skip the trash") final boolean skipTrash,
        @CliOption(key = {
                "recursive" }, mandatory = false, specifiedDefaultValue = "true", unspecifiedDefaultValue = "false", help = "whether to delete recursively") final boolean recursive) {
    try {
        Path file = new Path(path);
        FileSystem fs = file.getFileSystem(getHadoopConfiguration());
        for (Path p : FileUtil.stat2Paths(fs.globStatus(file), file)) {
            FileStatus status = fs.getFileStatus(p);
            if (status.isDir() && !recursive) {
                LOG.severe("To remove a directory, use 'fs rm --recursive' instead");
                return;
            }
            if (!skipTrash) {
                Trash trash = new Trash(fs, getHadoopConfiguration());
                trash.moveToTrash(p);
            }
            fs.delete(p, recursive);
        }
    } catch (Throwable t) {
        LOG.severe("HDFS shell command failed: " + t.getMessage());
    }

}

From source file:org.springframework.xd.shell.hadoop.FsShellCommands.java

License:Apache License

@CliCommand(value = PREFIX + "rm", help = "Remove files in the HDFS")
public void rm(@CliOption(key = { "",
        PATH }, mandatory = false, unspecifiedDefaultValue = ".", help = "path to be deleted") final String path,
        @CliOption(key = {
                "skipTrash" }, mandatory = false, specifiedDefaultValue = TRUE, unspecifiedDefaultValue = FALSE, help = "whether to skip trash") final boolean skipTrash,
        @CliOption(key = {
                RECURSIVE }, mandatory = false, specifiedDefaultValue = TRUE, unspecifiedDefaultValue = FALSE, help = "whether to recurse") final boolean recursive) {
    try {
        Path file = new Path(path);
        FileSystem fs = file.getFileSystem(getHadoopConfiguration());
        for (Path p : FileUtil.stat2Paths(fs.globStatus(file), file)) {
            FileStatus status = fs.getFileStatus(p);
            if (status.isDirectory() && !recursive) {
                LOG.error("To remove a directory, use 'fs rm </path/to/dir> --recursive' instead");
                return;
            }
            if (!skipTrash) {
                Trash trash = new Trash(fs, getHadoopConfiguration());
                trash.moveToTrash(p);
            }
            fs.delete(p, recursive);
        }
    } catch (Exception t) {
        LOG.error("HDFS shell command failed: " + t.getMessage());
    } catch (Error t) {
        LOG.error("HDFS shell command failed with an error: " + t.getMessage());
    }
}

From source file:org.springframework.yarn.batch.partition.HdfsSplitBatchPartitionHandler.java

License:Apache License

@Override
protected Set<StepExecution> createStepExecutionSplits(StepExecutionSplitter stepSplitter,
        StepExecution stepExecution) throws Exception {

    String input = stepExecution.getJobParameters().getString("input");
    log.info("Input is " + input);

    FileSystem fs = FileSystem.get(configuration);
    Path path = new Path(input);
    FileStatus[] fileStatuses = fs.globStatus(path);

    Set<StepExecution> split = stepSplitter.split(stepExecution, fileStatuses.length);
    return split;
}
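
One caveat in the partitioning example above: fs.globStatus(path) is not null-checked, so a non-glob input path that does not exist makes fileStatuses.length throw a NullPointerException. A guarded variant might look like the sketch below; the IllegalStateException and its message are assumptions, not part of the original handler.

    FileStatus[] fileStatuses = fs.globStatus(path);
    if (fileStatuses == null || fileStatuses.length == 0) {
        // null: non-glob path missing; empty array: glob matched nothing
        throw new IllegalStateException("No input files match " + input);
    }
    return stepSplitter.split(stepExecution, fileStatuses.length);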