List of usage examples for org.apache.hadoop.fs.FileSystem#globStatus
public FileStatus[] globStatus(Path pathPattern) throws IOException
Returns all the files that match pathPattern and are not checksum files.
From source file:org.springframework.data.hadoop.fs.FsShell.java
License:Apache License
/**
 * Reports the space used by the given paths (glob patterns allowed).
 *
 * <p>In summary mode one entry is produced per path matched by the pattern,
 * holding the length of its content summary. Otherwise the matched paths are
 * listed and one entry is produced per listed item (content-summary length
 * for directories, plain length for files).
 *
 * @param summary whether to report one aggregated entry per matched path
 * @param strings paths or glob patterns; defaults to {@code "."} when empty
 * @return an unmodifiable map from path to size
 * @throws HadoopException if a path cannot be accessed or inspected
 */
public Map<Path, Long> du(final boolean summary, String... strings) {
    if (ObjectUtils.isEmpty(strings)) {
        strings = new String[] { "." };
    }

    final int BORDER = 2;

    Map<Path, Long> results = new PrettyPrintMap<Path, Long>(strings.length, new MapPrinter<Path, Long>() {
        @Override
        public String toString(Path path, Long size) throws Exception {
            if (summary) {
                // Compare the textual form: a Path instance is never equal to a String,
                // so the previous "".equals(path) check could not succeed.
                String name = String.valueOf(path);
                return ("".equals(name) ? "." : name) + "\t" + size;
            }
            return String.format("%-" + (10 + BORDER) + "d", size) + path;
        }
    });

    try {
        for (String src : strings) {
            Path srcPath = new Path(src);
            FileSystem srcFs = getFS(srcPath);
            FileStatus[] fileStatus = srcFs.globStatus(srcPath);
            if (summary) {
                // globStatus yields null for a non-glob path that does not exist;
                // report it the same way as the non-summary branch instead of an NPE.
                if (fileStatus == null) {
                    throw new HadoopException("Cannot access " + src + ": No such file or directory.");
                }
                for (FileStatus status : fileStatus) {
                    results.put(status.getPath(), srcFs.getContentSummary(status.getPath()).getLength());
                }
            } else {
                FileStatus items[] = srcFs.listStatus(FileUtil.stat2Paths(fileStatus, srcPath));
                if (ObjectUtils.isEmpty(items) && (!srcFs.exists(srcPath))) {
                    throw new HadoopException("Cannot access " + src + ": No such file or directory.");
                }
                // items may be null on file systems that return null for an empty listing.
                if (items != null) {
                    for (FileStatus status : items) {
                        Long size = (status.isDir() ? srcFs.getContentSummary(status.getPath()).getLength()
                                : status.getLen());
                        results.put(status.getPath(), size);
                    }
                }
            }
        }
    } catch (IOException ex) {
        throw new HadoopException("Cannot inspect resources " + ex.getMessage(), ex);
    }

    return Collections.unmodifiableMap(results);
}
From source file:org.springframework.data.hadoop.fs.FsShell.java
License:Apache License
public void getmerge(String src, String localdst, boolean addnl) { Path srcPath = new Path(src); Path dst = new Path(localdst); try {//from ww w. ja va 2 s .c o m FileSystem srcFs = getFS(srcPath); Path[] srcs = FileUtil.stat2Paths(srcFs.globStatus(srcPath), srcPath); for (int i = 0; i < srcs.length; i++) { if (addnl) { FileUtil.copyMerge(srcFs, srcs[i], FileSystem.getLocal(configuration), dst, false, configuration, "\n"); } else { FileUtil.copyMerge(srcFs, srcs[i], FileSystem.getLocal(configuration), dst, false, configuration, null); } } } catch (IOException ex) { throw new HadoopException("Cannot getmerge " + ex.getMessage(), ex); } }
From source file:org.springframework.data.hadoop.fs.FsShell.java
License:Apache License
public Collection<FileStatus> ls(boolean recursive, String... match) { Collection<FileStatus> results = new PrettyPrintList<FileStatus>(new ListPrinter<FileStatus>() { @Override//w w w . j a v a2 s . c o m public String toString(FileStatus stat) throws Exception { final SimpleDateFormat df = new SimpleDateFormat("yyyy-MM-dd HH:mm"); int maxReplication = 3, maxLen = 10, maxOwner = 10, maxGroup = 10; StringBuilder sb = new StringBuilder(); sb.append((stat.isDir() ? "d" : "-") + stat.getPermission() + " "); sb.append( String.format("%" + maxReplication + "s ", (!stat.isDir() ? stat.getReplication() : "-"))); sb.append(String.format("%-" + maxOwner + "s ", stat.getOwner())); sb.append(String.format("%-" + maxGroup + "s ", stat.getGroup())); sb.append(String.format("%" + maxLen + "d ", stat.getLen())); sb.append(df.format(new Date(stat.getModificationTime())) + " "); sb.append(stat.getPath().toUri().getPath()); return sb.toString(); } }); try { for (String src : match) { Path srcPath = new Path(src); FileSystem srcFs = getFS(srcPath); FileStatus[] srcs = srcFs.globStatus(srcPath); if (!ObjectUtils.isEmpty(srcs)) { for (FileStatus status : srcs) { ls(status, srcFs, recursive, results); } } else { throw new IllegalArgumentException("Cannot access " + srcPath + ": No such file or directory."); } } return Collections.unmodifiableCollection(results); } catch (IOException ex) { throw new HadoopException("Cannot list resources " + ex.getMessage(), ex); } }
From source file:org.springframework.data.hadoop.fs.FsShell.java
License:Apache License
public void mv(String src, String src2, String... dst) { Object[] va = parseVarargs(src, src2, dst); @SuppressWarnings({ "unchecked" }) List<Path> sources = (List<Path>) va[0]; Path dstPath = (Path) va[1]; try {/*from w ww . j a v a2 s . c o m*/ FileSystem dstFs = getFS(dstPath); boolean isDstDir = !dstFs.isFile(dstPath); if (sources.size() > 1 && !isDstDir) { throw new IllegalArgumentException("Destination must be a dir when moving multiple files"); } for (Path srcPath : sources) { FileSystem srcFs = getFS(srcPath); URI srcURI = srcFs.getUri(); URI dstURI = dstFs.getUri(); if (srcURI.compareTo(dstURI) != 0) { throw new IllegalArgumentException("src and destination filesystems do not match."); } Path[] srcs = FileUtil.stat2Paths(srcFs.globStatus(srcPath), srcPath); if (srcs.length > 1 && !isDstDir) { throw new IllegalArgumentException( "When moving multiple files, destination should be a directory."); } for (Path s : srcs) { if (!srcFs.rename(s, dstPath)) { FileStatus srcFstatus = null; FileStatus dstFstatus = null; try { srcFstatus = srcFs.getFileStatus(s); } catch (FileNotFoundException e) { // ignore } try { dstFstatus = dstFs.getFileStatus(dstPath); } catch (IOException e) { } if ((srcFstatus != null) && (dstFstatus != null)) { if (srcFstatus.isDir() && !dstFstatus.isDir()) { throw new IllegalArgumentException( "cannot overwrite non directory " + dstPath + " with directory " + s); } } throw new HadoopException("Failed to rename " + s + " to " + dstPath); } } } } catch (IOException ex) { throw new HadoopException("Cannot rename resources " + ex.getMessage(), ex); } }
From source file:org.springframework.data.hadoop.fs.FsShell.java
License:Apache License
/**
 * Removes the paths matching the given URIs (glob patterns allowed).
 *
 * <p>Unless {@code skipTrash} is set, each path is first offered to the
 * Hadoop trash. A path is deleted directly only when it was not moved to the
 * trash, so no delete is issued against a location that has just been
 * vacated.
 *
 * @param recursive whether directories may be removed (recursively)
 * @param skipTrash whether to bypass the trash and delete directly
 * @param uris paths or glob patterns to remove
 * @throws IllegalStateException if a directory is matched and
 *         {@code recursive} is false
 * @throws HadoopException if moving to trash or deleting fails
 */
public void rm(boolean recursive, boolean skipTrash, String... uris) {
    for (String uri : uris) {
        try {
            Path src = new Path(uri);
            FileSystem srcFs = getFS(src);

            for (Path p : FileUtil.stat2Paths(srcFs.globStatus(src), src)) {
                FileStatus status = srcFs.getFileStatus(p);
                if (status.isDir() && !recursive) {
                    throw new IllegalStateException(
                            "Cannot remove directory \"" + src + "\", if recursive deletion was not specified");
                }

                boolean movedToTrash = false;
                if (!skipTrash) {
                    try {
                        Trash trashTmp = new Trash(srcFs, configuration);
                        // false means the trash is disabled or p already lives in it.
                        movedToTrash = trashTmp.moveToTrash(p);
                    } catch (IOException ex) {
                        throw new HadoopException("Cannot move to Trash resource " + p, ex);
                    }
                }

                if (!movedToTrash) {
                    srcFs.delete(p, recursive);
                }
            }
        } catch (IOException ex) {
            throw new HadoopException("Cannot delete (all) resources " + ex.getMessage(), ex);
        }
    }
}
From source file:org.springframework.data.hadoop.fs.FsShell.java
License:Apache License
public void setrep(long secondsToWait, boolean recursive, short replication, String... uris) { Assert.isTrue(replication >= 1, "Replication must be >=1"); List<Path> waitList = (secondsToWait >= 0 ? new ArrayList<Path>() : null); try {// w ww .j a v a 2 s . c o m for (String uri : uris) { Path srcPath = new Path(uri); FileSystem srcFs = getFS(srcPath); Path[] srcs = FileUtil.stat2Paths(srcFs.globStatus(srcPath), srcPath); for (Path src : srcs) { setrep(replication, recursive, srcFs, src, waitList); } } if (waitList != null) { boolean waitUntilDone = (secondsToWait == 0); long timeLeft = TimeUnit.SECONDS.toMillis(secondsToWait); for (Path path : waitList) { FileSystem srcFs = getFS(path); FileStatus status = srcFs.getFileStatus(path); long len = status.getLen(); boolean done = false; while (!done) { BlockLocation[] locations = srcFs.getFileBlockLocations(status, 0, len); int i = 0; for (; i < locations.length && locations[i].getHosts().length == replication; i++) { } done = (i == locations.length); if (!done && (waitUntilDone || timeLeft > 5000)) { try { // sleep for 10s Thread.sleep(10000); } catch (InterruptedException e) { return; } timeLeft = -1000; } } } } } catch (IOException ex) { throw new HadoopException("Cannot set replication " + ex.getMessage(), ex); } }
From source file:org.springframework.data.hadoop.fs.FsShell.java
License:Apache License
public Collection<String> text(String... uris) { Collection<String> texts = new PrettyPrintList<String>(new ListPrinter<String>() { @Override/* w ww . ja v a 2s .c om*/ public String toString(String e) throws Exception { return e + "\n"; } }); for (String uri : uris) { InputStream in = null; FSDataInputStream i = null; try { Path srcPat = new Path(uri); FileSystem srcFs = getFS(srcPat); for (Path src : FileUtil.stat2Paths(srcFs.globStatus(srcPat), srcPat)) { Assert.isTrue(srcFs.isFile(src), "Source must be a file"); i = srcFs.open(src); switch (i.readShort()) { case 0x1f8b: // RFC 1952 i.seek(0); in = new GZIPInputStream(i); break; case 0x5345: // 'S' 'E' if (i.readByte() == 'Q') { i.close(); in = new TextRecordInputStream(src, srcFs, configuration); } break; default: in = i; break; } i.seek(0); texts.add(getContent(in)); } } catch (IOException ex) { throw new HadoopException("Cannot read " + uri + ";" + ex.getMessage(), ex); } finally { IOUtils.closeStream(in); IOUtils.closeStream(i); } } return texts; }
From source file:org.springframework.data.hadoop.impala.hdfs.FsShellCommands.java
License:Apache License
/**
 * Shell command that removes the paths matching {@code path} (glob patterns
 * allowed).
 *
 * <p>Unless {@code skipTrash} is set, each path is first offered to the
 * Hadoop trash and is deleted directly only when it was not moved there.
 * Failures are logged, not propagated to the shell.
 *
 * @param path path or glob pattern to delete; defaults to {@code "."}
 * @param skipTrash whether to bypass the trash
 * @param recursive whether directories may be removed
 */
@CliCommand(value = PREFIX + "rm", help = "Remove files in the HDFS")
public void rm(
        @CliOption(key = { "" }, mandatory = false, specifiedDefaultValue = ".", unspecifiedDefaultValue = ".", help = "path to be deleted") final String path,
        @CliOption(key = { "skipTrash" }, mandatory = false, specifiedDefaultValue = "true", unspecifiedDefaultValue = "false", help = "whether skip trash") final boolean skipTrash,
        @CliOption(key = { "recursive" }, mandatory = false, specifiedDefaultValue = "true", unspecifiedDefaultValue = "false", help = "whether with recursion") final boolean recursive) {
    try {
        Path file = new Path(path);
        FileSystem fs = file.getFileSystem(getHadoopConfiguration());
        for (Path p : FileUtil.stat2Paths(fs.globStatus(file), file)) {
            FileStatus status = fs.getFileStatus(p);
            if (status.isDir() && !recursive) {
                LOG.severe("To remove directory, please use fs rm --recursive instead");
                return;
            }

            boolean movedToTrash = false;
            if (!skipTrash) {
                Trash trash = new Trash(fs, getHadoopConfiguration());
                // false means the trash is disabled or p already lives in it.
                movedToTrash = trash.moveToTrash(p);
            }
            if (!movedToTrash) {
                fs.delete(p, recursive);
            }
        }
    } catch (Throwable t) {
        // Command boundary: report the failure with its stack trace instead of failing the shell.
        LOG.log(java.util.logging.Level.SEVERE, "run HDFS shell failed. Message is: " + t.getMessage(), t);
    }
}
From source file:org.springframework.xd.shell.hadoop.FsShellCommands.java
License:Apache License
@CliCommand(value = PREFIX + "rm", help = "Remove files in the HDFS") public void rm(@CliOption(key = { "", PATH }, mandatory = false, unspecifiedDefaultValue = ".", help = "path to be deleted") final String path, @CliOption(key = {//from w w w . java2s .co m "skipTrash" }, mandatory = false, specifiedDefaultValue = TRUE, unspecifiedDefaultValue = FALSE, help = "whether to skip trash") final boolean skipTrash, @CliOption(key = { RECURSIVE }, mandatory = false, specifiedDefaultValue = TRUE, unspecifiedDefaultValue = FALSE, help = "whether to recurse") final boolean recursive) { try { Path file = new Path(path); FileSystem fs = file.getFileSystem(getHadoopConfiguration()); for (Path p : FileUtil.stat2Paths(fs.globStatus(file), file)) { FileStatus status = fs.getFileStatus(p); if (status.isDirectory() && !recursive) { LOG.error("To remove directory, please use 'fs rm </path/to/dir> --recursive' instead"); return; } if (!skipTrash) { Trash trash = new Trash(fs, getHadoopConfiguration()); trash.moveToTrash(p); } fs.delete(p, recursive); } } catch (Exception t) { LOG.error("Exception: run HDFS shell failed. Message is: " + t.getMessage()); } catch (Error t) { LOG.error("Error: run HDFS shell failed. Message is: " + t.getMessage()); } }
From source file:org.springframework.yarn.batch.partition.HdfsSplitBatchPartitionHandler.java
License:Apache License
/**
 * Splits the step into one execution per file matched by the {@code input}
 * job parameter, which is interpreted as a path or glob pattern.
 *
 * @param stepSplitter splitter that creates the step executions
 * @param stepExecution the step execution being partitioned
 * @return the step executions produced by the splitter
 * @throws IllegalArgumentException if the {@code input} parameter is missing
 *         or names a path that does not exist
 * @throws Exception if the file system or the splitter fails
 */
@Override
protected Set<StepExecution> createStepExecutionSplits(StepExecutionSplitter stepSplitter,
        StepExecution stepExecution) throws Exception {
    String input = stepExecution.getJobParameters().getString("input");
    log.info("Input is " + input);
    if (input == null) {
        throw new IllegalArgumentException("Job parameter 'input' is required");
    }

    FileSystem fs = FileSystem.get(configuration);
    Path path = new Path(input);
    FileStatus[] fileStatuses = fs.globStatus(path);
    // globStatus yields null for a non-glob path that does not exist.
    if (fileStatuses == null) {
        throw new IllegalArgumentException("Input path does not exist: " + input);
    }

    return stepSplitter.split(stepExecution, fileStatuses.length);
}