List of usage examples for org.apache.hadoop.fs FileSystem globStatus
public FileStatus[] globStatus(Path pathPattern) throws IOException
Returns all files that match the given path pattern and are not checksum files.
From source file:org.opencloudengine.flamingo.mapreduce.util.HdfsUtils.java
License:Apache License
/**
 * Recursively deletes every HDFS path matching the given pattern on the
 * cluster identified by the given URL.
 *
 * @param hdfsUrl       HDFS URL (e.g. {@code hdfs://namenode:8020}) set as the default filesystem
 * @param hdfsDirectory HDFS path or glob pattern whose matches are deleted
 * @throws java.io.IOException if the filesystem cannot be reached or a delete fails
 */
public static void deleteFromHdfs(String hdfsUrl, String hdfsDirectory) throws IOException {
    Configuration conf = new Configuration();
    conf.set("fs.default.name", hdfsUrl);
    FileSystem fs = FileSystem.get(conf);
    // globStatus() returns null when a non-glob path does not exist;
    // the original indexed loop would NPE on statuses.length in that case.
    FileStatus[] statuses = fs.globStatus(new Path(hdfsDirectory));
    if (statuses == null) {
        return; // nothing matched — nothing to delete
    }
    for (FileStatus fileStatus : statuses) {
        fs.delete(fileStatus.getPath(), true); // true = recursive delete
    }
}
From source file:org.opencloudengine.flamingo.mapreduce.util.HdfsUtils.java
License:Apache License
/** * HDFS? ? ?? ./*from ww w . java2s . com*/ * * @param hdfsDirectory ?? HDFS Directory URL * @throws java.io.IOException ?? */ public static void deleteFromHdfs(String hdfsDirectory) throws IOException { Configuration conf = new Configuration(); FileSystem fs = FileSystem.get(conf); FileStatus[] statuses = fs.globStatus(new Path(hdfsDirectory)); for (int i = 0; i < statuses.length; i++) { FileStatus fileStatus = statuses[i]; fs.delete(fileStatus.getPath(), true); } }
From source file:org.openflamingo.engine.util.HdfsUtils.java
License:Apache License
/** * ? ? ??./*from ww w .ja v a 2 s .com*/ * * @param source ?? * @param target ?? * @param fs Hadoop FileSystem */ public static void move(String source, String target, FileSystem fs) throws Exception { Path srcPath = new Path(source); Path[] srcs = FileUtil.stat2Paths(fs.globStatus(srcPath), srcPath); Path dst = new Path(target); if (srcs.length > 1 && !fs.getFileStatus(dst).isDir()) { throw new FileSystemException("When moving multiple files, destination should be a directory."); } for (int i = 0; i < srcs.length; i++) { if (!fs.rename(srcs[i], dst)) { FileStatus srcFstatus = null; FileStatus dstFstatus = null; try { srcFstatus = fs.getFileStatus(srcs[i]); } catch (FileNotFoundException e) { throw new FileNotFoundException(srcs[i] + ": No such file or directory"); } try { dstFstatus = fs.getFileStatus(dst); } catch (IOException e) { // Nothing } if ((srcFstatus != null) && (dstFstatus != null)) { if (srcFstatus.isDir() && !dstFstatus.isDir()) { throw new FileSystemException( "cannot overwrite non directory " + dst + " with directory " + srcs[i]); } } throw new FileSystemException("Failed to rename " + srcs[i] + " to " + dst); } } }
From source file:org.sleuthkit.hadoop.clustering.ClusterUtil.java
License:Apache License
/** * Read a dictionary in {@link SequenceFile} generated by * {@link org.apache.mahout.utils.vectors.text.DictionaryVectorizer} * * @param conf/*from w w w.j av a2 s .co m*/ * @param fs * @param filePattern * <PATH TO DICTIONARY>/dictionary.file-* * @throws IOException */ public static String[] loadTermDictionary(Configuration conf, FileSystem fs, String filePattern) throws IOException { FileStatus[] dictionaryFiles = fs.globStatus(new Path(filePattern)); OpenObjectIntHashMap<String> dict = new OpenObjectIntHashMap<String>(); Writable key = new Text(); IntWritable value = new IntWritable(); for (FileStatus fileStatus : dictionaryFiles) { Path path = fileStatus.getPath(); SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, conf); // key is term value is id while (reader.next(key, value)) { dict.put(key.toString(), value.get()); } } String[] dictionary = new String[dict.size()]; for (String feature : dict.keys()) { dictionary[dict.get(feature)] = feature; } return dictionary; }
From source file:org.sleuthkit.hadoop.core.SKJobFactory.java
License:Open Source License
/**
 * Adds every jar under {@code /texaspete/lib/} on the default filesystem to the
 * distributed cache / classpath for the job described by the given configuration.
 *
 * @param conf job configuration to register the jars with
 * @throws IOException if the filesystem cannot be accessed
 */
public static void addDependencies(Configuration conf) throws IOException {
    FileSystem fs = FileSystem.get(conf);
    // globStatus() may return null (path absent) or an empty array (no matches);
    // the original dereferenced jars.length without a null check.
    FileStatus[] jars = fs.globStatus(new Path("/texaspete/lib/*.jar"));
    if (jars != null && jars.length > 0) {
        for (FileStatus jar : jars) {
            LOG.info("Adding jar to DC/CP: " + jar.getPath());
            DistributedCache.addFileToClassPath(jar.getPath(), conf, fs);
        }
    } else {
        LOG.warn(
                "Did not add any jars to distributed cache. This job will probably throw a ClassNotFound exception.");
    }
}
From source file:org.smartfrog.services.hadoop.operations.utils.DfsUtils.java
License:Open Source License
/**
 * Move files that match the file pattern <i>srcPath</i> to a destination file.
 * When moving multiple files, the destination must be a directory; otherwise an
 * IOException is thrown.
 * Based on {@link org.apache.hadoop.fs.FsShell#rename(String, String)}.
 *
 * @param fileSystem filesystem to work with
 * @param srcPath    a file pattern specifying source files
 * @param dstPath    a destination file/directory
 * @throws IOException for any problem
 * @see org.apache.hadoop.fs.FileSystem#globStatus(Path)
 */
public static void rename(FileSystem fileSystem, Path srcPath, Path dstPath) throws IOException {
    Path[] srcs = FileUtil.stat2Paths(fileSystem.globStatus(srcPath), srcPath);
    // Only stat the destination when moving multiple files: the original called
    // getFileStatus(dstPath) unconditionally, which threw FileNotFoundException
    // for the legal single-file rename to a not-yet-existing destination.
    if (srcs.length > 1 && !fileSystem.getFileStatus(dstPath).isDir()) {
        throw new IOException("When moving multiple files, " + "destination should be a directory.");
    }
    for (Path src : srcs) {
        if (!fileSystem.rename(src, dstPath)) {
            FileStatus srcFstatus;
            FileStatus dstFstatus;
            try {
                srcFstatus = fileSystem.getFileStatus(src);
            } catch (FileNotFoundException e) {
                FileNotFoundException fnf = new FileNotFoundException(
                        src + ": No such file or directory in " + fileSystem.getUri());
                fnf.initCause(e);
                throw fnf;
            }
            try {
                dstFstatus = fileSystem.getFileStatus(dstPath);
            } catch (IOException ignored) {
                dstFstatus = null; // destination may not exist; that is fine here
            }
            if ((srcFstatus != null) && (dstFstatus != null)) {
                if (srcFstatus.isDir() && !dstFstatus.isDir()) {
                    // Report the actual failing source path (the original printed
                    // the whole pattern srcPath here, which misleads on globs).
                    throw new IOException("cannot overwrite non directory " + dstPath + " with directory "
                            + src + " in " + fileSystem.getUri());
                }
            }
            throw new IOException(
                    "Failed to rename '" + src + "' to '" + dstPath + "'" + " in " + fileSystem.getUri());
        }
    }
}
From source file:org.springframework.data.hadoop.fs.FsShell.java
License:Apache License
/**
 * Resolves each URI (glob patterns allowed) to the matching paths and returns a
 * collection whose pretty-printer renders each file's content, or a
 * "No such file or directory" message when a path cannot be opened.
 *
 * @param uris file URIs or glob patterns to read
 * @return unmodifiable collection of resolved paths with content-printing support
 */
public Collection<Path> cat(String... uris) {
    final Collection<Path> matches = new PrettyPrintList<Path>(new ListPrinter<Path>() {
        @Override
        public String toString(Path path) throws IOException {
            try {
                return getContent(getFS(path).open(path));
            } catch (IOException ex) {
                // Unreadable entries render as a message rather than failing the print.
                return "No such file or directory " + path.toUri();
            }
        }
    });
    try {
        if (ObjectUtils.isEmpty(uris)) {
            return Collections.unmodifiableCollection(matches);
        }
        for (String uri : uris) {
            Path pattern = new Path(uri);
            FileSystem fs = getFS(pattern);
            Path[] resolved = FileUtil.stat2Paths(fs.globStatus(pattern), pattern);
            matches.addAll(Arrays.asList(resolved));
        }
    } catch (IOException ex) {
        throw new HadoopException("Cannot execute command " + ex.getMessage(), ex);
    }
    return Collections.unmodifiableCollection(matches);
}
From source file:org.springframework.data.hadoop.fs.FsShell.java
License:Apache License
public void copyToLocal(boolean ignorecrc, boolean crc, String src, String localdst) { File dst = new File(localdst); Path srcpath = new Path(src); try {// ww w .j av a2 s.co m FileSystem srcFs = getFS(srcpath); srcFs.setVerifyChecksum(ignorecrc); if (crc && !(srcFs instanceof ChecksumFileSystem)) { crc = false; } FileStatus[] srcs = srcFs.globStatus(srcpath); boolean dstIsDir = dst.isDirectory(); if (srcs.length > 1 && !dstIsDir) { throw new IllegalArgumentException( "When copying multiple files, " + "destination should be a directory."); } for (FileStatus status : srcs) { Path p = status.getPath(); File f = dstIsDir ? new File(dst, p.getName()) : dst; copyToLocal(srcFs, p, f, crc); } } catch (IOException ex) { throw new HadoopException("Cannot copy resources " + ex.getMessage(), ex); } }
From source file:org.springframework.data.hadoop.fs.FsShell.java
License:Apache License
/**
 * Computes a content summary (file/directory/byte counts) for every path
 * matching each of the given URIs.
 *
 * @param quota include quota information in the printed summary when true
 * @param uris  paths or glob patterns to summarize
 * @return unmodifiable map from resolved path to its content summary
 */
public Map<Path, ContentSummary> count(final boolean quota, String... uris) {
    final Map<Path, ContentSummary> summaries = new PrettyPrintMap<Path, ContentSummary>(uris.length,
            new MapPrinter<Path, ContentSummary>() {
                @Override
                public String toString(Path path, ContentSummary summary) throws IOException {
                    return summary.toString(quota) + path;
                }
            });
    for (String uri : uris) {
        try {
            Path pattern = new Path(uri);
            final FileSystem fs = getFS(pattern);
            FileStatus[] matches = fs.globStatus(pattern);
            // Assert rejects both null and zero-length match arrays.
            Assert.notEmpty(matches, "Can not find listing for " + uri);
            for (FileStatus match : matches) {
                Path resolved = match.getPath();
                summaries.put(resolved, fs.getContentSummary(resolved));
            }
        } catch (IOException ex) {
            throw new HadoopException("Cannot find listing " + ex.getMessage(), ex);
        }
    }
    return Collections.unmodifiableMap(summaries);
}
From source file:org.springframework.data.hadoop.fs.FsShell.java
License:Apache License
public void cp(String src, String src2, String... dst) { Object[] va = parseVarargs(src, src2, dst); @SuppressWarnings("unchecked") List<Path> srcs = (List<Path>) va[0]; Path dstPath = (Path) va[1]; try {// www. j av a2 s.c o m FileSystem dstFs = dstPath.getFileSystem(configuration); boolean isDestDir = !dstFs.isFile(dstPath); if (StringUtils.hasText(src2) || (ObjectUtils.isEmpty(dst) && dst.length > 2)) { if (!isDestDir) { throw new IllegalArgumentException("When copying multiple files, destination " + dstPath.toUri() + " should be a directory."); } } for (Path path : srcs) { FileSystem srcFs = path.getFileSystem(configuration); Path[] from = FileUtil.stat2Paths(srcFs.globStatus(path), path); if (!ObjectUtils.isEmpty(from) && from.length > 1 && !isDestDir) { throw new IllegalArgumentException( "When copying multiple files, destination should be a directory."); } for (Path fromPath : from) { FileUtil.copy(srcFs, fromPath, dstFs, dstPath, false, configuration); } } } catch (IOException ex) { throw new HadoopException("Cannot copy resources " + ex.getMessage(), ex); } }