Example usage for org.apache.hadoop.fs FileSystem globStatus

List of usage examples for org.apache.hadoop.fs FileSystem globStatus

Introduction

In this page you can find the example usage for org.apache.hadoop.fs FileSystem globStatus.

Prototype

public FileStatus[] globStatus(Path pathPattern) throws IOException 

Source Link

Document

Return all the files that match filePattern and are not checksum files.

Usage

From source file:org.opencloudengine.flamingo.mapreduce.util.HdfsUtils.java

License:Apache License

/**
 * Recursively deletes every HDFS path matching the given directory pattern.
 *
 * @param hdfsUrl       HDFS URL used as the default filesystem
 * @param hdfsDirectory HDFS directory URL (may be a glob pattern) to delete
 * @throws java.io.IOException if the filesystem cannot be reached or a delete fails
 */
public static void deleteFromHdfs(String hdfsUrl, String hdfsDirectory) throws IOException {
    Configuration conf = new Configuration();
    conf.set("fs.default.name", hdfsUrl);
    FileSystem fs = FileSystem.get(conf);
    // globStatus returns null when the pattern matches nothing; the original
    // dereferenced the result unconditionally and would NPE in that case.
    FileStatus[] statuses = fs.globStatus(new Path(hdfsDirectory));
    if (statuses != null) {
        for (FileStatus fileStatus : statuses) {
            fs.delete(fileStatus.getPath(), true); // true = recursive delete
        }
    }
}

From source file:org.opencloudengine.flamingo.mapreduce.util.HdfsUtils.java

License:Apache License

/**
 * HDFS?  ?  ?? ./*from ww w .  java2s  .  com*/
 *
 * @param hdfsDirectory ??  HDFS Directory URL
 * @throws java.io.IOException ??    
 */
public static void deleteFromHdfs(String hdfsDirectory) throws IOException {
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);
    FileStatus[] statuses = fs.globStatus(new Path(hdfsDirectory));
    for (int i = 0; i < statuses.length; i++) {
        FileStatus fileStatus = statuses[i];
        fs.delete(fileStatus.getPath(), true);
    }
}

From source file:org.openflamingo.engine.util.HdfsUtils.java

License:Apache License

/**
 *  ?   ?  ??./*from ww  w .ja  v  a 2 s  .com*/
 *
 * @param source ?? 
 * @param target ?? 
 * @param fs     Hadoop FileSystem
 */
public static void move(String source, String target, FileSystem fs) throws Exception {
    Path srcPath = new Path(source);
    Path[] srcs = FileUtil.stat2Paths(fs.globStatus(srcPath), srcPath);
    Path dst = new Path(target);
    if (srcs.length > 1 && !fs.getFileStatus(dst).isDir()) {
        throw new FileSystemException("When moving multiple files, destination should be a directory.");
    }
    for (int i = 0; i < srcs.length; i++) {
        if (!fs.rename(srcs[i], dst)) {
            FileStatus srcFstatus = null;
            FileStatus dstFstatus = null;
            try {
                srcFstatus = fs.getFileStatus(srcs[i]);
            } catch (FileNotFoundException e) {
                throw new FileNotFoundException(srcs[i] + ": No such file or directory");
            }
            try {
                dstFstatus = fs.getFileStatus(dst);
            } catch (IOException e) {
                // Nothing
            }
            if ((srcFstatus != null) && (dstFstatus != null)) {
                if (srcFstatus.isDir() && !dstFstatus.isDir()) {
                    throw new FileSystemException(
                            "cannot overwrite non directory " + dst + " with directory " + srcs[i]);
                }
            }
            throw new FileSystemException("Failed to rename " + srcs[i] + " to " + dst);
        }
    }
}

From source file:org.sleuthkit.hadoop.clustering.ClusterUtil.java

License:Apache License

/**
 * Read a dictionary in {@link SequenceFile} generated by
 * {@link org.apache.mahout.utils.vectors.text.DictionaryVectorizer}
 *
 * @param conf/*from   w  w  w.j av  a2 s  .co  m*/
 * @param fs
 * @param filePattern
 *          <PATH TO DICTIONARY>/dictionary.file-*
 * @throws IOException
 */
public static String[] loadTermDictionary(Configuration conf, FileSystem fs, String filePattern)
        throws IOException {
    FileStatus[] dictionaryFiles = fs.globStatus(new Path(filePattern));
    OpenObjectIntHashMap<String> dict = new OpenObjectIntHashMap<String>();
    Writable key = new Text();
    IntWritable value = new IntWritable();
    for (FileStatus fileStatus : dictionaryFiles) {
        Path path = fileStatus.getPath();
        SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, conf);
        // key is term value is id
        while (reader.next(key, value)) {
            dict.put(key.toString(), value.get());
        }
    }
    String[] dictionary = new String[dict.size()];
    for (String feature : dict.keys()) {
        dictionary[dict.get(feature)] = feature;
    }
    return dictionary;
}

From source file:org.sleuthkit.hadoop.core.SKJobFactory.java

License:Open Source License

/**
 * Adds every jar under {@code /texaspete/lib/} on the default filesystem to
 * the job's distributed cache / classpath. Logs a warning when no jars are
 * found, since downstream tasks will likely fail with ClassNotFoundException.
 *
 * @param conf job configuration to register the jars with
 * @throws IOException if the filesystem cannot be accessed
 */
public static void addDependencies(Configuration conf) throws IOException {
    FileSystem fs = FileSystem.get(conf);
    FileStatus[] jars = fs.globStatus(new Path("/texaspete/lib/*.jar"));
    // globStatus returns null when /texaspete/lib does not exist; the original
    // dereferenced jars.length unconditionally and would NPE in that case.
    if (jars != null && jars.length > 0) {
        for (FileStatus jar : jars) {
            LOG.info("Adding jar to DC/CP: " + jar.getPath());
            DistributedCache.addFileToClassPath(jar.getPath(), conf, fs);
        }
    } else {
        LOG.warn(
                "Did not add any jars to distributed cache. This job will probably throw a ClassNotFound exception.");
    }
}

From source file:org.smartfrog.services.hadoop.operations.utils.DfsUtils.java

License:Open Source License

/**
 * Move files that match the file pattern <i>srcPath</i>
 * to a destination file.
 * When moving multiple files, the destination must be a directory.
 * Otherwise, IOException is thrown.
 * Based on {@link org.apache.hadoop.fs.FsShell#rename(String, String)}
 *
 * @param fileSystem filesystem to work with
 * @param srcPath    a file pattern specifying source files
 * @param dstPath    a destination file/directory
 * @throws IOException for any problem
 * @see org.apache.hadoop.fs.FileSystem#globStatus(Path)
 */
public static void rename(FileSystem fileSystem, Path srcPath, Path dstPath) throws IOException {
    Path[] srcs = FileUtil.stat2Paths(fileSystem.globStatus(srcPath), srcPath);
    // Only stat the destination when multiple sources force it to be an
    // existing directory. The original stat'ed unconditionally, so renaming a
    // single file to a not-yet-existing path failed with FileNotFoundException
    // before the rename was even attempted.
    if (srcs.length > 1 && !fileSystem.getFileStatus(dstPath).isDir()) {
        throw new IOException("When moving multiple files, " + "destination should be a directory.");
    }
    for (Path src : srcs) {
        if (!fileSystem.rename(src, dstPath)) {
            // rename() returned false: diagnose why before reporting failure.
            FileStatus srcFstatus;
            FileStatus dstFstatus;
            try {
                srcFstatus = fileSystem.getFileStatus(src);
            } catch (FileNotFoundException e) {
                FileNotFoundException fnf = new FileNotFoundException(
                        src + ": No such file or directory in " + fileSystem.getUri());
                fnf.initCause(e);
                throw fnf;
            }
            try {
                dstFstatus = fileSystem.getFileStatus(dstPath);
            } catch (IOException ignored) {
                // Destination may legitimately not exist; treat as absent.
                dstFstatus = null;
            }
            if ((srcFstatus != null) && (dstFstatus != null)) {
                if (srcFstatus.isDir() && !dstFstatus.isDir()) {
                    throw new IOException("cannot overwrite non directory " + dstPath + " with directory "
                            + srcPath + " in " + fileSystem.getUri());
                }
            }
            throw new IOException(
                    "Failed to rename '" + srcPath + "' to '" + dstPath + "'" + " in " + fileSystem.getUri());
        }
    }
}

From source file:org.springframework.data.hadoop.fs.FsShell.java

License:Apache License

/**
 * Resolves each URI (globs allowed) to its matching paths and returns a
 * collection whose pretty-printed form is the concatenated file contents;
 * unreadable entries render as "No such file or directory".
 */
public Collection<Path> cat(String... uris) {
    final Collection<Path> matchedPaths = new PrettyPrintList<Path>(new ListPrinter<Path>() {
        @Override
        public String toString(Path path) throws IOException {
            try {
                return getContent(getFS(path).open(path));
            } catch (IOException ex) {
                return "No such file or directory " + path.toUri();
            }
        }
    });

    try {
        if (!ObjectUtils.isEmpty(uris)) {
            for (String uri : uris) {
                Path pattern = new Path(uri);
                FileSystem fs = getFS(pattern);
                Path[] expanded = FileUtil.stat2Paths(fs.globStatus(pattern), pattern);
                matchedPaths.addAll(Arrays.asList(expanded));
            }
        }
    } catch (IOException ex) {
        throw new HadoopException("Cannot execute command " + ex.getMessage(), ex);
    }

    return Collections.unmodifiableCollection(matchedPaths);
}

From source file:org.springframework.data.hadoop.fs.FsShell.java

License:Apache License

/**
 * Copies every path matching {@code src} (globs allowed) to the local
 * destination. When multiple files match, {@code localdst} must be a
 * directory.
 *
 * @param ignorecrc when true, skip checksum verification of the source
 * @param crc       when true, also copy the .crc checksum files (only
 *                  meaningful on a ChecksumFileSystem)
 * @param src       source path or glob pattern
 * @param localdst  local destination file or directory
 */
public void copyToLocal(boolean ignorecrc, boolean crc, String src, String localdst) {
    File dst = new File(localdst);
    Path srcpath = new Path(src);

    try {
        FileSystem srcFs = getFS(srcpath);
        // 'ignorecrc' means SKIP verification, so the flag must be inverted;
        // the original passed it straight through, enabling verification
        // exactly when the caller asked to ignore it.
        srcFs.setVerifyChecksum(!ignorecrc);
        if (crc && !(srcFs instanceof ChecksumFileSystem)) {
            crc = false; // crc copy is only supported on checksum filesystems
        }
        // NOTE(review): globStatus can return null on a no-match pattern,
        // which would NPE below — confirm desired behavior for missing sources.
        FileStatus[] srcs = srcFs.globStatus(srcpath);
        boolean dstIsDir = dst.isDirectory();
        if (srcs.length > 1 && !dstIsDir) {
            throw new IllegalArgumentException(
                    "When copying multiple files, " + "destination should be a directory.");
        }
        for (FileStatus status : srcs) {
            Path p = status.getPath();
            File f = dstIsDir ? new File(dst, p.getName()) : dst;
            copyToLocal(srcFs, p, f, crc);
        }
    } catch (IOException ex) {
        throw new HadoopException("Cannot copy resources " + ex.getMessage(), ex);
    }
}

From source file:org.springframework.data.hadoop.fs.FsShell.java

License:Apache License

/**
 * Expands each URI (globs allowed) and maps every matched path to its
 * {@link ContentSummary}; the map pretty-prints each entry as the summary
 * (optionally with quota information) followed by the path.
 */
public Map<Path, ContentSummary> count(final boolean quota, String... uris) {

    final Map<Path, ContentSummary> summaries = new PrettyPrintMap<Path, ContentSummary>(uris.length,
            new MapPrinter<Path, ContentSummary>() {
                @Override
                public String toString(Path path, ContentSummary summary) throws IOException {
                    return summary.toString(quota) + path;
                }
            });

    for (String src : uris) {
        try {
            Path pattern = new Path(src);
            final FileSystem fs = getFS(pattern);
            FileStatus[] matches = fs.globStatus(pattern);
            Assert.notEmpty(matches, "Can not find listing for " + src);
            for (FileStatus match : matches) {
                Path matchedPath = match.getPath();
                summaries.put(matchedPath, fs.getContentSummary(matchedPath));
            }
        } catch (IOException ex) {
            throw new HadoopException("Cannot find listing " + ex.getMessage(), ex);
        }
    }

    return Collections.unmodifiableMap(summaries);
}

From source file:org.springframework.data.hadoop.fs.FsShell.java

License:Apache License

/**
 * Copies one or more sources to a destination. {@code src} is always a
 * source; {@code src2} is an optional second source; the varargs carry any
 * further sources with the destination as the last element (resolved by
 * {@code parseVarargs}). When more than one source is given, the
 * destination must be a directory.
 */
public void cp(String src, String src2, String... dst) {
    Object[] va = parseVarargs(src, src2, dst);
    @SuppressWarnings("unchecked")
    List<Path> srcs = (List<Path>) va[0];
    Path dstPath = (Path) va[1];

    try {

        FileSystem dstFs = dstPath.getFileSystem(configuration);
        boolean isDestDir = !dstFs.isFile(dstPath);

        // Multiple sources exist when src2 is given or when the varargs hold
        // more than just the destination. The original second clause
        // (ObjectUtils.isEmpty(dst) && dst.length > 2) was a contradiction —
        // an empty array can never have length > 2 — so the check never fired
        // for varargs-supplied sources.
        if (StringUtils.hasText(src2) || (!ObjectUtils.isEmpty(dst) && dst.length >= 2)) {
            if (!isDestDir) {
                throw new IllegalArgumentException("When copying multiple files, destination " + dstPath.toUri()
                        + " should be a directory.");
            }
        }

        for (Path path : srcs) {
            FileSystem srcFs = path.getFileSystem(configuration);
            Path[] from = FileUtil.stat2Paths(srcFs.globStatus(path), path);
            if (!ObjectUtils.isEmpty(from) && from.length > 1 && !isDestDir) {
                throw new IllegalArgumentException(
                        "When copying multiple files, destination should be a directory.");
            }
            for (Path fromPath : from) {
                FileUtil.copy(srcFs, fromPath, dstFs, dstPath, false, configuration);
            }
        }
    } catch (IOException ex) {
        throw new HadoopException("Cannot copy resources " + ex.getMessage(), ex);
    }
}