Example usage for org.apache.hadoop.fs FileSystem globStatus

List of usage examples for org.apache.hadoop.fs FileSystem globStatus

Introduction

In this page you can find the example usage for org.apache.hadoop.fs FileSystem globStatus.

Prototype

public FileStatus[] globStatus(Path pathPattern) throws IOException 

Source Link

Document

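Returns all the files that match pathPattern and are not checksum files. Returns null if pathPattern contains no glob and the path does not exist, and an empty array if pathPattern contains a glob that matches nothing, so callers should check for null before iterating over the result.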

Usage

From source file:com.constellio.sdk.tests.FactoriesTestFeatures.java

/**
 * Recursively deletes every entry matched by the relative glob {@code "*"} on the
 * file system identified by {@code url}, acting as {@code user}.
 *
 * @param user the Hadoop user name; stored in the {@code HADOOP_USER_NAME} system
 *             property and the {@code hadoop.job.ugi} configuration key
 * @param url  the value used for {@code fs.defaultFS}
 * @throws RuntimeException if {@code user} or {@code url} is null ("No config"), or
 *                          wrapping any {@link IOException} raised by the file system
 */
private void deleteFromHadoop(String user, String url) {
    // Validate before touching system properties: System.setProperty throws a
    // NullPointerException for a null value, which previously pre-empted this check.
    if (url == null || user == null) {
        throw new RuntimeException("No config");
    }

    System.setProperty("HADOOP_USER_NAME", user);

    Configuration hadoopConfig = new Configuration();
    hadoopConfig.set("fs.defaultFS", url);
    hadoopConfig.set("hadoop.job.ugi", user);

    try {
        FileSystem hdfs = FileSystem.get(hadoopConfig);
        for (FileStatus file : hdfs.globStatus(new Path("*"))) {
            // 'true' requests a recursive delete so non-empty directories are removed too.
            hdfs.delete(file.getPath(), true);
        }
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
}

From source file:com.conversantmedia.mapreduce.example.PrepareInputsExample.java

License:Apache License

/**
 * Copies every file matching the configured input pattern into a freshly created,
 * uniquely named working directory under {@code /tmp}.
 *
 * @throws IOException if the file system cannot be accessed, a copy fails, or the
 *                     input is a non-glob path that does not exist
 */
@DriverInit
public void copyFilesToWorking() throws IOException {
    FileSystem fs = FileSystem.get(getConf());

    // A random UUID keeps concurrent runs from sharing a working directory.
    this.workingDirectory = new Path("/tmp/" + UUID.randomUUID().toString());
    fs.mkdirs(workingDirectory);

    Path inputPattern = new Path(context.getInput());
    FileStatus[] files = fs.globStatus(inputPattern);
    if (files == null) {
        // globStatus returns null (not an empty array) for a non-glob path that does
        // not exist; report that explicitly instead of failing with a bare NPE.
        throw new IOException("Input path does not exist: " + inputPattern);
    }

    for (FileStatus file : files) {
        Path dest = new Path(workingDirectory, file.getPath().getName());
        // 'false' leaves the source in place (copy, not move).
        FileUtil.copy(fs, file.getPath(), fs, dest, false, getConf());
    }
}

From source file:com.conversantmedia.mapreduce.tool.BaseTool.java

License:Apache License

/**
 * Resolves {@code input} to the list of file statuses it denotes.
 * <ul>
 *   <li>an existing directory yields the statuses returned by listing it;</li>
 *   <li>an existing non-directory yields its own status;</li>
 *   <li>a path that does not exist is treated as a glob pattern and expanded.</li>
 * </ul>
 *
 * @param input a concrete path or a glob pattern
 * @return a mutable list of matching statuses; empty when nothing matches
 * @throws IOException if the file system cannot be queried
 */
protected List<FileStatus> getInputFiles(Path input) throws IOException {
    FileSystem fs = FileSystem.get(getConf());
    List<FileStatus> status = new ArrayList<>();

    if (fs.exists(input)) {
        FileStatus inputStatus = fs.getFileStatus(input);
        if (inputStatus.isDirectory()) {
            // Copy into the ArrayList rather than returning the fixed-size
            // Arrays.asList view, so the result is mutable in every branch.
            status.addAll(Arrays.asList(fs.listStatus(input)));
        } else {
            status.add(inputStatus);
        }
    } else {
        // The path does not exist as given, so it must be a glob pattern. globStatus
        // returns null when the pattern contains no glob characters and the path is
        // missing - exactly the "plain path that isn't there" case - so guard it.
        FileStatus[] matches = fs.globStatus(input);
        if (matches != null) {
            status.addAll(Arrays.asList(matches));
        }
    }
    return status;
}

From source file:com.datasalt.pangool.utils.HadoopUtils.java

License:Apache License

/**
 * Reads maps of integer -> double from glob paths like "folder/part-r*" and merges
 * them into a single map. Files are merged in the order returned by the file system;
 * for duplicate keys the entry from the later file wins.
 *
 * @param glob the path or glob pattern selecting the files to read
 * @param fs   the file system to resolve the pattern against
 * @return the merged map; empty when the pattern matches nothing
 * @throws IOException if the pattern cannot be expanded or a file cannot be read
 */
public static HashMap<Integer, Double> readIntDoubleMapFromGlob(Path glob, FileSystem fs) throws IOException {
    HashMap<Integer, Double> ret = new HashMap<Integer, Double>();
    FileStatus[] status = fs.globStatus(glob);
    if (status == null) {
        // globStatus returns null for a non-glob path that does not exist; treat it
        // the same as a glob with no matches instead of failing with an NPE.
        return ret;
    }
    for (FileStatus fileS : status) {
        ret.putAll(readIntDoubleMap(fileS.getPath(), fs));
    }
    return ret;
}

From source file:com.datasalt.pangool.utils.HadoopUtils.java

License:Apache License

/**
 * Reads maps of integer -> integer from glob paths like "folder/part-r*" and merges
 * them into a single map. Files are merged in the order returned by the file system;
 * for duplicate keys the entry from the later file wins.
 *
 * @param glob the path or glob pattern selecting the files to read
 * @param fs   the file system to resolve the pattern against
 * @return the merged map; empty when the pattern matches nothing
 * @throws IOException if the pattern cannot be expanded or a file cannot be read
 */
public static HashMap<Integer, Integer> readIntIntMapFromGlob(Path glob, FileSystem fs) throws IOException {
    HashMap<Integer, Integer> ret = new HashMap<Integer, Integer>();
    FileStatus[] status = fs.globStatus(glob);
    if (status == null) {
        // globStatus returns null for a non-glob path that does not exist; treat it
        // the same as a glob with no matches instead of failing with an NPE.
        return ret;
    }
    for (FileStatus fileS : status) {
        ret.putAll(readIntIntMap(fileS.getPath(), fs));
    }
    return ret;
}

From source file:com.ebay.erl.mobius.core.builder.AbstractDatasetBuilder.java

License:Apache License

/**
 * Add the <code>paths</code> to the underline dataset.  A boolean
 * flag <code>validatePathExistance</code> to specify if Mobius
 * needs to verify the specified <code>paths</code> exist or not.
 * <p>/*from  w  ww.  j  av  a 2s.c om*/
 * 
 * If <code>validatePathExistance</code> is true, and one of the
 * <code>paths</code> doesn't exist, <code>IOException</code> will
 * be thrown.
 * <p>
 * 
 * If a path exists and it's a folder, {@link #checkTouchFile(FileSystem, Path)} 
 * will be called to see if a touch file exists under that folder or not.
 * The default implementation of <code>checkTouchFile</code> always return
 * true, which means the dataset builder doesn't check touch file by default.
 * If this is a need to check touch file, the subclass should override that
 * function, and when the funciton return false, <code>IOException</code>
 * will be thrown here for that specific path.
 */
protected ACTUAL_BUILDER_IMPL addInputPath(boolean validatePathExistance, Path... paths) throws IOException {
    if (paths == null || paths.length == 0) {
        throw new IllegalArgumentException("Please specify at least one path");
    }

    FileSystem fs = FileSystem.get(this.mobiusJob.getConf());

    for (Path aPath : paths) {
        FileStatus[] fileStatus = null;

        try {
            fileStatus = fs.globStatus(aPath);
        } catch (NullPointerException e) {
            LOGGER.warn("FileSystem list globStatus thrown NPE", e);
        }

        if (fileStatus == null) {
            if (validatePathExistance) {
                throw new FileNotFoundException(aPath.toString() + " doesn't exist on file system.");
            } else {
                // no need to validate, as the input
                // for this dataset is coming from
                // the output of the other dataset.
                this.getDataset().addInputs(aPath);
            }
        } else {
            // file(s) exists, add inputs
            for (FileStatus aFileStatus : fileStatus) {
                Path p = aFileStatus.getPath();
                if (!fs.isFile(p)) {
                    if (!this.checkTouchFile(fs, p)) {
                        throw new IllegalStateException(
                                "No touch file under " + p.toString() + ", this dataset is not ready.");
                    } else {
                        this.getDataset().addInputs(p);
                    }
                } else {
                    this.getDataset().addInputs(p);
                }
            }
        }
    }

    return (ACTUAL_BUILDER_IMPL) this;
}

From source file:com.elex.dmp.lda.CVB0Driver.java

License:Apache License

/**
 * Computes the number of terms in a dictionary stored as one or more sequence files
 * of (Text, IntWritable) records, as one more than the largest value seen.
 *
 * @param conf           the configuration used to open the file system and readers
 * @param dictionaryPath a path or glob pattern selecting the dictionary files
 * @return the largest value found plus one; 0 when no records are found
 * @throws IOException if the files cannot be listed or read
 */
private static int getNumTerms(Configuration conf, Path dictionaryPath) throws IOException {
    FileSystem fs = dictionaryPath.getFileSystem(conf);
    Text key = new Text();
    IntWritable value = new IntWritable();
    int maxTermId = -1;

    FileStatus[] dictionaryFiles = fs.globStatus(dictionaryPath);
    if (dictionaryFiles == null) {
        // globStatus returns null for a non-glob path that does not exist;
        // treat it as "no dictionary files" rather than failing with an NPE.
        return maxTermId + 1;
    }

    for (FileStatus stat : dictionaryFiles) {
        SequenceFile.Reader reader = new SequenceFile.Reader(fs, stat.getPath(), conf);
        try {
            while (reader.next(key, value)) {
                maxTermId = Math.max(maxTermId, value.get());
            }
        } finally {
            // The reader was previously never closed, leaking one open stream per file.
            reader.close();
        }
    }
    return maxTermId + 1;
}

From source file:com.flipkart.fdp.migration.distcp.utils.FileCountDriver.java

License:Apache License

/**
 * Collects the trimmed paths of all files selected by {@code filePath}. Files matched
 * directly are added first; every matched directory is then expanded through
 * {@code getFileStatusRecursive}.
 *
 * @param filePath     a path or glob pattern to expand
 * @param fs           the file system to resolve the pattern against
 * @param destBasePath passed through to {@code trimExtension} and
 *                     {@code getFileStatusRecursive}
 * @return the collected file paths; empty when nothing matches
 * @throws IOException if the file system cannot be queried
 */
public List<String> getAllFilePath(Path filePath, FileSystem fs, String destBasePath) throws IOException {
    List<String> fileList = new ArrayList<String>();
    List<String> inputPaths = new ArrayList<String>();

    FileStatus[] fileStatus = fs.globStatus(filePath);
    if (fileStatus == null) {
        // globStatus returns null for a non-glob path that does not exist;
        // handle it like "no matches" instead of failing with an NPE below.
        fileStatus = new FileStatus[0];
    }

    for (FileStatus fileStat : fileStatus) {
        String plainPath = fileStat.getPath().toUri().getPath();
        if (fileStat.isFile()) {
            fileList.add(trimExtension(plainPath, destBasePath));
        } else {
            // Directories are deferred and walked recursively after this pass.
            inputPaths.add(plainPath);
        }
    }

    System.out.println("InputPaths size : " + inputPaths.size());
    for (String path : inputPaths) {
        fileList.addAll(getFileStatusRecursive(new Path(path), fs, destBasePath));
    }

    return fileList;
}

From source file:com.google.cloud.dataflow.sdk.io.hdfs.HDFSFileSource.java

License:Apache License

/**
 * When validation is enabled, checks that the configured file pattern matches at
 * least one file and fails the state check otherwise. Does nothing when validation
 * is disabled.
 *
 * @throws RuntimeException wrapping any {@link IOException} or
 *                          {@link URISyntaxException} raised while resolving the pattern
 */
@Override
public void validate() {
    if (!validate) {
        return;
    }

    try {
        URI location = new URI(filepattern);
        FileSystem fileSystem = FileSystem.get(location, Job.getInstance().getConfiguration());
        FileStatus[] matches = fileSystem.globStatus(new Path(filepattern));
        // A null result and an empty array both mean that nothing matched.
        boolean foundAny = matches != null && matches.length > 0;
        checkState(foundAny, "Unable to find any files matching %s",
                filepattern);
    } catch (IOException | URISyntaxException e) {
        throw new RuntimeException(e);
    }
}

From source file:com.grantingersoll.intell.clustering.KMeansClusteringEngine.java

License:Apache License

/**
 * Loads every cluster written to the {@code part-*} sequence files under the job's
 * output path, keyed by cluster id.
 *
 * @param job the clustering job whose {@code output} path and {@code conf} are used
 * @return a map from cluster id to cluster; empty when no part files are found
 * @throws Exception if the files cannot be read or a key/value instance cannot be created
 */
private static Map<Integer, Cluster> loadClusters(ClusterJob job) throws Exception {
    Map<Integer, Cluster> result = new HashMap<Integer, Cluster>();

    FileSystem fs = job.output.getFileSystem(job.conf);
    FileStatus[] partFiles = fs.globStatus(new Path(job.output, "part-*"));
    if (partFiles == null) {
        // Defensive: globStatus is documented to return null in some cases.
        return result;
    }

    for (FileStatus seqFile : partFiles) {
        SequenceFile.Reader reader = new SequenceFile.Reader(fs, seqFile.getPath(), job.conf);
        try {
            Class<? extends Writable> valueClass = reader.getValueClass().asSubclass(Writable.class);
            Writable key = reader.getKeyClass().asSubclass(Writable.class).newInstance();
            Writable value = valueClass.newInstance();
            while (reader.next(key, value)) {
                Cluster cluster = (Cluster) value;
                result.put(cluster.getId(), cluster);
                // reader.next() deserializes into the instance it is given. Reusing one
                // instance made every map entry alias the same object, which ended up
                // holding only the last record read; allocate a fresh one per record.
                value = valueClass.newInstance();
            }
        } finally {
            reader.close();
        }
    }
    return result;
}