Example usage for org.apache.hadoop.fs FileSystem globStatus

List of usage examples for org.apache.hadoop.fs FileSystem globStatus


On this page you can find example usages of org.apache.hadoop.fs.FileSystem.globStatus.


public FileStatus[] globStatus(Path pathPattern) throws IOException 

Source Link


Returns all the files that match pathPattern and are not checksum files.


From source file:org.apache.hama.examples.FastGraphGenTest.java

License:Apache License

public void testJsonGraphGenerator() throws Exception {
    Configuration conf = new Configuration();

    // vertex size : 20
    // maximum edges : 10
    // output path : /tmp/test
    // tasks num : 3
    // output type : json
    // weight : 0
    FastGraphGen.main(/* w w  w.j  a va2  s.  co  m*/
            new String[] { "-v", "20", "-e", "10", "-o", TEST_OUTPUT, "-t", "1", "-of", "json", "-w", "0" });
    FileSystem fs = FileSystem.get(conf);

    FileStatus[] globStatus = fs.globStatus(new Path(TEST_OUTPUT + "/part-*"));
    JSONParser parser = new JSONParser();
    for (FileStatus fts : globStatus) {
        BufferedReader br = new BufferedReader(new InputStreamReader(fs.open(fts.getPath())));
        try {
            String line;
            line = br.readLine();

            while (line != null) {
                JSONArray jsonArray = (JSONArray) parser.parse(line);

                // the edge data begins at the third element.
                JSONArray edgeArray = (JSONArray) jsonArray.get(2);
                assertTrue(edgeArray.size() <= 10);

                for (Object obj : edgeArray) {
                    JSONArray edge = (JSONArray) obj;
                    assertTrue(Integer.parseInt(edge.get(0).toString()) < 20);
                    assertTrue(Integer.parseInt(edge.get(0).toString()) >= 0);
                    assertTrue(Integer.parseInt(edge.get(1).toString()) == 0);
                line = br.readLine();
        } finally {

    fs.delete(new Path(TEST_OUTPUT), true);

From source file:org.apache.hama.examples.SymmetricMatrixGenTest.java

License:Apache License

public void testGraphGenerator() throws Exception {
    Configuration conf = new Configuration();

    SymmetricMatrixGen.main(new String[] { "20", "10", TEST_OUTPUT, "3" });
    FileSystem fs = FileSystem.get(conf);

    FileStatus[] globStatus = fs.globStatus(new Path(TEST_OUTPUT + "/part-*"));
    for (FileStatus fts : globStatus) {
        SequenceFile.Reader reader = new SequenceFile.Reader(fs, fts.getPath(), conf);
        Text key = new Text();
        TextArrayWritable value = new TextArrayWritable();

        while (reader.next(key, value)) {
            String values = "";
            for (Writable v : value.get()) {
                values += v.toString() + " ";
            }//from  w  w w . j av  a2s.c o  m
            LOG.info(fts.getPath() + ": " + key.toString() + " | " + values);

    fs.delete(new Path(TEST_OUTPUT), true);

From source file:org.apache.hama.ml.kmeans.KMeansBSP.java

License:Apache License

/**
 * Reads output. The list of output records can be restricted to maxlines.
 *
 * @param conf
 * @param outPath
 * @param fs
 * @param maxlines
 * @return the list of output records
 * @throws IOException
 */
public static List<String> readOutput(Configuration conf, Path outPath, FileSystem fs, int maxlines)
        throws IOException {
    List<String> output = new ArrayList<String>();

    FileStatus[] globStatus = fs.globStatus(new Path(outPath + "/part-*"));
    for (FileStatus fts : globStatus) {
        BufferedReader reader = new BufferedReader(new InputStreamReader(fs.open(fts.getPath())));
        String line = null;
        while ((line = reader.readLine()) != null) {
            String[] split = line.split("\t");
            output.add(split[1] + " belongs to cluster " + split[0]);

            if (output.size() >= maxlines)
                return output;

    return output;

From source file:org.apache.hama.pipes.Submitter.java

License:Apache License

/**
 * Parses the Hama pipes submitter command line and applies the recognized options to a new
 * {@code BSPJob}.
 *
 * <p>NOTE(review): this excerpt is incomplete. Closing braces are missing throughout, the bodies
 * of the "jar", "jobname" and "interpreter" options are absent, the "programArgs" statement has
 * lost its first line, and {@code conf} is used without a visible declaration. Whatever happens
 * between building the class loader and {@code return 0} is not shown either.
 *
 * @param args command-line arguments (generic Hadoop options followed by submitter options)
 * @return 1 if no arguments were given or the options could not be parsed, 0 otherwise
 * @throws Exception declared by the method; the visible code only catches ParseException
 */
public int run(String[] args) throws Exception {
    CommandLineParser cli = new CommandLineParser();
    if (args.length == 0) {
        cli.printUsage();
        return 1;

    LOG.debug("Hama pipes Submitter started!");

    // Register every supported option before creating the parser.
    cli.addOption("input", false, "input path for bsp", "path");
    cli.addOption("output", false, "output path from bsp", "path");

    cli.addOption("jar", false, "job jar file", "path");
    cli.addOption("inputformat", false, "java classname of InputFormat", "class");
    // cli.addArgument("javareader", false, "is the RecordReader in Java");

    cli.addOption("partitioner", false, "java classname of Partitioner", "class");
    cli.addOption("outputformat", false, "java classname of OutputFormat", "class");

    cli.addOption("cachefiles", false, "additional cache files to add", "space delimited paths");

    cli.addOption("interpreter", false, "interpreter, like python or bash", "executable");

    cli.addOption("jobname", false, "the jobname", "name");

    cli.addOption("programArgs", false, "program arguments", "arguments");
    cli.addOption("bspTasks", false, "how many bsp tasks to launch", "number");
    cli.addOption("streaming", false, "if supplied, streaming is used instead of pipes", "");

    cli.addOption("jobconf", false,
            "\"n1=v1,n2=v2,..\" (Deprecated) Optional. Add or override a JobConf property.", "key=val");

    cli.addOption("program", false, "URI to application executable", "class");
    Parser parser = cli.createParser();
    try {

        // check generic arguments -conf
        GenericOptionsParser genericParser = new GenericOptionsParser(getConf(), args);
        // get other arguments
        CommandLine results = parser.parse(cli.options, genericParser.getRemainingArgs());

        BSPJob job = new BSPJob(getConf());

        if (results.hasOption("input")) {
            FileInputFormat.setInputPaths(job, results.getOptionValue("input"));
        if (results.hasOption("output")) {
            FileOutputFormat.setOutputPath(job, new Path(results.getOptionValue("output")));
        // NOTE(review): the body of the "jar" option is missing from this excerpt.
        if (results.hasOption("jar")) {

        // NOTE(review): the body of the "jobname" option is missing from this excerpt.
        if (results.hasOption("jobname")) {

        if (results.hasOption("inputformat")) {
            job.setInputFormat(getClass(results, "inputformat", conf, InputFormat.class));

        if (results.hasOption("partitioner")) {
            job.setPartitioner(getClass(results, "partitioner", conf, Partitioner.class));

        if (results.hasOption("outputformat")) {
            job.setOutputFormat(getClass(results, "outputformat", conf, OutputFormat.class));

        if (results.hasOption("streaming")) {
            LOG.info("Streaming enabled!");
            job.set("hama.streaming.enabled", "true");

        // Deprecated: comma-separated key=value pairs copied into the job one by one.
        if (results.hasOption("jobconf")) {
            LOG.warn("-jobconf option is deprecated, please use -D instead.");
            String options = results.getOptionValue("jobconf");
            StringTokenizer tokenizer = new StringTokenizer(options, ",");
            while (tokenizer.hasMoreTokens()) {
                String keyVal = tokenizer.nextToken().trim();
                // Limit 2 keeps any further '=' characters inside the value.
                // NOTE(review): a token without '=' yields a one-element array, so
                // keyValSplit[1] would throw ArrayIndexOutOfBoundsException.
                String[] keyValSplit = keyVal.split("=", 2);
                job.set(keyValSplit[0], keyValSplit[1]);

        // The same task count is written to both the local-task maximum and the peer count.
        if (results.hasOption("bspTasks")) {
            int optionValue = Integer.parseInt(results.getOptionValue("bspTasks"));
            conf.setInt("bsp.local.tasks.maximum", optionValue);
            conf.setInt("bsp.peers.num", optionValue);

        // The executable is recorded on the job configuration and added as a cache file.
        if (results.hasOption("program")) {
            String executablePath = results.getOptionValue("program");
            setExecutable(job.getConfiguration(), executablePath);
            DistributedCache.addCacheFile(new Path(executablePath).toUri(), conf);

        // NOTE(review): the body of the "interpreter" option is missing from this excerpt.
        if (results.hasOption("interpreter")) {

        if (results.hasOption("programArgs")) {
                    // NOTE(review): the first line of this statement is missing from this excerpt;
                    // only the space-joined argument expression remains.
                    Joiner.on(" ").join(results.getOptionValues("programArgs")));
            // job.getConfiguration().set("hama.pipes.resolve.executable.args",
            // "true");

        // Each cache-file argument is treated as a glob; matching files are added to the
        // distributed cache and matching directories are logged and skipped.
        if (results.hasOption("cachefiles")) {
            FileSystem fs = FileSystem.get(getConf());
            String[] optionValues = results.getOptionValues("cachefiles");
            for (String s : optionValues) {
                Path path = new Path(s);
                // NOTE(review): globStatus can presumably return null for a non-matching path;
                // the loop below does not guard against that - confirm.
                FileStatus[] globStatus = fs.globStatus(path);
                for (FileStatus f : globStatus) {
                    if (!f.isDir()) {
                        DistributedCache.addCacheFile(f.getPath().toUri(), job.getConfiguration());
                    } else {
                        LOG.info("Ignoring directory " + f.getPath() + " while globbing.");

        // if they gave us a jar file, include it into the class path
        String jarFile = job.getJar();
        if (jarFile != null) {
            final URL[] urls = new URL[] { FileSystem.getLocal(conf).pathToFile(new Path(jarFile)).toURL() };
            // FindBugs complains that creating a URLClassLoader should be
            // in a doPrivileged() block.
            ClassLoader loader = AccessController.doPrivileged(new PrivilegedAction<ClassLoader>() {
                public ClassLoader run() {
                    return new URLClassLoader(urls);

        return 0;
    } catch (ParseException pe) {
        // Unparseable options are reported at info level and signalled with exit code 1.
        LOG.info("Error : " + pe);
        return 1;

From source file:org.apache.hcatalog.mapreduce.FileOutputCommitterContainer.java

License:Apache License

/**
 * Run to discover dynamic partitions available
 */
private void discoverPartitions(JobContext context) throws IOException {
    // Globs the job's output location for dynamically created partition directories and records,
    // per matched path, its partition spec and a dedicated JobContext. The work is skipped when
    // the partitionsDiscovered flag is already set.
    // NOTE(review): closing braces are missing from this excerpt and the createJobContext call
    // below is truncated.
    if (!partitionsDiscovered) {
        //      LOG.info("discover ptns called");
        OutputJobInfo jobInfo = HCatOutputFormat.getJobInfo(context);


        List<Integer> dynamicPartCols = jobInfo.getPosOfDynPartCols();
        int maxDynamicPartitions = jobInfo.getMaxDynamicPartitions();

        Path loadPath = new Path(jobInfo.getLocation());
        FileSystem fs = loadPath.getFileSystem(context.getConfiguration());

        // construct a path pattern (e.g., /*/*) to find all dynamically generated paths
        // by replacing every default-partition placeholder in the location with a wildcard
        String dynPathSpec = loadPath.toUri().getPath();
        dynPathSpec = dynPathSpec.replaceAll("__HIVE_DEFAULT_PARTITION__", "*");

        //      LOG.info("Searching for "+dynPathSpec);
        Path pathPattern = new Path(dynPathSpec);
        // NOTE(review): status is dereferenced below without a null check; globStatus can
        // presumably return null when nothing exists at the path - confirm.
        FileStatus[] status = fs.globStatus(pathPattern);

        partitionsDiscoveredByPath = new LinkedHashMap<String, Map<String, String>>();
        contextDiscoveredByPath = new LinkedHashMap<String, JobContext>();

        if (status.length == 0) {
            //        LOG.warn("No partition found genereated by dynamic partitioning in ["
            //            +loadPath+"] with depth["+jobInfo.getTable().getPartitionKeysSize()
            //            +"], dynSpec["+dynPathSpec+"]");
        } else {
            // A limit of -1 disables the check; otherwise too many matches abort discovery.
            // The flag is set before throwing, so a later call will not repeat the glob.
            if ((maxDynamicPartitions != -1) && (status.length > maxDynamicPartitions)) {
                this.partitionsDiscovered = true;
                throw new HCatException(ErrorType.ERROR_TOO_MANY_DYNAMIC_PTNS,
                        "Number of dynamic partitions being created "
                                + "exceeds configured max allowable partitions[" + maxDynamicPartitions
                                + "], increase parameter [" + HiveConf.ConfVars.DYNAMICPARTITIONMAXPARTS.varname
                                + "] if needed.");

            // Both maps are keyed by the matched path's string form.
            for (FileStatus st : status) {
                LinkedHashMap<String, String> fullPartSpec = new LinkedHashMap<String, String>();
                Warehouse.makeSpecFromName(fullPartSpec, st.getPath());
                partitionsDiscoveredByPath.put(st.getPath().toString(), fullPartSpec);
                JobConf jobConf = (JobConf) context.getConfiguration();
                // NOTE(review): the remaining argument(s) of this call are missing from this excerpt.
                JobContext currContext = HCatMapRedUtil.createJobContext(jobConf, context.getJobID(),
                HCatOutputFormat.configureOutputStorageHandler(currContext, jobInfo, fullPartSpec);
                contextDiscoveredByPath.put(st.getPath().toString(), currContext);

        //      for (Entry<String,Map<String,String>> spec : partitionsDiscoveredByPath.entrySet()){
        //        LOG.info("Partition "+ spec.getKey());
        //        for (Entry<String,String> e : spec.getValue().entrySet()){
        //          LOG.info(e.getKey() + "=>" +e.getValue());
        //        }
        //      }

        this.partitionsDiscovered = true;
From source file:org.apache.hive.service.cli.TestScratchDir.java

License:Apache License

private void stageDirTest(String stageDirConfigStr, String stageDirName, boolean isLocal) throws IOException {
    String scratchDirStr = System.getProperty("test.tmp.dir") + File.separator + stageDirName;
    System.setProperty(stageDirConfigStr, scratchDirStr);
    ThriftCLIService service = new EmbeddedThriftBinaryCLIService();
    ThriftCLIServiceClient client = new ThriftCLIServiceClient(service);
    final Path scratchDir = new Path(scratchDirStr);
    Configuration conf = new Configuration();
    FileSystem fs = scratchDir.getFileSystem(conf);
    if (isLocal) {
        fs = FileSystem.getLocal(conf);
    }//w  w  w .  j  a v a 2  s.  c  om

    FileStatus[] fStatus = fs.globStatus(scratchDir);
    boolean foo = fStatus[0].equals(new FsPermission((short) 0777));
    assertEquals(new FsPermission((short) 0777), fStatus[0].getPermission());
    fs.delete(scratchDir, true);

From source file:org.apache.ivory.cleanup.AbstractCleanupHandler.java

License:Apache License

protected FileStatus[] getAllLogs(org.apache.ivory.entity.v0.cluster.Cluster cluster, Entity entity)
        throws IvoryException {
    String stagingPath = ClusterHelper.getLocation(cluster, "staging");
    Path logPath = getLogPath(entity, stagingPath);
    FileSystem fs = getFileSystem(cluster);
    FileStatus[] paths;/*from  ww w .  j  a v  a2 s  . c o  m*/
    try {
        paths = fs.globStatus(logPath);
    } catch (IOException e) {
        throw new IvoryException(e);
    return paths;

From source file:org.apache.ivory.latedata.LateDataHandler.java

License:Apache License

public long usage(Path inPath, Configuration conf) throws IOException {
    FileSystem fs = inPath.getFileSystem(conf);
    FileStatus status[] = fs.globStatus(inPath);
    if (status == null || status.length == 0) {
        return 0;
    }// ww  w.  j  av  a2 s.  c  o  m
    long totalSize = 0;
    for (FileStatus statu : status) {
        totalSize += fs.getContentSummary(statu.getPath()).getLength();
    return totalSize;

From source file:org.apache.ivory.logging.LogProvider.java

License:Apache License

public String getResolvedRunId(FileSystem fs, Cluster cluster, Entity entity, Instance instance, String runId)
        throws IvoryException, IOException {
    if (StringUtils.isEmpty(runId)) {
        Path jobPath = new Path(ClusterHelper.getHdfsUrl(cluster), EntityUtil.getLogPath(cluster, entity)
                + "/job-" + EntityUtil.UTCtoURIDate(instance.instance) + "/*");

        FileStatus[] runs = fs.globStatus(jobPath);
        if (runs.length > 0) {
            // this is the latest run, dirs are sorted in increasing
            // order of runs
            return runs[runs.length - 1].getPath().getName();
        } else {/*from w w  w  . j  a v  a 2 s.  c  o m*/
            LOG.warn("No run dirs are available in logs dir:" + jobPath);
            return "-";
    } else {
        Path jobPath = new Path(ClusterHelper.getHdfsUrl(cluster), EntityUtil.getLogPath(cluster, entity)
                + "/job-" + EntityUtil.UTCtoURIDate(instance.instance) + "/" + getFormatedRunId(runId));
        if (fs.exists(jobPath)) {
            return getFormatedRunId(runId);
        } else {
            Log.warn("No run dirs are available in logs dir:" + jobPath);
            return "-";


From source file:org.apache.ivory.logging.LogProvider.java

License:Apache License

/**
 * Fills {@code instance.actions} (and {@code instance.logFile} for "oozie.log") with browser
 * URLs for the files found in the run's log directory, and returns the same instance.
 *
 * NOTE(review): closing braces are missing from this excerpt and the getDFSbrowserUrl call is
 * truncated.
 */
private Instance populateActionLogUrls(FileSystem fs, Cluster cluster, Entity entity, Instance instance,
        String formatedRunId) throws IvoryException, OozieClientException, IOException {

    // Glob every entry directly below <log path>/job-<instance date>/<run id>.
    Path actionPaths = new Path(ClusterHelper.getHdfsUrl(cluster), EntityUtil.getLogPath(cluster, entity)
            + "/job-" + EntityUtil.UTCtoURIDate(instance.instance) + "/" + formatedRunId + "/*");
    FileStatus[] actions = fs.globStatus(actionPaths);
    // NOTE(review): the array holds one element fewer than the glob result while the visible loop
    // stores one entry per file; presumably the "oozie.log" entry is skipped by a statement
    // missing from this excerpt - confirm. An empty glob result would give a negative size here.
    InstanceAction[] instanceActions = new InstanceAction[actions.length - 1];
    instance.actions = instanceActions;
    int i = 0;
    for (FileStatus file : actions) {
        Path filePath = file.getPath();
        // NOTE(review): the trailing argument(s) of this call are missing from this excerpt.
        String dfsBrowserUrl = getDFSbrowserUrl(
                ClusterHelper.getHdfsUrl(cluster), EntityUtil.getLogPath(cluster, entity) + "/job-"
                        + EntityUtil.UTCtoURIDate(instance.instance) + "/" + formatedRunId,
        // The Oozie log becomes the instance-level log file.
        if (filePath.getName().equals("oozie.log")) {
            instance.logFile = dfsBrowserUrl;

        // The action's name and status are both derived from the file name.
        InstanceAction instanceAction = new InstanceAction(getActionName(filePath.getName()),
                getActionStatus(filePath.getName()), dfsBrowserUrl);
        instanceActions[i++] = instanceAction;

    return instance;
