Example usage for org.apache.hadoop.fs FileSystem globStatus

List of usage examples for org.apache.hadoop.fs FileSystem globStatus

Introduction

On this page you can find example usages of org.apache.hadoop.fs FileSystem globStatus.

Prototype

public FileStatus[] globStatus(Path pathPattern) throws IOException 

Document

Returns all the files that match the given pathPattern and are not checksum files.
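
Below is a minimal sketch of a typical call, assuming the Hadoop client libraries and a default Configuration are available. The output directory and the part-* glob are hypothetical and chosen only for illustration; globStatus can return null (for example, when a non-glob path does not exist), so the sketch checks for that before iterating.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class GlobStatusExample {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);

        // Expand the glob into the matching, non-checksum files.
        // "/tmp/output/part-*" is a hypothetical path used only for illustration.
        FileStatus[] matches = fs.globStatus(new Path("/tmp/output/part-*"));

        // globStatus may return null (e.g. for a non-glob path that does not exist),
        // so guard before iterating.
        if (matches != null) {
            for (FileStatus status : matches) {
                System.out.println(status.getPath() + " (" + status.getLen() + " bytes)");
            }
        }
    }
}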

Usage

From source file:org.apache.hama.examples.FastGraphGenTest.java

License:Apache License

@Test
public void testJsonGraphGenerator() throws Exception {
    Configuration conf = new Configuration();

    // vertex size : 20
    // maximum edges : 10
    // output path : /tmp/test
    // tasks num : 1
    // output type : json
    // weight : 0
    FastGraphGen.main(
            new String[] { "-v", "20", "-e", "10", "-o", TEST_OUTPUT, "-t", "1", "-of", "json", "-w", "0" });
    FileSystem fs = FileSystem.get(conf);

    FileStatus[] globStatus = fs.globStatus(new Path(TEST_OUTPUT + "/part-*"));
    JSONParser parser = new JSONParser();
    for (FileStatus fts : globStatus) {
        BufferedReader br = new BufferedReader(new InputStreamReader(fs.open(fts.getPath())));
        try {
            String line;
            line = br.readLine();

            while (line != null) {
                JSONArray jsonArray = (JSONArray) parser.parse(line);

                // the edge data begins at the third element.
                JSONArray edgeArray = (JSONArray) jsonArray.get(2);
                assertTrue(edgeArray.size() <= 10);

                for (Object obj : edgeArray) {
                    JSONArray edge = (JSONArray) obj;
                    assertTrue(Integer.parseInt(edge.get(0).toString()) < 20);
                    assertTrue(Integer.parseInt(edge.get(0).toString()) >= 0);
                    assertTrue(Integer.parseInt(edge.get(1).toString()) == 0);
                }
                line = br.readLine();
            }
        } finally {
            br.close();
        }
    }

    fs.delete(new Path(TEST_OUTPUT), true);
}

From source file:org.apache.hama.examples.SymmetricMatrixGenTest.java

License:Apache License

@Test
public void testGraphGenerator() throws Exception {
    Configuration conf = new Configuration();

    SymmetricMatrixGen.main(new String[] { "20", "10", TEST_OUTPUT, "3" });
    FileSystem fs = FileSystem.get(conf);

    FileStatus[] globStatus = fs.globStatus(new Path(TEST_OUTPUT + "/part-*"));
    for (FileStatus fts : globStatus) {
        SequenceFile.Reader reader = new SequenceFile.Reader(fs, fts.getPath(), conf);
        Text key = new Text();
        TextArrayWritable value = new TextArrayWritable();

        while (reader.next(key, value)) {
            String values = "";
            for (Writable v : value.get()) {
                values += v.toString() + " ";
            }
            LOG.info(fts.getPath() + ": " + key.toString() + " | " + values);
        }
        reader.close();
    }

    fs.delete(new Path(TEST_OUTPUT), true);
}

From source file:org.apache.hama.ml.kmeans.KMeansBSP.java

License:Apache License

/**
 * Reads output. The list of output records can be restricted to maxlines.
 *
 * @param conf
 * @param outPath
 * @param fs
 * @param maxlines
 * @return the list of output records
 * @throws IOException
 */
public static List<String> readOutput(Configuration conf, Path outPath, FileSystem fs, int maxlines)
        throws IOException {
    List<String> output = new ArrayList<String>();

    FileStatus[] globStatus = fs.globStatus(new Path(outPath + "/part-*"));
    for (FileStatus fts : globStatus) {
        BufferedReader reader = new BufferedReader(new InputStreamReader(fs.open(fts.getPath())));
        String line = null;
        while ((line = reader.readLine()) != null) {
            String[] split = line.split("\t");
            output.add(split[1] + " belongs to cluster " + split[0]);

            if (output.size() >= maxlines)
                return output;
        }
    }

    return output;
}

From source file:org.apache.hama.pipes.Submitter.java

License:Apache License

@Override
public int run(String[] args) throws Exception {
    CommandLineParser cli = new CommandLineParser();
    if (args.length == 0) {
        cli.printUsage();
        return 1;
    }

    LOG.debug("Hama pipes Submitter started!");

    cli.addOption("input", false, "input path for bsp", "path");
    cli.addOption("output", false, "output path from bsp", "path");

    cli.addOption("jar", false, "job jar file", "path");
    cli.addOption("inputformat", false, "java classname of InputFormat", "class");
    // cli.addArgument("javareader", false, "is the RecordReader in Java");

    cli.addOption("partitioner", false, "java classname of Partitioner", "class");
    cli.addOption("outputformat", false, "java classname of OutputFormat", "class");

    cli.addOption("cachefiles", false, "additional cache files to add", "space delimited paths");

    cli.addOption("interpreter", false, "interpreter, like python or bash", "executable");

    cli.addOption("jobname", false, "the jobname", "name");

    cli.addOption("programArgs", false, "program arguments", "arguments");
    cli.addOption("bspTasks", false, "how many bsp tasks to launch", "number");
    cli.addOption("streaming", false, "if supplied, streaming is used instead of pipes", "");

    cli.addOption("jobconf", false,
            "\"n1=v1,n2=v2,..\" (Deprecated) Optional. Add or override a JobConf property.", "key=val");

    cli.addOption("program", false, "URI to application executable", "class");
    Parser parser = cli.createParser();
    try {

        // check generic arguments -conf
        GenericOptionsParser genericParser = new GenericOptionsParser(getConf(), args);
        // get other arguments
        CommandLine results = parser.parse(cli.options, genericParser.getRemainingArgs());

        BSPJob job = new BSPJob(getConf());

        if (results.hasOption("input")) {
            FileInputFormat.setInputPaths(job, results.getOptionValue("input"));
        }
        if (results.hasOption("output")) {
            FileOutputFormat.setOutputPath(job, new Path(results.getOptionValue("output")));
        }
        if (results.hasOption("jar")) {
            job.setJar(results.getOptionValue("jar"));
        }

        if (results.hasOption("jobname")) {
            job.setJobName(results.getOptionValue("jobname"));
        }

        if (results.hasOption("inputformat")) {
            job.setInputFormat(getClass(results, "inputformat", conf, InputFormat.class));
        }

        if (results.hasOption("partitioner")) {
            job.setPartitioner(getClass(results, "partitioner", conf, Partitioner.class));
        }

        if (results.hasOption("outputformat")) {
            job.setOutputFormat(getClass(results, "outputformat", conf, OutputFormat.class));
        }

        if (results.hasOption("streaming")) {
            LOG.info("Streaming enabled!");
            job.set("hama.streaming.enabled", "true");
        }

        if (results.hasOption("jobconf")) {
            LOG.warn("-jobconf option is deprecated, please use -D instead.");
            String options = results.getOptionValue("jobconf");
            StringTokenizer tokenizer = new StringTokenizer(options, ",");
            while (tokenizer.hasMoreTokens()) {
                String keyVal = tokenizer.nextToken().trim();
                String[] keyValSplit = keyVal.split("=", 2);
                job.set(keyValSplit[0], keyValSplit[1]);
            }
        }

        if (results.hasOption("bspTasks")) {
            int optionValue = Integer.parseInt(results.getOptionValue("bspTasks"));
            conf.setInt("bsp.local.tasks.maximum", optionValue);
            conf.setInt("bsp.peers.num", optionValue);
        }

        if (results.hasOption("program")) {
            String executablePath = results.getOptionValue("program");
            setExecutable(job.getConfiguration(), executablePath);
            DistributedCache.addCacheFile(new Path(executablePath).toUri(), conf);
        }

        if (results.hasOption("interpreter")) {
            job.getConfiguration().set("hama.pipes.executable.interpretor",
                    results.getOptionValue("interpreter"));
        }

        if (results.hasOption("programArgs")) {
            job.getConfiguration().set("hama.pipes.executable.args",
                    Joiner.on(" ").join(results.getOptionValues("programArgs")));
            // job.getConfiguration().set("hama.pipes.resolve.executable.args",
            // "true");
        }

        if (results.hasOption("cachefiles")) {
            FileSystem fs = FileSystem.get(getConf());
            String[] optionValues = results.getOptionValues("cachefiles");
            for (String s : optionValues) {
                Path path = new Path(s);
                FileStatus[] globStatus = fs.globStatus(path);
                for (FileStatus f : globStatus) {
                    if (!f.isDir()) {
                        DistributedCache.addCacheFile(f.getPath().toUri(), job.getConfiguration());
                    } else {
                        LOG.info("Ignoring directory " + f.getPath() + " while globbing.");
                    }
                }
            }
        }

        // if they gave us a jar file, include it into the class path
        String jarFile = job.getJar();
        if (jarFile != null) {
            @SuppressWarnings("deprecation")
            final URL[] urls = new URL[] { FileSystem.getLocal(conf).pathToFile(new Path(jarFile)).toURL() };
            // FindBugs complains that creating a URLClassLoader should be
            // in a doPrivileged() block.
            ClassLoader loader = AccessController.doPrivileged(new PrivilegedAction<ClassLoader>() {
                @Override
                public ClassLoader run() {
                    return new URLClassLoader(urls);
                }
            });
            conf.setClassLoader(loader);
        }

        runJob(job);
        return 0;
    } catch (ParseException pe) {
        LOG.info("Error : " + pe);
        cli.printUsage();
        return 1;
    }

}

From source file:org.apache.hcatalog.mapreduce.FileOutputCommitterContainer.java

License:Apache License

/**
 * Run to discover dynamic partitions available
 */
private void discoverPartitions(JobContext context) throws IOException {
    if (!partitionsDiscovered) {
        //      LOG.info("discover ptns called");
        OutputJobInfo jobInfo = HCatOutputFormat.getJobInfo(context);

        harProcessor.setEnabled(jobInfo.getHarRequested());

        List<Integer> dynamicPartCols = jobInfo.getPosOfDynPartCols();
        int maxDynamicPartitions = jobInfo.getMaxDynamicPartitions();

        Path loadPath = new Path(jobInfo.getLocation());
        FileSystem fs = loadPath.getFileSystem(context.getConfiguration());

        // construct a path pattern (e.g., /*/*) to find all dynamically generated paths
        String dynPathSpec = loadPath.toUri().getPath();
        dynPathSpec = dynPathSpec.replaceAll("__HIVE_DEFAULT_PARTITION__", "*");

        //      LOG.info("Searching for "+dynPathSpec);
        Path pathPattern = new Path(dynPathSpec);
        FileStatus[] status = fs.globStatus(pathPattern);

        partitionsDiscoveredByPath = new LinkedHashMap<String, Map<String, String>>();
        contextDiscoveredByPath = new LinkedHashMap<String, JobContext>();

        if (status.length == 0) {
            //        LOG.warn("No partition found genereated by dynamic partitioning in ["
            //            +loadPath+"] with depth["+jobInfo.getTable().getPartitionKeysSize()
            //            +"], dynSpec["+dynPathSpec+"]");
        } else {
            if ((maxDynamicPartitions != -1) && (status.length > maxDynamicPartitions)) {
                this.partitionsDiscovered = true;
                throw new HCatException(ErrorType.ERROR_TOO_MANY_DYNAMIC_PTNS,
                        "Number of dynamic partitions being created "
                                + "exceeds configured max allowable partitions[" + maxDynamicPartitions
                                + "], increase parameter [" + HiveConf.ConfVars.DYNAMICPARTITIONMAXPARTS.varname
                                + "] if needed.");
            }

            for (FileStatus st : status) {
                LinkedHashMap<String, String> fullPartSpec = new LinkedHashMap<String, String>();
                Warehouse.makeSpecFromName(fullPartSpec, st.getPath());
                partitionsDiscoveredByPath.put(st.getPath().toString(), fullPartSpec);
                JobConf jobConf = (JobConf) context.getConfiguration();
                JobContext currContext = HCatMapRedUtil.createJobContext(jobConf, context.getJobID(),
                        InternalUtil.createReporter(HCatMapRedUtil.createTaskAttemptContext(jobConf,
                                HCatHadoopShims.Instance.get().createTaskAttemptID())));
                HCatOutputFormat.configureOutputStorageHandler(currContext, jobInfo, fullPartSpec);
                contextDiscoveredByPath.put(st.getPath().toString(), currContext);
            }
        }

        //      for (Entry<String,Map<String,String>> spec : partitionsDiscoveredByPath.entrySet()){
        //        LOG.info("Partition "+ spec.getKey());
        //        for (Entry<String,String> e : spec.getValue().entrySet()){
        //          LOG.info(e.getKey() + "=>" +e.getValue());
        //        }
        //      }

        this.partitionsDiscovered = true;
    }
}

From source file:org.apache.hive.service.cli.TestScratchDir.java

License:Apache License

private void stageDirTest(String stageDirConfigStr, String stageDirName, boolean isLocal) throws IOException {
    String scratchDirStr = System.getProperty("test.tmp.dir") + File.separator + stageDirName;
    System.setProperty(stageDirConfigStr, scratchDirStr);
    ThriftCLIService service = new EmbeddedThriftBinaryCLIService();
    ThriftCLIServiceClient client = new ThriftCLIServiceClient(service);
    final Path scratchDir = new Path(scratchDirStr);
    Configuration conf = new Configuration();
    FileSystem fs = scratchDir.getFileSystem(conf);
    if (isLocal) {
        fs = FileSystem.getLocal(conf);
    }
    assertTrue(fs.exists(scratchDir));

    FileStatus[] fStatus = fs.globStatus(scratchDir);
    // the scratch dir is expected to be created with 0777 permissions
    assertEquals(new FsPermission((short) 0777), fStatus[0].getPermission());
    service.stop();
    fs.delete(scratchDir, true);
    System.clearProperty(stageDirConfigStr);
}

From source file:org.apache.ivory.cleanup.AbstractCleanupHandler.java

License:Apache License

protected FileStatus[] getAllLogs(org.apache.ivory.entity.v0.cluster.Cluster cluster, Entity entity)
        throws IvoryException {
    String stagingPath = ClusterHelper.getLocation(cluster, "staging");
    Path logPath = getLogPath(entity, stagingPath);
    FileSystem fs = getFileSystem(cluster);
    FileStatus[] paths;
    try {
        paths = fs.globStatus(logPath);
    } catch (IOException e) {
        throw new IvoryException(e);
    }
    return paths;
}

From source file:org.apache.ivory.latedata.LateDataHandler.java

License:Apache License

public long usage(Path inPath, Configuration conf) throws IOException {
    FileSystem fs = inPath.getFileSystem(conf);
    FileStatus status[] = fs.globStatus(inPath);
    if (status == null || status.length == 0) {
        return 0;
    }
    long totalSize = 0;
    for (FileStatus stat : status) {
        totalSize += fs.getContentSummary(stat.getPath()).getLength();
    }
    return totalSize;
}

From source file:org.apache.ivory.logging.LogProvider.java

License:Apache License

public String getResolvedRunId(FileSystem fs, Cluster cluster, Entity entity, Instance instance, String runId)
        throws IvoryException, IOException {
    if (StringUtils.isEmpty(runId)) {
        Path jobPath = new Path(ClusterHelper.getHdfsUrl(cluster), EntityUtil.getLogPath(cluster, entity)
                + "/job-" + EntityUtil.UTCtoURIDate(instance.instance) + "/*");

        FileStatus[] runs = fs.globStatus(jobPath);
        if (runs.length > 0) {
            // this is the latest run, dirs are sorted in increasing
            // order of runs
            return runs[runs.length - 1].getPath().getName();
        } else {
            LOG.warn("No run dirs are available in logs dir:" + jobPath);
            return "-";
        }
    } else {
        Path jobPath = new Path(ClusterHelper.getHdfsUrl(cluster), EntityUtil.getLogPath(cluster, entity)
                + "/job-" + EntityUtil.UTCtoURIDate(instance.instance) + "/" + getFormatedRunId(runId));
        if (fs.exists(jobPath)) {
            return getFormatedRunId(runId);
        } else {
            LOG.warn("No run dirs are available in logs dir:" + jobPath);
            return "-";
        }
    }

}

From source file:org.apache.ivory.logging.LogProvider.java

License:Apache License

private Instance populateActionLogUrls(FileSystem fs, Cluster cluster, Entity entity, Instance instance,
        String formatedRunId) throws IvoryException, OozieClientException, IOException {

    Path actionPaths = new Path(ClusterHelper.getHdfsUrl(cluster), EntityUtil.getLogPath(cluster, entity)
            + "/job-" + EntityUtil.UTCtoURIDate(instance.instance) + "/" + formatedRunId + "/*");
    FileStatus[] actions = fs.globStatus(actionPaths);
    InstanceAction[] instanceActions = new InstanceAction[actions.length - 1];
    instance.actions = instanceActions;
    int i = 0;
    for (FileStatus file : actions) {
        Path filePath = file.getPath();
        String dfsBrowserUrl = getDFSbrowserUrl(
                ClusterHelper.getHdfsUrl(cluster), EntityUtil.getLogPath(cluster, entity) + "/job-"
                        + EntityUtil.UTCtoURIDate(instance.instance) + "/" + formatedRunId,
                file.getPath().getName());
        if (filePath.getName().equals("oozie.log")) {
            instance.logFile = dfsBrowserUrl;
            continue;
        }

        InstanceAction instanceAction = new InstanceAction(getActionName(filePath.getName()),
                getActionStatus(filePath.getName()), dfsBrowserUrl);
        instanceActions[i++] = instanceAction;
    }

    return instance;

}