List of usage examples for org.apache.hadoop.fs FileSystem globStatus
public FileStatus[] globStatus(Path pathPattern) throws IOException
Return all the files that match pathPattern and are not checksum files.
From source file:org.apache.hama.examples.FastGraphGenTest.java
License:Apache License
@Test public void testJsonGraphGenerator() throws Exception { Configuration conf = new Configuration(); // vertex size : 20 // maximum edges : 10 // output path : /tmp/test // tasks num : 3 // output type : json // weight : 0 FastGraphGen.main(/* w w w.j a va2 s. co m*/ new String[] { "-v", "20", "-e", "10", "-o", TEST_OUTPUT, "-t", "1", "-of", "json", "-w", "0" }); FileSystem fs = FileSystem.get(conf); FileStatus[] globStatus = fs.globStatus(new Path(TEST_OUTPUT + "/part-*")); JSONParser parser = new JSONParser(); for (FileStatus fts : globStatus) { BufferedReader br = new BufferedReader(new InputStreamReader(fs.open(fts.getPath()))); try { String line; line = br.readLine(); while (line != null) { JSONArray jsonArray = (JSONArray) parser.parse(line); // the edge data begins at the third element. JSONArray edgeArray = (JSONArray) jsonArray.get(2); assertTrue(edgeArray.size() <= 10); for (Object obj : edgeArray) { JSONArray edge = (JSONArray) obj; assertTrue(Integer.parseInt(edge.get(0).toString()) < 20); assertTrue(Integer.parseInt(edge.get(0).toString()) >= 0); assertTrue(Integer.parseInt(edge.get(1).toString()) == 0); } line = br.readLine(); } } finally { br.close(); } } fs.delete(new Path(TEST_OUTPUT), true); }
From source file:org.apache.hama.examples.SymmetricMatrixGenTest.java
License:Apache License
@Test public void testGraphGenerator() throws Exception { Configuration conf = new Configuration(); SymmetricMatrixGen.main(new String[] { "20", "10", TEST_OUTPUT, "3" }); FileSystem fs = FileSystem.get(conf); FileStatus[] globStatus = fs.globStatus(new Path(TEST_OUTPUT + "/part-*")); for (FileStatus fts : globStatus) { SequenceFile.Reader reader = new SequenceFile.Reader(fs, fts.getPath(), conf); Text key = new Text(); TextArrayWritable value = new TextArrayWritable(); while (reader.next(key, value)) { String values = ""; for (Writable v : value.get()) { values += v.toString() + " "; }//from w w w . j av a2s.c o m LOG.info(fts.getPath() + ": " + key.toString() + " | " + values); } reader.close(); } fs.delete(new Path(TEST_OUTPUT), true); }
From source file:org.apache.hama.ml.kmeans.KMeansBSP.java
License:Apache License
/** * Reads output. The list of output records can be restricted to maxlines. * /* w w w . j av a 2 s .co m*/ * @param conf * @param outPath * @param fs * @param maxlines * @return the list of output records * @throws IOException */ public static List<String> readOutput(Configuration conf, Path outPath, FileSystem fs, int maxlines) throws IOException { List<String> output = new ArrayList<String>(); FileStatus[] globStatus = fs.globStatus(new Path(outPath + "/part-*")); for (FileStatus fts : globStatus) { BufferedReader reader = new BufferedReader(new InputStreamReader(fs.open(fts.getPath()))); String line = null; while ((line = reader.readLine()) != null) { String[] split = line.split("\t"); output.add(split[1] + " belongs to cluster " + split[0]); if (output.size() >= maxlines) return output; } } return output; }
From source file:org.apache.hama.pipes.Submitter.java
License:Apache License
@Override public int run(String[] args) throws Exception { CommandLineParser cli = new CommandLineParser(); if (args.length == 0) { cli.printUsage();/* w w w .j a v a 2 s .c o m*/ return 1; } LOG.debug("Hama pipes Submitter started!"); cli.addOption("input", false, "input path for bsp", "path"); cli.addOption("output", false, "output path from bsp", "path"); cli.addOption("jar", false, "job jar file", "path"); cli.addOption("inputformat", false, "java classname of InputFormat", "class"); // cli.addArgument("javareader", false, "is the RecordReader in Java"); cli.addOption("partitioner", false, "java classname of Partitioner", "class"); cli.addOption("outputformat", false, "java classname of OutputFormat", "class"); cli.addOption("cachefiles", false, "additional cache files to add", "space delimited paths"); cli.addOption("interpreter", false, "interpreter, like python or bash", "executable"); cli.addOption("jobname", false, "the jobname", "name"); cli.addOption("programArgs", false, "program arguments", "arguments"); cli.addOption("bspTasks", false, "how many bsp tasks to launch", "number"); cli.addOption("streaming", false, "if supplied, streaming is used instead of pipes", ""); cli.addOption("jobconf", false, "\"n1=v1,n2=v2,..\" (Deprecated) Optional. 
Add or override a JobConf property.", "key=val"); cli.addOption("program", false, "URI to application executable", "class"); Parser parser = cli.createParser(); try { // check generic arguments -conf GenericOptionsParser genericParser = new GenericOptionsParser(getConf(), args); // get other arguments CommandLine results = parser.parse(cli.options, genericParser.getRemainingArgs()); BSPJob job = new BSPJob(getConf()); if (results.hasOption("input")) { FileInputFormat.setInputPaths(job, results.getOptionValue("input")); } if (results.hasOption("output")) { FileOutputFormat.setOutputPath(job, new Path(results.getOptionValue("output"))); } if (results.hasOption("jar")) { job.setJar(results.getOptionValue("jar")); } if (results.hasOption("jobname")) { job.setJobName(results.getOptionValue("jobname")); } if (results.hasOption("inputformat")) { job.setInputFormat(getClass(results, "inputformat", conf, InputFormat.class)); } if (results.hasOption("partitioner")) { job.setPartitioner(getClass(results, "partitioner", conf, Partitioner.class)); } if (results.hasOption("outputformat")) { job.setOutputFormat(getClass(results, "outputformat", conf, OutputFormat.class)); } if (results.hasOption("streaming")) { LOG.info("Streaming enabled!"); job.set("hama.streaming.enabled", "true"); } if (results.hasOption("jobconf")) { LOG.warn("-jobconf option is deprecated, please use -D instead."); String options = results.getOptionValue("jobconf"); StringTokenizer tokenizer = new StringTokenizer(options, ","); while (tokenizer.hasMoreTokens()) { String keyVal = tokenizer.nextToken().trim(); String[] keyValSplit = keyVal.split("=", 2); job.set(keyValSplit[0], keyValSplit[1]); } } if (results.hasOption("bspTasks")) { int optionValue = Integer.parseInt(results.getOptionValue("bspTasks")); conf.setInt("bsp.local.tasks.maximum", optionValue); conf.setInt("bsp.peers.num", optionValue); } if (results.hasOption("program")) { String executablePath = results.getOptionValue("program"); 
setExecutable(job.getConfiguration(), executablePath); DistributedCache.addCacheFile(new Path(executablePath).toUri(), conf); } if (results.hasOption("interpreter")) { job.getConfiguration().set("hama.pipes.executable.interpretor", results.getOptionValue("interpreter")); } if (results.hasOption("programArgs")) { job.getConfiguration().set("hama.pipes.executable.args", Joiner.on(" ").join(results.getOptionValues("programArgs"))); // job.getConfiguration().set("hama.pipes.resolve.executable.args", // "true"); } if (results.hasOption("cachefiles")) { FileSystem fs = FileSystem.get(getConf()); String[] optionValues = results.getOptionValues("cachefiles"); for (String s : optionValues) { Path path = new Path(s); FileStatus[] globStatus = fs.globStatus(path); for (FileStatus f : globStatus) { if (!f.isDir()) { DistributedCache.addCacheFile(f.getPath().toUri(), job.getConfiguration()); } else { LOG.info("Ignoring directory " + f.getPath() + " while globbing."); } } } } // if they gave us a jar file, include it into the class path String jarFile = job.getJar(); if (jarFile != null) { @SuppressWarnings("deprecation") final URL[] urls = new URL[] { FileSystem.getLocal(conf).pathToFile(new Path(jarFile)).toURL() }; // FindBugs complains that creating a URLClassLoader should be // in a doPrivileged() block. ClassLoader loader = AccessController.doPrivileged(new PrivilegedAction<ClassLoader>() { @Override public ClassLoader run() { return new URLClassLoader(urls); } }); conf.setClassLoader(loader); } runJob(job); return 0; } catch (ParseException pe) { LOG.info("Error : " + pe); cli.printUsage(); return 1; } }
From source file:org.apache.hcatalog.mapreduce.FileOutputCommitterContainer.java
License:Apache License
/** * Run to discover dynamic partitions available *///from w w w . j a v a 2 s.c om private void discoverPartitions(JobContext context) throws IOException { if (!partitionsDiscovered) { // LOG.info("discover ptns called"); OutputJobInfo jobInfo = HCatOutputFormat.getJobInfo(context); harProcessor.setEnabled(jobInfo.getHarRequested()); List<Integer> dynamicPartCols = jobInfo.getPosOfDynPartCols(); int maxDynamicPartitions = jobInfo.getMaxDynamicPartitions(); Path loadPath = new Path(jobInfo.getLocation()); FileSystem fs = loadPath.getFileSystem(context.getConfiguration()); // construct a path pattern (e.g., /*/*) to find all dynamically generated paths String dynPathSpec = loadPath.toUri().getPath(); dynPathSpec = dynPathSpec.replaceAll("__HIVE_DEFAULT_PARTITION__", "*"); // LOG.info("Searching for "+dynPathSpec); Path pathPattern = new Path(dynPathSpec); FileStatus[] status = fs.globStatus(pathPattern); partitionsDiscoveredByPath = new LinkedHashMap<String, Map<String, String>>(); contextDiscoveredByPath = new LinkedHashMap<String, JobContext>(); if (status.length == 0) { // LOG.warn("No partition found genereated by dynamic partitioning in [" // +loadPath+"] with depth["+jobInfo.getTable().getPartitionKeysSize() // +"], dynSpec["+dynPathSpec+"]"); } else { if ((maxDynamicPartitions != -1) && (status.length > maxDynamicPartitions)) { this.partitionsDiscovered = true; throw new HCatException(ErrorType.ERROR_TOO_MANY_DYNAMIC_PTNS, "Number of dynamic partitions being created " + "exceeds configured max allowable partitions[" + maxDynamicPartitions + "], increase parameter [" + HiveConf.ConfVars.DYNAMICPARTITIONMAXPARTS.varname + "] if needed."); } for (FileStatus st : status) { LinkedHashMap<String, String> fullPartSpec = new LinkedHashMap<String, String>(); Warehouse.makeSpecFromName(fullPartSpec, st.getPath()); partitionsDiscoveredByPath.put(st.getPath().toString(), fullPartSpec); JobConf jobConf = (JobConf) context.getConfiguration(); JobContext currContext = 
HCatMapRedUtil.createJobContext(jobConf, context.getJobID(), InternalUtil.createReporter(HCatMapRedUtil.createTaskAttemptContext(jobConf, HCatHadoopShims.Instance.get().createTaskAttemptID()))); HCatOutputFormat.configureOutputStorageHandler(currContext, jobInfo, fullPartSpec); contextDiscoveredByPath.put(st.getPath().toString(), currContext); } } // for (Entry<String,Map<String,String>> spec : partitionsDiscoveredByPath.entrySet()){ // LOG.info("Partition "+ spec.getKey()); // for (Entry<String,String> e : spec.getValue().entrySet()){ // LOG.info(e.getKey() + "=>" +e.getValue()); // } // } this.partitionsDiscovered = true; } }
From source file:org.apache.hive.service.cli.TestScratchDir.java
License:Apache License
private void stageDirTest(String stageDirConfigStr, String stageDirName, boolean isLocal) throws IOException { String scratchDirStr = System.getProperty("test.tmp.dir") + File.separator + stageDirName; System.setProperty(stageDirConfigStr, scratchDirStr); ThriftCLIService service = new EmbeddedThriftBinaryCLIService(); ThriftCLIServiceClient client = new ThriftCLIServiceClient(service); final Path scratchDir = new Path(scratchDirStr); Configuration conf = new Configuration(); FileSystem fs = scratchDir.getFileSystem(conf); if (isLocal) { fs = FileSystem.getLocal(conf); }//w w w . j a v a 2 s. c om assertTrue(fs.exists(scratchDir)); FileStatus[] fStatus = fs.globStatus(scratchDir); boolean foo = fStatus[0].equals(new FsPermission((short) 0777)); assertEquals(new FsPermission((short) 0777), fStatus[0].getPermission()); service.stop(); fs.delete(scratchDir, true); System.clearProperty(stageDirConfigStr); }
From source file:org.apache.ivory.cleanup.AbstractCleanupHandler.java
License:Apache License
/**
 * Lists the log paths of an entity on a cluster.
 *
 * <p>The glob pattern comes from {@code getLogPath}, evaluated against the
 * cluster's "staging" location, and is matched on the cluster's file system.
 *
 * @param cluster the cluster whose staging location and file system are used
 * @param entity the entity whose logs are looked up
 * @return whatever {@code FileSystem.globStatus} returns for the log pattern
 * @throws IvoryException if globbing fails with an {@link IOException}
 */
protected FileStatus[] getAllLogs(org.apache.ivory.entity.v0.cluster.Cluster cluster, Entity entity)
    throws IvoryException {
  final String stagingLocation = ClusterHelper.getLocation(cluster, "staging");
  final Path logPattern = getLogPath(entity, stagingLocation);
  final FileSystem clusterFs = getFileSystem(cluster);
  try {
    return clusterFs.globStatus(logPattern);
  } catch (IOException ioe) {
    throw new IvoryException(ioe);
  }
}
From source file:org.apache.ivory.latedata.LateDataHandler.java
License:Apache License
public long usage(Path inPath, Configuration conf) throws IOException { FileSystem fs = inPath.getFileSystem(conf); FileStatus status[] = fs.globStatus(inPath); if (status == null || status.length == 0) { return 0; }// ww w. j av a2 s. c o m long totalSize = 0; for (FileStatus statu : status) { totalSize += fs.getContentSummary(statu.getPath()).getLength(); } return totalSize; }
From source file:org.apache.ivory.logging.LogProvider.java
License:Apache License
public String getResolvedRunId(FileSystem fs, Cluster cluster, Entity entity, Instance instance, String runId) throws IvoryException, IOException { if (StringUtils.isEmpty(runId)) { Path jobPath = new Path(ClusterHelper.getHdfsUrl(cluster), EntityUtil.getLogPath(cluster, entity) + "/job-" + EntityUtil.UTCtoURIDate(instance.instance) + "/*"); FileStatus[] runs = fs.globStatus(jobPath); if (runs.length > 0) { // this is the latest run, dirs are sorted in increasing // order of runs return runs[runs.length - 1].getPath().getName(); } else {/*from w w w . j a v a 2 s. c o m*/ LOG.warn("No run dirs are available in logs dir:" + jobPath); return "-"; } } else { Path jobPath = new Path(ClusterHelper.getHdfsUrl(cluster), EntityUtil.getLogPath(cluster, entity) + "/job-" + EntityUtil.UTCtoURIDate(instance.instance) + "/" + getFormatedRunId(runId)); if (fs.exists(jobPath)) { return getFormatedRunId(runId); } else { Log.warn("No run dirs are available in logs dir:" + jobPath); return "-"; } } }
From source file:org.apache.ivory.logging.LogProvider.java
License:Apache License
private Instance populateActionLogUrls(FileSystem fs, Cluster cluster, Entity entity, Instance instance, String formatedRunId) throws IvoryException, OozieClientException, IOException { Path actionPaths = new Path(ClusterHelper.getHdfsUrl(cluster), EntityUtil.getLogPath(cluster, entity) + "/job-" + EntityUtil.UTCtoURIDate(instance.instance) + "/" + formatedRunId + "/*"); FileStatus[] actions = fs.globStatus(actionPaths); InstanceAction[] instanceActions = new InstanceAction[actions.length - 1]; instance.actions = instanceActions;//w ww . j a va2s .com int i = 0; for (FileStatus file : actions) { Path filePath = file.getPath(); String dfsBrowserUrl = getDFSbrowserUrl( ClusterHelper.getHdfsUrl(cluster), EntityUtil.getLogPath(cluster, entity) + "/job-" + EntityUtil.UTCtoURIDate(instance.instance) + "/" + formatedRunId, file.getPath().getName()); if (filePath.getName().equals("oozie.log")) { instance.logFile = dfsBrowserUrl; continue; } InstanceAction instanceAction = new InstanceAction(getActionName(filePath.getName()), getActionStatus(filePath.getName()), dfsBrowserUrl); instanceActions[i++] = instanceAction; } return instance; }