Usage examples for org.apache.hadoop.fs.FileSystem.mkdirs
public boolean mkdirs(Path f) throws IOException
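Before the per-project examples, a minimal, self-contained sketch of the call itself; the configuration defaults and the /tmp/mkdirs-example path are placeholders for illustration, not taken from any of the projects below.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class MkdirsExample {
    public static void main(String[] args) throws IOException {
        // Picks up fs.defaultFS from the configuration files on the classpath.
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);

        Path dir = new Path("/tmp/mkdirs-example"); // placeholder path
        // mkdirs creates the directory and all missing parents (like `mkdir -p`);
        // it returns true on success, and an already-existing directory is not an error.
        if (!fs.mkdirs(dir)) {
            throw new IOException("Could not create " + dir);
        }
        // An overload also accepts permissions, e.g.
        // fs.mkdirs(dir, org.apache.hadoop.fs.permission.FsPermission.valueOf("-rwxrwx--x"));
    }
}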
From source file: etl.cmd.test.MiniOozieTestCase.java
License: Apache License

@Override
protected void setUp() throws Exception {
    System.setProperty("hadoop20", "true");
    super.setUp();
    FileSystem fs = getFileSystem();
    String remoteShareLibFolder = "/user/" + getOozieUser() + "/share/lib";
    fs.delete(new Path(remoteShareLibFolder), true);
    fs.mkdirs(new Path(remoteShareLibFolder));
    LocalOozie.start();
}
From source file: etl.cmd.test.XFsTestCase.java
License: Apache License

private Path initFileSystem(FileSystem fs) throws Exception {
    Path path = new Path(fs.getWorkingDirectory(), java.util.UUID.randomUUID().toString());
    Path testDirInFs = fs.makeQualified(path);
    System.out.println(XLog.format("Setting FS testcase work dir[{0}]", testDirInFs));
    if (fs.exists(testDirInFs)) {
        setAllPermissions(fs, testDirInFs);
    }
    fs.delete(testDirInFs, true);
    if (!fs.mkdirs(path)) {
        throw new IOException(XLog.format("Could not create FS testcase dir [{0}]", testDirInFs));
    }
    fs.setOwner(testDirInFs, getTestUser(), getTestGroup());
    fs.setPermission(testDirInFs, FsPermission.valueOf("-rwxrwx--x"));
    return testDirInFs;
}
From source file: etl.cmd.test.XTestCase.java
License: Apache License

private void setUpEmbeddedHadoop(String testCaseDir) throws Exception {
    if (dfsCluster == null && mrCluster == null) {
        if (System.getProperty("hadoop.log.dir") == null) {
            System.setProperty("hadoop.log.dir", testCaseDir);
        }
        String oozieUser = getOozieUser();
        JobConf conf = createDFSConfig();
        String[] userGroups = new String[] { getTestGroup(), getTestGroup2() };
        UserGroupInformation.createUserForTesting(oozieUser, userGroups);
        UserGroupInformation.createUserForTesting(getTestUser(), userGroups);
        UserGroupInformation.createUserForTesting(getTestUser2(), userGroups);
        UserGroupInformation.createUserForTesting(getTestUser3(), new String[] { "users" });
        try {
            MiniDFSCluster.Builder builder = new MiniDFSCluster.Builder(conf);
            dfsCluster = builder.build();
            FileSystem fileSystem = dfsCluster.getFileSystem();
            fileSystem.mkdirs(new Path("target/test-data"));
            fileSystem.mkdirs(new Path("target/test-data" + "/minicluster/mapred"));
            fileSystem.mkdirs(new Path("/user"));
            fileSystem.mkdirs(new Path("/tmp"));
            fileSystem.mkdirs(new Path("/hadoop/mapred/system"));
            fileSystem.setPermission(new Path("target/test-data"), FsPermission.valueOf("-rwxrwxrwx"));
            fileSystem.setPermission(new Path("target/test-data" + "/minicluster"),
                    FsPermission.valueOf("-rwxrwxrwx"));
            fileSystem.setPermission(new Path("target/test-data" + "/minicluster/mapred"),
                    FsPermission.valueOf("-rwxrwxrwx"));
            fileSystem.setPermission(new Path("/user"), FsPermission.valueOf("-rwxrwxrwx"));
            fileSystem.setPermission(new Path("/tmp"), FsPermission.valueOf("-rwxrwxrwx"));
            fileSystem.setPermission(new Path("/hadoop/mapred/system"), FsPermission.valueOf("-rwx------"));
            mrCluster = MiniMRClientClusterFactory.create(this.getClass(), 1, conf);
            Configuration jobConf = mrCluster.getConfig();
            System.setProperty(OOZIE_TEST_JOB_TRACKER, jobConf.get("mapreduce.jobtracker.address"));
            String rmAddress = jobConf.get("yarn.resourcemanager.address");
            log.info("Job tracker: " + rmAddress);
            if (rmAddress != null) {
                System.setProperty(OOZIE_TEST_JOB_TRACKER, rmAddress);
            }
            System.setProperty(OOZIE_TEST_NAME_NODE, jobConf.get("fs.defaultFS"));
            ProxyUsers.refreshSuperUserGroupsConfiguration(conf);
        } catch (Exception ex) {
            shutdownMiniCluster();
            throw ex;
        }
        new MiniClusterShutdownMonitor().start();
    }
}
From source file: etl.cmd.test.XTestCase.java
License: Apache License

private void setUpEmbeddedHadoop2() throws Exception {
    if (dfsCluster != null && dfsCluster2 == null) {
        // Trick the dfs location for MiniDFSCluster, since it doesn't accept the location as input
        String testBuildDataSaved = System.getProperty("test.build.data", "build/test/data");
        try {
            System.setProperty("test.build.data", FilenameUtils.concat(testBuildDataSaved, "2"));
            // Only a DFS cluster is created, based upon the current need
            MiniDFSCluster.Builder builder = new MiniDFSCluster.Builder(createDFSConfig());
            dfsCluster2 = builder.build();
            FileSystem fileSystem = dfsCluster2.getFileSystem();
            fileSystem.mkdirs(new Path("target/test-data"));
            fileSystem.mkdirs(new Path("/user"));
            fileSystem.mkdirs(new Path("/tmp"));
            fileSystem.setPermission(new Path("target/test-data"), FsPermission.valueOf("-rwxrwxrwx"));
            fileSystem.setPermission(new Path("/user"), FsPermission.valueOf("-rwxrwxrwx"));
            fileSystem.setPermission(new Path("/tmp"), FsPermission.valueOf("-rwxrwxrwx"));
            System.setProperty(OOZIE_TEST_NAME_NODE2, fileSystem.getConf().get("fs.defaultFS"));
        } catch (Exception ex) {
            shutdownMiniCluster2();
            throw ex;
        } finally {
            // Restore the previous value
            System.setProperty("test.build.data", testBuildDataSaved);
        }
    }
}
From source file: eu.edisonproject.classification.tfidf.mapreduce.CompetencesDistanceDriver.java
License: Apache License

@Override
public int run(String[] args) {
    try {
        Configuration conf = HBaseConfiguration.create();
        // Additional output using TextOutputFormat.
        conf.set("file.names", args[3]);

        Job job = Job.getInstance(conf);
        // TableMapReduceUtil.addDependencyJars(job);
        job.setJarByClass(CompetencesDistanceDriver.class);
        // This row must be changed
        job.setJobName("Words Group By Title Driver");

        Path inPath = new Path(args[0]);
        Path outPath = new Path(args[1]);
        Path competencesPath = new Path(args[2]);
        Path competencesPathHDFS = competencesPath;
        FileSystem fs = FileSystem.get(conf);

        if (!conf.get(FileSystem.FS_DEFAULT_NAME_KEY).startsWith("file")) {
            competencesPathHDFS = new Path(competencesPath.getName());
            if (!fs.exists(competencesPathHDFS)) {
                fs.mkdirs(competencesPathHDFS);
                File[] stats = new File(competencesPath.toString()).listFiles();
                for (File stat : stats) {
                    Path filePath = new Path(stat.getAbsolutePath());
                    if (FilenameUtils.getExtension(filePath.getName()).endsWith("csv")) {
                        Path dest = new Path(competencesPathHDFS.toUri() + "/" + filePath.getName());
                        fs.copyFromLocalFile(filePath, dest);
                    }
                }
            }
        }
        job.addCacheFile(competencesPathHDFS.toUri());

        FileInputFormat.setInputPaths(job, inPath);
        FileOutputFormat.setOutputPath(job, outPath);
        fs.delete(outPath, true);

        job.setMapperClass(CompetencesDistanceMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);

        job.setReducerClass(CompetencesDistanceReducer.class);
        // job.setOutputFormatClass(TableOutputFormat.class);
        // job.getConfiguration().set(TableOutputFormat.OUTPUT_TABLE, "jobpostcompetence");
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        String[] fileNames = args[3].split(",");
        for (String n : fileNames) {
            MultipleOutputs.addNamedOutput(job, n, TextOutputFormat.class, Text.class, Text.class);
        }
        return (job.waitForCompletion(true) ? 0 : 1);
    } catch (IOException | IllegalStateException | IllegalArgumentException | InterruptedException
            | ClassNotFoundException ex) {
        Logger.getLogger(CompetencesDistanceDriver.class.getName()).log(Level.SEVERE, null, ex);
    }
    return 1; // signal failure when an exception prevented job completion
}
From source file: eu.edisonproject.classification.tfidf.mapreduce.TermWordFrequency.java
License: Apache License

@Override
public int run(String[] args) throws Exception {
    Configuration jobconf = getConf();
    Job job = Job.getInstance(jobconf);
    FileSystem fs = FileSystem.get(jobconf);
    fs.delete(new Path(args[1]), true);

    Path dictionary = new Path(args[0]);
    Path dictionaryHdfs = dictionary;
    Path localDocs = new Path(args[2]);
    Path hdfsDocs = localDocs;
    Path stopwordsLocal = new Path(args[3]);
    Path stopwordsHDFS = stopwordsLocal;

    if (!jobconf.get(FileSystem.FS_DEFAULT_NAME_KEY).startsWith("file")) {
        dictionaryHdfs = new Path(dictionary.getName());
        if (!fs.exists(dictionaryHdfs)) {
            fs.copyFromLocalFile(dictionary, dictionaryHdfs);
        }
        hdfsDocs = new Path(localDocs.getName());
        fs.mkdirs(hdfsDocs);
        fs.deleteOnExit(hdfsDocs);
        File[] stats = new File(localDocs.toString()).listFiles();
        for (File stat : stats) {
            Path filePath = new Path(stat.getAbsolutePath());
            if (FilenameUtils.getExtension(filePath.getName()).endsWith("txt")) {
                Path dest = new Path(hdfsDocs.toUri() + "/" + filePath.getName());
                fs.copyFromLocalFile(filePath, dest);
            }
        }
        stopwordsHDFS = new Path(stopwordsLocal.getName());
        if (!fs.exists(stopwordsHDFS)) {
            fs.copyFromLocalFile(stopwordsLocal, stopwordsHDFS);
        }
    }

    FileStatus stopwordsStatus = fs.getFileStatus(stopwordsHDFS);
    stopwordsHDFS = stopwordsStatus.getPath();
    job.addCacheFile(stopwordsHDFS.toUri());
    job.addCacheFile(hdfsDocs.toUri());

    job.setJarByClass(TermWordFrequency.class);
    job.setJobName("Word Frequency Term Driver");

    FileInputFormat.setInputPaths(job, dictionaryHdfs);
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    // job.setInputFormatClass(TextInputFormat.class);
    job.setInputFormatClass(NLineInputFormat.class);
    NLineInputFormat.addInputPath(job, dictionaryHdfs);
    NLineInputFormat.setNumLinesPerSplit(job, Integer.valueOf(args[4]));
    NLineInputFormat.setMaxInputSplitSize(job, 500);

    job.setMapperClass(TermWordFrequencyMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Integer.class);
    job.setReducerClass(TermWordFrequencyReducer.class);

    return (job.waitForCompletion(true) ? 0 : 1);
}
From source file: eu.edisonproject.training.tfidf.mapreduce.TermWordFrequency.java
License: Apache License

@Override
public int run(String[] args) throws Exception {
    Configuration jobconf = getConf();
    FileSystem fs = FileSystem.get(jobconf);
    fs.delete(new Path(args[1]), true);

    Path in = new Path(args[0]);
    Path inHdfs = in;
    if (!jobconf.get(FileSystem.FS_DEFAULT_NAME_KEY).startsWith("file")) {
        inHdfs = new Path(in.getName());
        fs.delete(inHdfs, true);
        fs.copyFromLocalFile(in, inHdfs);
        fs.deleteOnExit(inHdfs);
        FileStatus inHdfsStatus = fs.getFileStatus(inHdfs);
        // Logger.getLogger(TermWordFrequency.class.getName()).log(Level.INFO, "Copied: {0} to: {1}",
        //         new Object[]{in.toUri(), inHdfsStatus.getPath().toUri()});
    }

    Job job = Job.getInstance(jobconf);

    Path stopwordsLocal = new Path(args[3]);
    stopwords = new Path(stopwordsLocal.getName());
    fs.delete(stopwords, true);
    fs.copyFromLocalFile(stopwordsLocal, stopwords);
    fs.deleteOnExit(stopwords);
    FileStatus stopwordsStatus = fs.getFileStatus(stopwords);
    stopwords = stopwordsStatus.getPath();
    job.addCacheFile(stopwords.toUri());

    Path localDocs = new Path(args[2]);
    Path hdfsDocs = new Path(localDocs.getName());
    fs.mkdirs(hdfsDocs);
    hdfsDocs = fs.getFileStatus(hdfsDocs).getPath();
    fs.delete(hdfsDocs, true);

    // FileStatus[] stats = fs.listStatus(localDocs);
    File[] stats = new File(localDocs.toString()).listFiles();
    for (File stat : stats) {
        // for (FileStatus stat : stats) {
        Path filePath = new Path(stat.getAbsolutePath());
        if (FilenameUtils.getExtension(filePath.getName()).endsWith("txt")) {
            Path dest = new Path(hdfsDocs.toUri() + "/" + filePath.getName());
            fs.copyFromLocalFile(filePath, dest);
        }
    }
    job.addCacheFile(hdfsDocs.toUri());

    job.setJarByClass(TermWordFrequency.class);
    job.setJobName("Word Frequency Term Driver");

    FileInputFormat.setInputPaths(job, inHdfs);
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    // job.setInputFormatClass(TextInputFormat.class);
    job.setInputFormatClass(NLineInputFormat.class);
    NLineInputFormat.addInputPath(job, inHdfs);
    NLineInputFormat.setNumLinesPerSplit(job, Integer.valueOf(args[4]));
    NLineInputFormat.setMaxInputSplitSize(job, 500);
    Logger.getLogger(TermWordFrequency.class.getName()).log(Level.INFO, "Num. of lines: {0}",
            NLineInputFormat.getNumLinesPerSplit(job));

    job.setMapperClass(TermWordFrequencyMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Integer.class);
    job.setReducerClass(TermWordFrequencyReducer.class);

    return (job.waitForCompletion(true) ? 0 : 1);
}
From source file: eu.scape_project.tb.lsdr.seqfileutility.hadoop.HadoopJob.java
License: Apache License

/**
 * Run hadoop job.
 *
 * @param strings Command line arguments
 * @return Success indicator
 * @throws Exception
 */
@Override
public int run(String[] strings) throws Exception {
    try {
        String hdfsInputDir = null;
        FileSystem hdfs = FileSystem.get(conf);
        if (pc.getHdfsInputPath() != null) {
            // hdfs input path is given as command parameter
            hdfsInputDir = pc.getHdfsInputPath();
        } else {
            // hdfs input file is created
            hdfsInputDir = "input/" + System.currentTimeMillis() + "sfu/";
            String[] extensions = null;
            if (pc.getExtStr() != null) {
                StringTokenizer st = new StringTokenizer(pc.getExtStr(), ",");
                extensions = new String[st.countTokens()];
                int i = 0;
                while (st.hasMoreTokens()) {
                    extensions[i] = st.nextToken();
                    i++;
                }
            }
            hdfs.mkdirs(new Path(hdfsInputDir));
            String hdfsInputPath = hdfsInputDir + "inputpaths.txt";
            Path path = new Path(hdfsInputPath);
            FSDataOutputStream outputStream = hdfs.create(path);
            List<String> dirs = StringUtils.getStringListFromString(pc.getDirsStr(), ",");
            for (String dir : dirs) {
                File directory = new File(dir);
                if (directory.isDirectory()) {
                    // Alternatively, the java traverse method can be used
                    // for creating the file paths:
                    // traverse(directory, outputStream);
                    writeFilePaths(directory, outputStream);
                } else {
                    logger.warn("Parameter \"" + dir + "\" is not a directory (skipped)");
                }
            }
            outputStream.close();
            if (hdfs.exists(path)) {
                logger.info("Input paths created in \"" + hdfs.getHomeDirectory() + "/" + path.toString() + "\"");
            } else {
                logger.error("Input paths have not been created in hdfs.");
                return 1;
            }
        }

        String hadoopJobName = "Hadoop_sequence_file_creation";
        if (pc.getHadoopJobName() != null && !pc.getHadoopJobName().equals("")) {
            hadoopJobName = pc.getHadoopJobName();
        }
        Job job = new Job(conf, hadoopJobName);
        job.setJarByClass(SequenceFileUtility.class);
        job.setMapperClass(SmallFilesSequenceFileMapper.class);
        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(BytesWritable.class);
        job.setOutputFormatClass(SequenceFileOutputFormat.class);
        TextInputFormat.addInputPath(job, new Path(hdfsInputDir));

        // todo: support absolute paths
        String hdfsOutputDir = pc.getOutputDirectory() != null ? pc.getOutputDirectory()
                : "output/" + System.currentTimeMillis() + "sfu/";
        SequenceFileOutputFormat.setOutputPath(job, new Path(hdfsOutputDir));
        SequenceFileOutputFormat.setOutputCompressionType(job, CompressionType.get(pc.getCompressionType()));

        int success = job.waitForCompletion(true) ? 0 : 1;
        boolean seqFileExists = hdfs.exists(new Path(hdfsOutputDir + "part-r-00000"));
        if (success == 0 && seqFileExists) {
            logger.info("Sequence file created: \""
                    //+ hdfs.getHomeDirectory() + "/"
                    + new Path(hdfsOutputDir).toString() + "/part-r-00000" + "\"");
            pc.setOutputDirectory(hdfsOutputDir);
            return 0;
        } else {
            logger.error("Sequence file not created in hdfs");
            return 1;
        }
    } catch (Exception e) {
        logger.error("Exception occurred", e);
    }
    return 1; // signal failure when an exception prevented job completion
}
From source file: eu.stratosphere.hadoopcompatibility.FileOutputCommitterWrapper.java
License: Apache License

public void setupJob(JobConf conf) throws IOException {
    Path outputPath = FileOutputFormat.getOutputPath(conf);
    if (outputPath != null) {
        Path tmpDir = new Path(outputPath, FileOutputCommitter.TEMP_DIR_NAME);
        FileSystem fileSys = tmpDir.getFileSystem(conf);
        if (!fileSys.mkdirs(tmpDir)) {
            LOG.error("Mkdirs failed to create " + tmpDir.toString());
        }
    }
}
From source file: eu.stratosphere.hadoopcompatibility.FileOutputCommitterWrapper.java
License: Apache License

private void moveTaskOutputs(JobConf conf, TaskAttemptID taskAttemptID, FileSystem fs, Path jobOutputDir,
        Path taskOutput) throws IOException {
    if (fs.isFile(taskOutput)) {
        Path finalOutputPath = getFinalPath(jobOutputDir, taskOutput,
                getTempTaskOutputPath(conf, taskAttemptID));
        if (!fs.rename(taskOutput, finalOutputPath)) {
            if (!fs.delete(finalOutputPath, true)) {
                throw new IOException("Failed to delete earlier output of task: " + taskAttemptID);
            }
            if (!fs.rename(taskOutput, finalOutputPath)) {
                throw new IOException("Failed to save output of task: " + taskAttemptID);
            }
        }
        LOG.debug("Moved " + taskOutput + " to " + finalOutputPath);
    } else if (fs.getFileStatus(taskOutput).isDir()) {
        FileStatus[] paths = fs.listStatus(taskOutput);
        Path finalOutputPath = getFinalPath(jobOutputDir, taskOutput,
                getTempTaskOutputPath(conf, taskAttemptID));
        fs.mkdirs(finalOutputPath);
        if (paths != null) {
            for (FileStatus path : paths) {
                moveTaskOutputs(conf, taskAttemptID, fs, jobOutputDir, path.getPath());
            }
        }
    }
}