List of usage examples for org.apache.hadoop.fs FileSystem mkdirs
public boolean mkdirs(Path f) throws IOException
From source file:finderbots.recommenders.hadoop.ActionSplitterJob.java
License:Apache License
public void split(Path baseInputDir, Path baseOutputDir) throws IOException { FileSystem fs = baseInputDir.getFileSystem(getConf()); Path action1DirPath = new Path(baseOutputDir, options.getAction1Dir()); Path action2DirPath = new Path(baseOutputDir, options.getAction2Dir()); Path actionOtherDirPath = new Path(baseOutputDir, options.getActionOtherDir()); Path action1FilePath = new Path(action1DirPath, options.getAction1File()); Path action2FilePath = new Path(action2DirPath, options.getAction2File()); Path actionOtherFilePath = new Path(actionOtherDirPath, options.getActionOtherFile()); FSDataOutputStream action1File; FSDataOutputStream action2File; FSDataOutputStream actionOtherFile;/*www.j a v a2 s. com*/ if (!fs.exists(baseOutputDir)) { LOGGER.info("Preference output dir:" + baseOutputDir.toString() + " does not exist. creating it."); fs.mkdirs(baseOutputDir); } if (fs.exists(action1DirPath)) fs.delete(action1DirPath, true); if (fs.exists(action2DirPath)) fs.delete(action2DirPath, true); if (fs.exists(actionOtherDirPath)) fs.delete(actionOtherDirPath, true); // cleaned out prefs if they existed, now create a place to put the new ones fs.mkdirs(action1DirPath); fs.mkdirs(action2DirPath); fs.mkdirs(actionOtherDirPath); action1File = fs.create(action1FilePath); action2File = fs.create(action2FilePath); actionOtherFile = fs.create(actionOtherFilePath); List<FSDataInputStream> actionFiles = getActionFiles(baseInputDir); Integer uniqueUserIDCounter = 0; Integer uniqueItemIDCounter = 0; for (FSDataInputStream stream : actionFiles) { BufferedReader bin = new BufferedReader(new InputStreamReader(stream)); String actionLogLine; while ((actionLogLine = bin.readLine()) != null) {//get user to make a rec for String[] columns = actionLogLine.split(options.getInputDelimiter()); if (options.getTimestampColumn() != -1) { // ignoring for now but may be useful String timestamp = columns[options.getTimestampColumn()].trim(); } String externalUserIDString = 
columns[options.getUserIdColumn()].trim(); String externalItemIDString = columns[options.getItemIdColumn()].trim(); String actionString = columns[options.getActionColumn()].trim(); // create a bi-directional index of external->internal ids String internalUserID; String internalItemID; if (this.userIndex.containsKey(externalUserIDString)) {// already in the user index internalUserID = this.userIndex.get(externalUserIDString); } else { internalUserID = uniqueUserIDCounter.toString(); this.userIndex.forcePut(externalUserIDString, internalUserID); uniqueUserIDCounter += 1; if (uniqueUserIDCounter % 10000 == 0) LOGGER.debug( "Splitter processed: " + Integer.toString(uniqueUserIDCounter) + " unique users."); } if (this.itemIndex.containsKey(externalItemIDString)) {// already in the item index internalItemID = this.itemIndex.get(externalItemIDString); } else { internalItemID = uniqueItemIDCounter.toString(); this.itemIndex.forcePut(externalItemIDString, internalItemID); uniqueItemIDCounter += 1; } if (actionString.equals(options.getAction1())) { action1File.writeBytes(internalUserID + options.getOutputDelimiter() + internalItemID + options.getOutputDelimiter() + "1.0\n"); } else if (actionString.equals(options.getAction2())) { action2File.writeBytes(internalUserID + options.getOutputDelimiter() + internalItemID + options.getOutputDelimiter() + "1.0\n"); } else { actionOtherFile.writeBytes(actionLogLine);//write what's not recognized } } } action1File.close(); action2File.close(); actionOtherFile.close(); int i = 0;//breakpoint after close to inspect files }
From source file:fr.ens.biologie.genomique.eoulsan.data.protocols.HDFSPathDataProtocol.java
License:LGPL
@Override public void mkdirs(final DataFile dir) throws IOException { final Path path = getPath(dir); if (path == null) { throw new NullPointerException("Path to create is null"); }//from w ww . j a va 2 s. c o m if (this.conf == null) { throw new NullPointerException("The configuration object is null"); } final FileSystem fs = path.getFileSystem(this.conf); if (fs == null) { throw new IOException("Unable to create the directory, The FileSystem is null"); } if (!fs.mkdirs(path)) { throw new IOException("Unable to create the directory: " + dir); } }
From source file:fr.ens.biologie.genomique.eoulsan.MainHadoop.java
License:LGPL
@Override protected Handler getLogHandler(final URI logFile) throws IOException { if (logFile == null) { throw new NullPointerException("The log file is null"); }/*from w w w .j a v a2 s. co m*/ final Path loggerPath = new Path(logFile); final FileSystem loggerFs = loggerPath.getFileSystem(this.conf); final Path parentPath = loggerPath.getParent(); // Create parent directory if necessary if (!loggerFs.exists(loggerPath.getParent())) { if (!loggerFs.mkdirs(loggerPath.getParent())) { throw new IOException("Unable to create directory " + parentPath + " for log file:" + logFile); } } return new StreamHandler(loggerFs.create(loggerPath), Globals.LOG_FORMATTER); }
From source file:fr.ens.biologie.genomique.eoulsan.modules.mgmt.hadoop.DistCp.java
License:LGPL
/**
 * Initialize DFSCopyFileMapper specific job-configuration.
 *
 * <p>Stores the copy flags and working paths in {@code jobConf}, walks every source path to
 * write the source, destination and directory lists as sequence files in a job directory,
 * creates the destination and temporary directories where needed, and finally records the
 * source count and total size and calls {@code setMapCount}.
 *
 * @param conf : The dfs/mapred configuration.
 * @param jobConf : The handle to the jobConf object to be initialized.
 * @param args Arguments
 * @throws IOException if a file system or sequence file operation fails, or if the destination
 *         directory cannot be created
 */
private static void setup(final Configuration conf, final JobConf jobConf, final Arguments args)
    throws IOException {
  jobConf.set(DST_DIR_LABEL, args.dst.toUri().toString());

  // set boolean values
  final boolean update = args.flags.contains(Options.UPDATE);
  // overwrite is only honoured when update is not requested
  final boolean overwrite = !update && args.flags.contains(Options.OVERWRITE);
  jobConf.setBoolean(Options.UPDATE.propertyname, update);
  jobConf.setBoolean(Options.OVERWRITE.propertyname, overwrite);
  jobConf.setBoolean(Options.IGNORE_READ_FAILURES.propertyname,
      args.flags.contains(Options.IGNORE_READ_FAILURES));
  jobConf.setBoolean(Options.PRESERVE_STATUS.propertyname,
      args.flags.contains(Options.PRESERVE_STATUS));

  // per-run job directory: <system dir>/<NAME>_<randomId>
  final String randomId = getRandomId();
  JobClient jClient = new JobClient(jobConf);
  Path jobDirectory = new Path(jClient.getSystemDir(), NAME + "_" + randomId);
  jobConf.set(JOB_DIR_LABEL, jobDirectory.toString());

  long maxBytesPerMap = conf.getLong(BYTES_PER_MAP_LABEL, BYTES_PER_MAP);

  // snapshot of the destination state, taken before anything is created
  FileSystem dstfs = args.dst.getFileSystem(conf);
  boolean dstExists = dstfs.exists(args.dst);
  boolean dstIsDir = false;
  if (dstExists) {
    dstIsDir = dstfs.getFileStatus(args.dst).isDir();
  }

  // default logPath
  Path logPath = args.log;
  if (logPath == null) {
    String filename = "_distcp_logs_" + randomId;
    if (!dstExists || !dstIsDir) {
      // destination is missing or is a file: put the logs next to it, in its parent
      Path parent = args.dst.getParent();
      if (null == parent) {
        // If dst is '/' on S3, it might not exist yet, but dst.getParent()
        // will return null. In this case, use '/' as its own parent to
        // prevent NPE errors below.
        parent = args.dst;
      }
      if (!dstfs.exists(parent)) {
        // NOTE(review): the boolean result of mkdirs is ignored here
        dstfs.mkdirs(parent);
      }
      logPath = new Path(parent, filename);
    } else {
      logPath = new Path(args.dst, filename);
    }
  }
  FileOutputFormat.setOutputPath(jobConf, logPath);

  // create src list, dst list
  FileSystem jobfs = jobDirectory.getFileSystem(jobConf);

  // source list: LongWritable key, FilePair value
  Path srcfilelist = new Path(jobDirectory, "_distcp_src_files");
  jobConf.set(SRC_LIST_LABEL, srcfilelist.toString());
  SequenceFile.Writer src_writer = SequenceFile.createWriter(jobfs, jobConf, srcfilelist,
      LongWritable.class, FilePair.class, SequenceFile.CompressionType.NONE);

  // destination list: Text key (relative destination), Text value (source path)
  Path dstfilelist = new Path(jobDirectory, "_distcp_dst_files");
  SequenceFile.Writer dst_writer = SequenceFile.createWriter(jobfs, jobConf, dstfilelist,
      Text.class, Text.class, SequenceFile.CompressionType.NONE);

  // directory list: Text key (relative destination), FilePair value
  Path dstdirlist = new Path(jobDirectory, "_distcp_dst_dirs");
  jobConf.set(DST_DIR_LIST_LABEL, dstdirlist.toString());
  SequenceFile.Writer dir_writer = SequenceFile.createWriter(jobfs, jobConf, dstdirlist,
      Text.class, FilePair.class, SequenceFile.CompressionType.NONE);

  // handle the case where the destination directory doesn't exist
  // and we've only a single src directory OR we're updating/overwriting
  // the contents of the destination directory.
  final boolean special = (args.srcs.size() == 1 && !dstExists) || update || overwrite;

  // srcCount: source directories plus every listed child; cnsyncf/cbsyncs: files/bytes appended
  // since the last sync of the src and dst writers; dirsyn: directories appended since the last
  // sync of the dir writer; fileCount/byteCount: files/bytes actually selected for copy
  int srcCount = 0, cnsyncf = 0, dirsyn = 0;
  long fileCount = 0L, byteCount = 0L, cbsyncs = 0L;
  try {
    for (Iterator<Path> srcItr = args.srcs.iterator(); srcItr.hasNext();) {
      final Path src = srcItr.next();
      FileSystem srcfs = src.getFileSystem(conf);
      FileStatus srcfilestat = srcfs.getFileStatus(src);
      // relative destination names are computed against this root
      Path root = special && srcfilestat.isDir() ? src : src.getParent();
      if (srcfilestat.isDir()) {
        ++srcCount;
      }

      // iterative walk of the source tree, using an explicit stack instead of recursion
      Stack<FileStatus> pathstack = new Stack<>();
      for (pathstack.push(srcfilestat); !pathstack.empty();) {
        FileStatus cur = pathstack.pop();
        FileStatus[] children = srcfs.listStatus(cur.getPath());
        for (int i = 0; i < children.length; i++) {
          boolean skipfile = false;
          final FileStatus child = children[i];
          final String dst = makeRelative(root, child.getPath());
          ++srcCount;

          if (child.isDir()) {
            pathstack.push(child);
          } else {
            // skip file if the src and the dst files are the same.
            skipfile = update && sameFile(srcfs, child, dstfs, new Path(args.dst, dst));
            // skip file if it exceed file limit or size limit
            skipfile |= fileCount == args.filelimit || byteCount + child.getLen() > args.sizelimit;

            if (!skipfile) {
              ++fileCount;
              byteCount += child.getLen();

              // if (LOG.isTraceEnabled()) {
              // LOG.trace("adding file " + child.getPath());
              // }

              ++cnsyncf;
              cbsyncs += child.getLen();
              // sync both writers once enough files or bytes were appended, then reset
              if (cnsyncf > SYNC_FILE_MAX || cbsyncs > maxBytesPerMap) {
                src_writer.sync();
                dst_writer.sync();
                cnsyncf = 0;
                cbsyncs = 0L;
              }
            }
          }

          // directories are never skipped; they are recorded with a length of 0
          if (!skipfile) {
            src_writer.append(new LongWritable(child.isDir() ? 0 : child.getLen()),
                new FilePair(child, dst));
          }

          // every child, skipped or not, is recorded in the destination list
          dst_writer.append(new Text(dst), new Text(child.getPath().toString()));
        }

        if (cur.isDir()) {
          String dst = makeRelative(root, cur.getPath());
          dir_writer.append(new Text(dst), new FilePair(cur, dst));
          if (++dirsyn > SYNC_FILE_MAX) {
            dirsyn = 0;
            dir_writer.sync();
          }
        }
      }
    }
  } finally {
    // the three writers are closed whether or not the walk succeeded
    checkAndClose(src_writer);
    checkAndClose(dst_writer);
    checkAndClose(dir_writer);
  }

  // re-read the destination status; a missing destination is logged, not treated as an error
  FileStatus dststatus = null;
  try {
    dststatus = dstfs.getFileStatus(args.dst);
  } catch (FileNotFoundException fnfe) {
    getLogger().info(args.dst + " does not exist.");
  }

  // create dest path dir if copying > 1 file
  if (dststatus == null) {
    if (srcCount > 1 && !dstfs.mkdirs(args.dst)) {
      // NOTE(review): there is no space between "Failed to create" and the path in this message
      throw new IOException("Failed to create" + args.dst);
    }
  }

  final Path sorted = new Path(jobDirectory, "_distcp_sorted");
  checkDuplication(jobfs, dstfilelist, sorted, conf);

  if (dststatus != null && args.flags.contains(Options.DELETE)) {
    deleteNonexisting(dstfs, dststatus, sorted, jobfs, jobDirectory, jobConf, conf);
  }

  // the temporary directory goes in the parent of dst when dst is an existing file, or when dst
  // is missing and a single source entry was counted; otherwise it goes inside dst
  Path tmpDir = new Path(
      (dstExists && !dstIsDir) || (!dstExists && srcCount == 1) ? args.dst.getParent() : args.dst,
      "_distcp_tmp_" + randomId);
  jobConf.set(TMP_DIR_LABEL, tmpDir.toUri().toString());

  // Explicitly create the tmpDir to ensure that it can be cleaned
  // up by fullyDelete() later.
  tmpDir.getFileSystem(conf).mkdirs(tmpDir);

  getLogger().info("srcCount=" + srcCount);
  jobConf.setInt(SRC_COUNT_LABEL, srcCount);
  jobConf.setLong(TOTAL_SIZE_LABEL, byteCount);
  setMapCount(byteCount, jobConf);
}
From source file:fr.ens.biologie.genomique.eoulsan.modules.mgmt.upload.DataFileDistCp.java
License:LGPL
public void copy(final Map<DataFile, DataFile> entries) throws IOException { if (entries == null || entries.size() == 0) { return;//from w w w .jav a 2 s . c om } final Configuration conf = this.conf; final Path tmpInputDir = PathUtils.createTempPath(this.jobPath, "distcp-in-", "", conf); final Path tmpOutputDir = PathUtils.createTempPath(this.jobPath, "distcp-out-", "", conf); // // Create entries for distcp // final FileSystem fs = tmpInputDir.getFileSystem(conf); fs.mkdirs(tmpInputDir); // Sort files by size final List<DataFile> inFiles = Lists.newArrayList(entries.keySet()); sortInFilesByDescSize(inFiles); // Set the format for the id of the copy task final NumberFormat nf = NumberFormat.getInstance(); nf.setMinimumIntegerDigits(Integer.toString(inFiles.size()).length()); nf.setGroupingUsed(false); int count = 0; for (DataFile inFile : inFiles) { count++; final DataFile outFile = entries.get(inFile); final Path f = new Path(tmpInputDir, "distcp-" + nf.format(count) + ".cp"); getLogger().info("Task copy " + inFile + " in " + f.toString()); BufferedWriter bw = new BufferedWriter(new OutputStreamWriter(fs.create(f), CHARSET)); bw.write(inFile.getSource() + "\t" + outFile.getSource() + "\n"); bw.close(); } final Job job = createJobConf(conf, tmpInputDir, tmpOutputDir); try { job.waitForCompletion(false); } catch (InterruptedException | ClassNotFoundException e) { throw new EoulsanRuntimeException("Error while distcp: " + e.getMessage(), e); } // Remove tmp directory PathUtils.fullyDelete(tmpInputDir, conf); PathUtils.fullyDelete(tmpOutputDir, conf); if (!job.isSuccessful()) { throw new IOException("Unable to copy files using DataFileDistCp."); } }
From source file:fr.ens.biologie.genomique.eoulsan.util.hadoop.PathUtils.java
License:LGPL
/**
 * Create a directory. Missing parent directories are created as well.
 * @param path Path of the directory to create
 * @param conf Configuration
 * @return true if the directory is successfully created
 * @throws IOException if an error occurs while creating the directory
 */
public static final boolean mkdirs(final Path path, final Configuration conf) throws IOException {

  // Arguments are validated in declaration order: path first, then configuration
  if (path == null) {
    throw new NullPointerException("The path of the directory to create is null.");
  }
  if (conf == null) {
    throw new NullPointerException("The configuration is null");
  }

  return path.getFileSystem(conf).mkdirs(path);
}
From source file:fr.jetoile.hadoopunit.component.OozieBootstrapTest.java
License:Apache License
@Test public void oozieShouldStart() throws Exception { LOGGER.info("OOZIE: Test Submit Workflow Start"); FileSystem hdfsFs = ((HdfsBootstrap) HadoopBootstrap.INSTANCE.getService(Component.HDFS)) .getHdfsFileSystemHandle();/* ww w. jav a 2 s .c om*/ OozieClient oozieClient = ((OozieBootstrap) HadoopBootstrap.INSTANCE.getService(Component.OOZIE)) .getOozieClient(); Path appPath = new Path(hdfsFs.getHomeDirectory(), "testApp"); hdfsFs.mkdirs(new Path(appPath, "lib")); Path workflow = new Path(appPath, "workflow.xml"); //write workflow.xml String wfApp = "<workflow-app xmlns='uri:oozie:workflow:0.1' name='test-wf'>" + " <start to='end'/>" + " <end name='end'/>" + "</workflow-app>"; Writer writer = new OutputStreamWriter(hdfsFs.create(workflow)); writer.write(wfApp); writer.close(); //write job.properties Properties conf = oozieClient.createConfiguration(); conf.setProperty(OozieClient.APP_PATH, workflow.toString()); conf.setProperty(OozieClient.USER_NAME, UserGroupInformation.getCurrentUser().getUserName()); //submit and check final String jobId = oozieClient.submit(conf); WorkflowJob wf = oozieClient.getJobInfo(jobId); assertNotNull(wf); assertEquals(WorkflowJob.Status.PREP, wf.getStatus()); LOGGER.info("OOZIE: Workflow: {}", wf.toString()); hdfsFs.close(); assertThat("true").isEqualTo("true"); }
From source file:fr.jetoile.hadoopunit.integrationtest.IntegrationBootstrapTest.java
License:Apache License
@Test public void oozieShouldStart() throws Exception { LOGGER.info("OOZIE: Test Submit Workflow Start"); org.apache.hadoop.conf.Configuration conf = new org.apache.hadoop.conf.Configuration(); conf.set("fs.default.name", "hdfs://127.0.0.1:" + configuration.getInt(Config.HDFS_NAMENODE_PORT_KEY)); URI uri = URI.create("hdfs://127.0.0.1:" + configuration.getInt(Config.HDFS_NAMENODE_PORT_KEY)); FileSystem hdfsFs = FileSystem.get(uri, conf); OozieClient oozieClient = new OozieClient("http://" + configuration.getString(OozieBootstrap.OOZIE_HOST) + ":" + configuration.getInt(OozieBootstrap.OOZIE_PORT) + "/oozie"); Path appPath = new Path(hdfsFs.getHomeDirectory(), "testApp"); hdfsFs.mkdirs(new Path(appPath, "lib")); Path workflow = new Path(appPath, "workflow.xml"); //write workflow.xml String wfApp = "<workflow-app xmlns='uri:oozie:workflow:0.1' name='test-wf'>" + " <start to='end'/>" + " <end name='end'/>" + "</workflow-app>"; Writer writer = new OutputStreamWriter(hdfsFs.create(workflow)); writer.write(wfApp);//from ww w .j a v a 2 s . c om writer.close(); //write job.properties Properties oozieConf = oozieClient.createConfiguration(); oozieConf.setProperty(OozieClient.APP_PATH, workflow.toString()); oozieConf.setProperty(OozieClient.USER_NAME, UserGroupInformation.getCurrentUser().getUserName()); //submit and check final String jobId = oozieClient.submit(oozieConf); WorkflowJob wf = oozieClient.getJobInfo(jobId); Assert.assertNotNull(wf); assertEquals(WorkflowJob.Status.PREP, wf.getStatus()); LOGGER.info("OOZIE: Workflow: {}", wf.toString()); hdfsFs.close(); }
From source file:fr.jetoile.hadoopunit.integrationtest.ManualIntegrationBootstrapTest.java
License:Apache License
@Test public void oozieShouldStart() throws Exception { LOGGER.info("OOZIE: Test Submit Workflow Start"); org.apache.hadoop.conf.Configuration conf = new org.apache.hadoop.conf.Configuration(); conf.set("fs.default.name", "hdfs://127.0.0.1:" + configuration.getInt(Config.HDFS_NAMENODE_PORT_KEY)); URI uri = URI.create("hdfs://127.0.0.1:" + configuration.getInt(Config.HDFS_NAMENODE_PORT_KEY)); FileSystem hdfsFs = FileSystem.get(uri, conf); OozieClient oozieClient = new OozieClient("http://" + configuration.getString(OozieBootstrap.OOZIE_HOST) + ":" + configuration.getInt(OozieBootstrap.OOZIE_PORT) + "/oozie"); Path appPath = new Path(hdfsFs.getHomeDirectory(), "testApp"); hdfsFs.mkdirs(new Path(appPath, "lib")); Path workflow = new Path(appPath, "workflow.xml"); //write workflow.xml String wfApp = "<workflow-app xmlns='uri:oozie:workflow:0.1' name='test-wf'>" + " <start to='end'/>" + " <end name='end'/>" + "</workflow-app>"; Writer writer = new OutputStreamWriter(hdfsFs.create(workflow)); writer.write(wfApp);/*ww w . j av a 2 s.co m*/ writer.close(); //write job.properties Properties oozieConf = oozieClient.createConfiguration(); oozieConf.setProperty(OozieClient.APP_PATH, workflow.toString()); oozieConf.setProperty(OozieClient.USER_NAME, UserGroupInformation.getCurrentUser().getUserName()); //submit and check final String jobId = oozieClient.submit(oozieConf); WorkflowJob wf = oozieClient.getJobInfo(jobId); Assert.assertNotNull(wf); assertEquals(WorkflowJob.Status.PREP, wf.getStatus()); LOGGER.info("OOZIE: Workflow: {}", wf.toString()); hdfsFs.close(); }
From source file:fr.jetoile.hadoopunit.integrationtest.SparkIntegrationTest.java
License:Apache License
/**
 * Test fixture: creates the two HDFS working directories, uploads the {@code test.csv} classpath
 * resource into the first one and launches a {@code HiveSetup} built from the
 * {@code CREATE_TABLES} operations.
 */
@Before
public void before() throws IOException, URISyntaxException {

    FileSystem hdfs = HdfsUtils.INSTANCE.getFileSystem();

    hdfs.mkdirs(new Path("/khanh/test"));
    hdfs.mkdirs(new Path("/khanh/test_parquet"));

    // upload the CSV fixture from the test classpath
    Path localCsv = new Path(SparkIntegrationTest.class.getClassLoader().getResource("test.csv").toURI());
    Path remoteCsv = new Path("/khanh/test/test.csv");
    hdfs.copyFromLocalFile(localCsv, remoteCsv);

    HiveSetup hiveSetup = new HiveSetup(HiveConnectionUtils.INSTANCE.getDestination(),
            Operations.sequenceOf(CREATE_TABLES));
    hiveSetup.launch();
}