List of usage examples for org.apache.hadoop.fs FileSystem mkdirs
public boolean mkdirs(Path f) throws IOException
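Before the examples from real projects below, a minimal self-contained sketch of the call (the path is hypothetical; mkdirs has mkdir -p semantics, creating any missing parent directories, and returns true on success, including when the directory already exists):

    import java.io.IOException;
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;

    public class MkdirsExample {
        public static void main(String[] args) throws IOException {
            // Picks up core-site.xml etc. if present on the classpath
            Configuration conf = new Configuration();
            FileSystem fs = FileSystem.get(conf);
            Path dir = new Path("/tmp/mkdirs-example"); // hypothetical path
            boolean ok = fs.mkdirs(dir); // creates missing parents, like `mkdir -p`
            System.out.println("mkdirs returned: " + ok);
        }
    }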
From source file:com.yolodata.tbana.testutils.FileSystemTestUtils.java
License:Open Source License
public static Path createEmptyDir(FileSystem fileSystem, Path directory, String directoryName) throws IOException {
    Path dir = HadoopFileTestUtils.createPath(directory.toString(), directoryName);
    assertTrue(fileSystem.mkdirs(dir));
    return fileSystem.getFileStatus(dir).getPath();
}
From source file:com.yss.util.YarnUtil.java
License:Open Source License
@SuppressWarnings("rawtypes")
public static Path createConfigurationFileInFs(FileSystem fs, String appHome, Map stormConf,
        YarnConfiguration yarnConf) throws IOException {
    // dump stringwriter's content into FS conf/storm.yaml
    Path confDst = new Path(fs.getHomeDirectory(), appHome + Path.SEPARATOR + STORM_CONF_PATH_STRING);
    Path dirDst = confDst.getParent();
    fs.mkdirs(dirDst);

    // storm.yaml
    FSDataOutputStream out = fs.create(confDst);
    Yaml yaml = new Yaml();
    OutputStreamWriter writer = new OutputStreamWriter(out);
    rmNulls(stormConf);
    yaml.dump(stormConf, writer);
    writer.close();
    out.close();

    // yarn-site.xml
    Path yarn_site_xml = new Path(dirDst, "yarn-site.xml");
    out = fs.create(yarn_site_xml);
    writer = new OutputStreamWriter(out);
    yarnConf.writeXml(writer);
    writer.close();
    out.close();

    return dirDst;
}
From source file:corner.hadoop.services.impl.HdfsAccessorProxy.java
License:Apache License
@Override
public boolean mkdirs(String dirPath) throws IOException {
    Path dstPath = new Path(dirPath);
    FileSystem dstFs = dstPath.getFileSystem(getConf());
    return dstFs.mkdirs(dstPath);
}
From source file:datafu.hourglass.demo.GenerateIds.java
License:Apache License
private void createDataForDate(FileSystem fs, Path outputPath, Date date) throws IOException {
    // make sure output path exists
    if (!fs.exists(outputPath)) {
        fs.mkdirs(outputPath);
    }

    Path datePath = new Path(outputPath, dateFormat.format(date));
    System.out.println("Writing to " + datePath.toString() + " with range " + startId + " to " + endId);

    DataFileWriter<GenericRecord> dataWriter;
    OutputStream outputStream;

    Path dailyPath = outputPath;
    Path path = new Path(dailyPath, dateFormat.format(date));

    // delete directory if it already exists
    if (fs.exists(path)) {
        fs.delete(path, true);
    }

    outputStream = fs.create(new Path(path, "part-00000.avro"));

    GenericDatumWriter<GenericRecord> writer = new GenericDatumWriter<GenericRecord>();
    dataWriter = new DataFileWriter<GenericRecord>(writer);
    dataWriter.create(EVENT_SCHEMA, outputStream);

    GenericRecord record = new GenericData.Record(EVENT_SCHEMA);

    // create 1000 random IDs
    for (int i = 0; i < 1000; i++) {
        long val;
        if (startId == endId) {
            val = startId;
        } else {
            val = (long) (startId + random.nextInt(endId - startId + 1));
        }
        record.put("id", val);
        dataWriter.append(record);
    }

    dataWriter.close();
    outputStream.close();
}
From source file:datafu.hourglass.jobs.StagedOutputJob.java
License:Apache License
/**
 * Run the job and wait for it to complete. Output will be temporarily stored
 * under the staging path. If the job is successful it will be moved to the
 * final location.
 */
@Override
public boolean waitForCompletion(boolean verbose) throws IOException, InterruptedException, ClassNotFoundException {
    final Path actualOutputPath = FileOutputFormat.getOutputPath(this);
    final Path stagedPath = new Path(String.format("%s/%s/staged", _stagingPrefix, System.currentTimeMillis()));

    FileOutputFormat.setOutputPath(this, stagedPath);

    // Kill the job if the JVM shuts down while it is still running.
    final Thread hook = new Thread(new Runnable() {
        @Override
        public void run() {
            try {
                killJob();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    });
    Runtime.getRuntime().addShutdownHook(hook);

    final boolean retVal = super.waitForCompletion(verbose);
    Runtime.getRuntime().removeShutdownHook(hook);

    if (retVal) {
        FileSystem fs = actualOutputPath.getFileSystem(getConfiguration());

        // Create the output path (and any missing parents), then delete it
        // so the staged data can be renamed into its place.
        fs.mkdirs(actualOutputPath);
        _log.info(String.format("Deleting data at old path[%s]", actualOutputPath));
        fs.delete(actualOutputPath, true);

        _log.info(String.format("Moving from staged path[%s] to final resting place[%s]", stagedPath, actualOutputPath));
        boolean renamed = fs.rename(stagedPath, actualOutputPath);
        if (renamed && _writeCounters) {
            writeCounters(fs);
        }
        return renamed;
    } else {
        FileSystem fs = actualOutputPath.getFileSystem(getConfiguration());
        _log.info(String.format("Job failed, deleting staged path[%s]", stagedPath));
        try {
            fs.delete(stagedPath, true);
        } catch (IOException e) {
            // Best-effort cleanup of the staging directory; ignore failures.
        }
    }
    _log.warn("retVal was false for some reason...");
    return retVal;
}
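The example above stages job output and only moves it into the final location on success, so readers never observe partially written results. A minimal sketch of just that commit step, assuming hypothetical paths and an existing Configuration `conf`:

    // Hypothetical paths; the job is assumed to have written under `staged`.
    Path staged = new Path("/tmp/staging/job-12345");
    Path finalOut = new Path("/data/output");
    FileSystem fs = finalOut.getFileSystem(conf);
    fs.mkdirs(finalOut);         // ensure the full directory chain exists...
    fs.delete(finalOut, true);   // ...then clear the target so rename can take its place
    boolean committed = fs.rename(staged, finalOut);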
From source file:de.gesundkrank.wikipedia.hadoop.io.WikiDumpLoader.java
License:Open Source License
/**
 * Returns {@link FileStatus} of the latest dump in the HDFS.
 *
 * @param fs       HDFS
 * @param basepath Base path of hdfs wikidumps
 * @return the {@link FileStatus} of the latest local dump, or null if none exists
 */
private FileStatus checkLocalDumps(FileSystem fs, Path basepath) {
    long lastLocalChange = 0;
    FileStatus lastLocalDump = null;
    try {
        if (!fs.exists(basepath)) {
            fs.mkdirs(basepath);
            return null;
        }

        FileStatus[] stati = fs.listStatus(basepath);
        for (FileStatus status : stati) {
            // keep track of the most recently modified dump
            long fileChange = status.getModificationTime();
            if (fileChange > lastLocalChange) {
                lastLocalChange = fileChange;
                lastLocalDump = status;
            }
        }
    } catch (IOException e) {
        logger.error(e);
    }
    return lastLocalDump;
}
From source file:de.mpii.fsm.driver.FsmDriver.java
License:Apache License
/**
 * Add the appropriate options here. Execute the MG-FSM algorithm
 * according to the parameters specified at run time.
 *
 * @see org.apache.hadoop.util.Tool#run(java.lang.String[])
 *
 * @param args the command-line arguments
 * @return int exit status: 0 on success, 1 on failure
 */
@Override
public int run(String[] args) throws Exception {
    /* Here parameters that will be available to the user
     * during run time are specified and initialized. */

    /* Hadoop-config options */
    addOutputOption();

    /* User-interesting options */
    addOption("input", "i",
            "(Optional) Specify the path from where the input is to be read"
                    + "\n NOTE: This option can not be used with -(r)esume option.",
            null);
    addOption("support", "s",
            "(Optional) Minimum support (sigma) \nDefault Value: 1\n",
            FsmConfig.SIGMA_DEFAULT_STRING);
    addOption("gamma", "g",
            "(Optional) Maximum gap allowed for mining frequent sequences (gamma) by MG-FSM "
                    + "\nDefault Value: 2\n",
            FsmConfig.GAMMA_DEFAULT_STRING);
    addOption("lambda", "l",
            "(Optional) Maximum length for mining frequent sequences (lambda)"
                    + "\nDefault Value: 5\n",
            FsmConfig.LAMBDA_DEFAULT_STRING);
    addOption("execMode", "m",
            "Method of execution viz. s -(s)equential or d -(d)istributed"
                    + "\nDefault Value: (s)-sequential\n",
            FsmConfig.DEFAULT_EXEC_MODE);
    addOption("type", "t",
            "(Optional) Specify the mining mode."
                    + "\nExpected values for input:"
                    + "\n1. a -(a)ll\n2. m -(m)aximal \n3. c -(c)losed"
                    + "\nDefault Value : a -(a)ll\n",
            FsmConfig.DEFAULT_TYPE);

    /* keepFiles default value is null.
     * It will be set to a temporary location, in case
     * no path is specified. */
    addOption("keepFiles", "k",
            "(Optional) Keep the intermediary files for later use or runs. The files stored are:"
                    + "\n1. Dictionary \n2. Encoded Sequences \n "
                    + "Specify the intermediate path where to keep these files :",
            null);

    /* resume points to the location where the
     * intermediary files are located */
    addOption("resume", "r",
            "(Optional) Resume running further runs of the MG-FSM algorithm on an"
                    + " already encoded transaction file located in the folder specified in input.\n",
            null);

    /* Developer-interesting options */
    addOption("partitionSize", "p",
            "(Optional) Explicitly specify the partition size.\nDefault Value: 10000",
            FsmConfig.DEFAULT_PARTITION_SIZE);
    addOption("indexing", "id",
            "(Optional) Specify the indexing mode."
                    + "\nExpected values for input:"
                    + "\n1. none\n2. minmax \n3. full"
                    + "\nDefault Value : full\n",
            FsmConfig.DEFAULT_INDEXING_METHOD);

    /* split flag is false by default */
    addFlag("split", "sp",
            "(Optional) Explicitly specify whether or not to allow split by setting this flag.");
    addOption("numReducers", "N",
            "(Optional) Number of reducers to be used by MG-FSM. Default value: 90 ", "90");

    /*------------------------------------------------------------
     * ERROR CHECKS
     *------------------------------------------------------------*/

    /* Parse the arguments received from the user during run-time. */
    if (parseArguments(args) == null) {
        System.out.println("\n------------\n" + " E R R O R " + "\n------------\n");
        System.out.println("One of the mandatory options is NOT specified");
        System.out.println("e.g. the input option MUST be specified.");
        // Return a non-zero exit status to indicate failure
        return 1;
    }

    Parameters params = new Parameters();
    if (hasOption("tempDir")) {
        String tempDirPath = getOption("tempDir");
        params.set("tempDir", tempDirPath);
    }
    if (hasOption("input")) {
        String inputString = getOption("input");
        params.set("input", inputString);
    } else {
        params.set("input", null);
    }
    if (hasOption("support")) {
        String supportString = getOption("support");
        /*
         * Checks & constraints on the value that can
         * be assigned to support, gamma, & lambda.
         *
         * NOTE: refer [1]
         */
        if (Integer.parseInt(supportString) < 1) {
            System.out.println("Value of support should be greater than or equal to 1");
            // Return a non-zero exit status to indicate failure
            return (1);
        }
        params.set("support", supportString);
    }
    if (hasOption("gamma")) {
        String gammaString = getOption("gamma");
        if (Integer.parseInt(gammaString) < 0) {
            System.out.println("Value of gap should be greater than or equal to 0");
            // Return a non-zero exit status to indicate failure
            return (1);
        }
        params.set("gamma", gammaString);
    }
    if (hasOption("lambda")) {
        String lambdaString = getOption("lambda");
        if (Integer.parseInt(lambdaString) < 2) {
            System.out.println("Value of length should be greater than or equal to 2");
            // Return a non-zero exit status to indicate failure
            return (1);
        }
        params.set("lambda", lambdaString);
    }
    if (hasOption("execMode")) {
        String modeString = getOption("execMode");
        params.set("execMode", modeString);
    }
    if (hasOption("type")) {
        String modeString = getOption("type");
        params.set("type", modeString);
    }
    if (hasOption("indexing")) {
        String indexingString = getOption("indexing");
        params.set("indexing", indexingString);
    }
    if (hasOption("partitionSize")) {
        String partitionString = getOption("partitionSize");
        params.set("partitionSize", partitionString);
    }
    if (hasOption("split")) {
        params.set("split", "true");
    } else {
        params.set("split", "false");
    }
    if (hasOption("keepFiles")) {
        String keepFilesString = getOption("keepFiles");
        params.set("keepFiles", keepFilesString);
    } else {
        params.set("keepFiles", null);
    }
    if (hasOption("resume")) {
        String resumeString = getOption("resume");
        params.set("resume", resumeString);
    } else {
        params.set("resume", null);
    }
    if (hasOption("numReducers")) {
        String numReducersString = getOption("numReducers");
        params.set("numReducers", numReducersString);
    } else {
        params.set("numReducers", null);
    }

    Path inputDir = null;
    Path outputDir = getOutputPath();

    /* ---------------------------------------------------------------------
     * ERROR CHECKS ON COMBINATION OF OPTIONS SUPPLIED TO THE DRIVER
     * --------------------------------------------------------------------*/

    // Complain if the '-(t)ype' is equal to '-(m)aximal' or '-(c)losed' and
    // the 'tempDir' is not specified
    /*if((params.get("tempDir")==null||params.get("tempDir").contentEquals("temp"))&&
       ((params.get("type").toCharArray()[0]=='m')||(params.get("type").toCharArray()[0]=='c'))){
       System.out.println("If -(t)ype is -(m)aximal or -(c)losed then a -tempDir path must be specified");
    }*/

    if ((params.get("resume") != null) && (params.get("keepFiles") != null)) {
        System.out.println("-(r)esume & -(k)eepFiles are mutually exclusive options");
        System.out.println("Exiting...");
        // Return a non-zero exit status to indicate failure
        return (1);
    }
    if ((params.get("input") != null) && (params.get("resume") != null)) {
        System.out.println("-(r)esume & -(i)nput are mutually exclusive options");
        System.out.println("Exiting...");
        // Return a non-zero exit status to indicate failure
        return (1);
    }
    if ((params.get("input") == null) && (params.get("resume") == null)) {
        System.out.println("At least one option from -(i)nput or -(r)esume must be specified");
        System.out.println("Exiting...");
        // Return a non-zero exit status to indicate failure
        return (1);
    } else {
        if (params.get("input") != null) {
            inputDir = new Path(params.get("input"));
        } else {
            inputDir = new Path(params.get("resume"));
        }
    }

    /* ---------------------------------------------------------------------
     * Checks to make sure the i/o paths exist and are consistent.
     * -------------------------------------------------------------------- */
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);

    // If the output paths exist clean them up
    if (fs.exists(outputDir)) {
        System.out.println("Deleting existing output path");
        fs.delete(outputDir, true);
    }
    // Create the necessary output paths afresh now
    fs.mkdirs(outputDir);

    // Complain if the input path doesn't exist
    if (!fs.exists(inputDir)) {
        System.out.println("\n------------\n" + " E R R O R " + "\n------------\n");
        System.out.println("Input path does not exist OR input option not specified. Exiting...");
        // Return a non-zero exit status to indicate failure
        return (1);
    }

    if (inputDir.toString().compareTo(outputDir.toString()) == 0) {
        System.out.println("\n------------\n" + " E R R O R " + "\n------------\n");
        System.out.println("The input and output path can NOT be the same."
                + "\nThe output path is deleted prior to running the Hadoop jobs."
                + "\nHence, the input would also be deleted if the paths are the same."
                + "\nExiting...");
        // Return a non-zero exit status to indicate failure
        return (1);
    }

    params.set("input", inputDir.toString());
    params.set("output", outputDir.toString());

    /*---------------------------------------------------------------------
     * END OF ERROR CHECKS
     * --------------------------------------------------------------------*/

    /* Execute the FSM Job depending upon the parameters specified. */
    String executionMethod = getOption("execMode");

    // Set the resume and keepFiles flags in the commonConfig.
    // Also, set the intermediateOutput path accordingly.
    if (params.get("resume") != null)
        commonConfig.setResumeOption(true);
    else
        commonConfig.setResumeOption(false);

    if (params.get("keepFiles") != null) {
        commonConfig.setKeepFilesOption(true);
        Path intermediateDir = new Path(params.get("keepFiles"));
        if (fs.exists(intermediateDir)) {
            fs.delete(intermediateDir, true);
        }
        commonConfig.setIntermediatePath(params.get("keepFiles"));
    } else {
        File intermediateOutputPath = File.createTempFile("MG_FSM_INTRM_OP_", "");
        // Below JDK 7 we are only allowed to create temporary files.
        // Hence, turn the file into a directory in the temporary folder.
        intermediateOutputPath.delete();
        intermediateOutputPath.mkdir();
        commonConfig.setIntermediatePath(intermediateOutputPath.getAbsolutePath().toString());
        System.out.println("The intermediate output will be written \n"
                + "to this temporary path :" + intermediateOutputPath);
        commonConfig.setKeepFilesOption(false);
    }

    // Set the 'tempDir' if it's null
    if (params.get("tempDir") == null || params.get("tempDir").contentEquals("temp")) {
        File tempOutputPath = File.createTempFile("MG_FSM_TEMP_OP_", "");
        tempOutputPath.delete();
        //tempOutputPath.mkdir();
        commonConfig.setTmpPath(tempOutputPath.getAbsolutePath().toString());
        System.out.println("The temporary output associated with the internal map-reduce\n"
                + "jobs will be written to this temporary path :" + commonConfig.getTmpPath());
    } else {
        commonConfig.setTmpPath(params.get("tempDir"));
    }

    // Set the input and output paths of the commonConfig
    commonConfig.setInputPath(params.get("input"));
    commonConfig.setOutputPath(params.get("output"));
    commonConfig.setDictionaryPath(
            commonConfig.getIntermediatePath().concat("/" + Constants.OUTPUT_DICTIONARY_FILE_PATH));

    // Supply the rest of the algorithm specific options to commonConfig
    commonConfig.setSigma(Integer.parseInt(params.get("support")));
    commonConfig.setGamma(Integer.parseInt(params.get("gamma")));
    commonConfig.setLambda(Integer.parseInt(params.get("lambda")));
    commonConfig.setPartitionSize(Long.parseLong(params.get("partitionSize")));
    commonConfig.setAllowSplits(Boolean.parseBoolean(params.get("split")));

    if (params.get("numReducers") != null) {
        commonConfig.setNumberOfReducers(Integer.parseInt(params.get("numReducers")));
    }

    switch (params.get("type").toCharArray()[0]) {
    case 'a': {
        commonConfig.setType(FsmConfig.Type.ALL);
        break;
    }
    case 'm': {
        commonConfig.setType(FsmConfig.Type.MAXIMAL);
        break;
    }
    case 'c': {
        commonConfig.setType(FsmConfig.Type.CLOSED);
        break;
    }
    default: {
        commonConfig.setType(FsmConfig.Type.ALL);
        break;
    }
    }

    switch (params.get("indexing").toCharArray()[0]) {
    case 'n': {
        commonConfig.setIndexingMethod(FsmConfig.IndexingMethod.NONE);
        break;
    }
    case 'm': {
        commonConfig.setIndexingMethod(FsmConfig.IndexingMethod.MINMAX);
        break;
    }
    case 'f': {
        commonConfig.setIndexingMethod(FsmConfig.IndexingMethod.FULL);
        break;
    }
    default: {
        commonConfig.setIndexingMethod(FsmConfig.IndexingMethod.FULL);
        break;
    }
    }

    // SEQUENTIAL EXECUTION MODE
    if ("s".equalsIgnoreCase(executionMethod)) {
        SequentialMode mySequentialMiner;
        mySequentialMiner = new SequentialMode(commonConfig);
        // If we are dealing with a fresh set of transactions
        // we need to encode & then mine.
        if (!commonConfig.isResumeOption()) {
            mySequentialMiner.createDictionary(commonConfig.getInputPath());
            mySequentialMiner.createIdToItemMap();
            // If the input path is a corpus, runSeqJob will recursively call
            // encodeAndMine() on all the files to bring together an encoded
            // sequences file and consequently call the sequences miner on
            // each of these encoded sequences
            mySequentialMiner.runSeqJob(new File(commonConfig.getInputPath()));
        }
        /*
         * If the transactions are encoded from previous runs, then run
         * the following set of functions for reading the encoded transactions
         * and then directly mine them for frequent sequences.
         */
        else {
            mySequentialMiner.setIdToItemMap(new Dictionary().readDictionary(
                    commonConfig.getInputPath().concat("/" + Constants.OUTPUT_DICTIONARY_FILE_PATH)));
            mySequentialMiner.encodeAndMine(mySequentialMiner.getCommonConfig().getInputPath());
        }
    }
    // DISTRIBUTED EXECUTION MODE
    else if ("d".equalsIgnoreCase(executionMethod)) {
        DistributedMode myDistributedMiner = new DistributedMode(commonConfig);
        /* Execute the appropriate job based on whether we need to
         * encode the input sequences or not. */
        if (!commonConfig.isResumeOption())
            myDistributedMiner.runJobs();
        else
            myDistributedMiner.resumeJobs();
    }
    // END OF EXECUTING FSM JOB

    // Return a zero exit status to indicate successful completion
    return 0;
}
From source file:distributed.hadoop.HDFSUtils.java
License:Open Source License
/**
 * Create our staging directory in HDFS (if necessary)
 *
 * @param config the HDFSConfig containing connection details
 * @throws IOException if a problem occurs
 */
protected static void createTmpDistributedCacheDirIfNecessary(HDFSConfig config) throws IOException {
    Configuration conf = new Configuration();
    config.configureForHadoop(conf, null);
    FileSystem fs = FileSystem.get(conf);
    Path p = new Path(resolvePath(WEKA_TEMP_DISTRIBUTED_CACHE_FILES, null));
    if (!fs.exists(p)) {
        fs.mkdirs(p);
    }
}
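The exists() check in this example (and in several others above) is defensive: mkdirs follows mkdir -p semantics, so calling it on an existing directory is not an error. A minimal sketch of the unconditional form, reusing the fs and p from above:

    // Safe even if the directory already exists; mkdirs returns false
    // only when the directory could not be created.
    if (!fs.mkdirs(p)) {
        throw new IOException("Failed to create " + p);
    }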
From source file:dz.lab.hdfs.CopyMove.java
public static void createDirectory(FileSystem fs) throws IOException {
    Path newDir = new Path("/tmp/playArea/newDir");
    boolean created = fs.mkdirs(newDir);
    System.out.println("Created: " + created);
}
From source file:edu.arizona.cs.hadoop.fs.irods.output.HirodsFileOutputCommitter.java
License:Apache License
/**
 * Create the temporary directory that is the root of all of the task work
 * directories.
 *
 * @param context the job's context
 */
public void setupJob(JobContext context) throws IOException {
    if (this.outputPath != null && this.tempPath != null) {
        Path tmpDir = new Path(this.tempPath, HirodsFileOutputCommitter.TEMP_DIR_NAME);
        FileSystem fileSys = tmpDir.getFileSystem(context.getConfiguration());
        if (!fileSys.mkdirs(tmpDir)) {
            LOG.error("Mkdirs failed to create " + tmpDir.toString());
        }
    }
}