List of usage examples for org.apache.hadoop.fs FileSystem mkdirs
public boolean mkdirs(Path f) throws IOException
From source file:edu.arizona.cs.hadoop.fs.irods.output.HirodsFileOutputCommitter.java
License:Apache License
private void moveTaskOutputsToIRODS(TaskAttemptContext context, FileSystem outfs, Path outDir, FileSystem workfs, Path workOutput) throws IOException { context.progress();//from ww w . jav a 2 s . co m if (workfs.isFile(workOutput)) { Path finalOutputPath = getFinalPath(outDir, workOutput, this.workPath); FSDataOutputStream irods_os = null; FSDataInputStream temp_is = null; try { // commit to iRODS irods_os = outfs.create(finalOutputPath, true); temp_is = workfs.open(workOutput); byte[] buffer = new byte[100 * 1024]; int bytes_read = 0; while ((bytes_read = temp_is.read(buffer)) != -1) { irods_os.write(buffer, 0, bytes_read); } } finally { if (temp_is != null) { try { temp_is.close(); } catch (IOException ex) { // ignore exceptions } } // remove temporary file try { workfs.delete(workOutput, true); } catch (IOException ex) { // ignore exceptions } if (irods_os != null) { irods_os.close(); } } LOG.debug("Moved " + workOutput + " to " + finalOutputPath); } else if (workfs.getFileStatus(workOutput).isDir()) { FileStatus[] paths = workfs.listStatus(workOutput); Path finalOutputPath = getFinalPath(outDir, workOutput, this.workPath); outfs.mkdirs(finalOutputPath); if (paths != null) { for (FileStatus path : paths) { moveTaskOutputsToIRODS(context, outfs, outDir, workfs, path.getPath()); } } } }
From source file:edu.emory.bmi.medicurator.storage.HdfsStorage.java
License:Apache License
/** * save data from InputStream to a relative path * required parent directories will be created if not exists * @param path String path/*www.j a va 2 s . c o m*/ * @param in InputStream * @return boolean */ public boolean saveToPath(String path, InputStream in) { try { Configuration conf = new Configuration(); FileSystem hdfs = FileSystem.get(new URI(Constants.HDFS_URI), conf); Path dst = new Path(hdfsPath + path); //Create a new file and write data to it. hdfs.mkdirs(dst.getParent()); FSDataOutputStream out = hdfs.create(dst, true); byte[] b = new byte[1024]; int numBytes = 0; while ((numBytes = in.read(b)) > 0) { out.write(b, 0, numBytes); } //Close all the file descripters out.close(); hdfs.close(); return true; } catch (Exception e) { System.out.println("[ERROR] Exception occurs when HdfsStorage saveToPath (" + path + ")" + e); } return false; }
From source file:edu.indiana.soic.ts.mapreduce.pwd.PairWiseDistance.java
License:Open Source License
public int execJob(Configuration conf, String sequenceFileFullPath, String sequenceFile, String distDir) throws Exception { /* input parameters */ LOG.info(sequenceFileFullPath);/* w w w .ja v a 2 s. c o m*/ Job job = new Job(conf, "Pairwise-calc-" + sequenceFile); /* create the base dir for this job. Delete and recreates if it exists */ Path hdMainDir = new Path(distDir + "/" + sequenceFile); FileSystem fs = FileSystem.get(conf); fs.delete(hdMainDir, true); Path hdInputDir = new Path(hdMainDir, "data"); if (!fs.mkdirs(hdInputDir)) { throw new IOException("Mkdirs failed to create " + hdInputDir.toString()); } int noOfSequences = getNoOfSequences(sequenceFileFullPath, fs); int noOfDivisions = (int) Math.ceil(noOfSequences / (double) blockSize); int noOfBlocks = (noOfDivisions * (noOfDivisions + 1)) / 2; LOG.info("No of divisions :" + noOfDivisions + "\nNo of blocks :" + noOfBlocks + "\nBlock size :" + blockSize); // Retrieving the configuration form the job to set the properties // Setting properties to the original conf does not work (possible // Hadoop bug) Configuration jobConf = job.getConfiguration(); // Input dir in HDFS. Create this in newly created job base dir Path inputDir = new Path(hdMainDir, "input"); if (!fs.mkdirs(inputDir)) { throw new IOException("Mkdirs failed to create " + inputDir.toString()); } Long dataPartitionStartTime = System.nanoTime(); partitionData(sequenceFileFullPath, noOfSequences, blockSize, fs, noOfDivisions, jobConf, inputDir); distributeData(blockSize, conf, fs, hdInputDir, noOfDivisions); long dataPartTime = (System.nanoTime() - dataPartitionStartTime) / 1000000; LOG.info("Data Partition & Scatter Completed in (ms):" + dataPartTime); // Output dir in HDFS Path hdOutDir = new Path(hdMainDir, "out"); jobConf.setInt(Constants.BLOCK_SIZE, blockSize); jobConf.setInt(Constants.NO_OF_DIVISIONS, noOfDivisions); jobConf.setInt(Constants.NO_OF_SEQUENCES, noOfSequences); jobConf.set(Constants.DIST_FUNC, distFunc); job.setJarByClass(PairWiseDistance.class); job.setMapperClass(SWGMap.class); job.setReducerClass(SWGReduce.class); job.setOutputKeyClass(LongWritable.class); job.setOutputValueClass(SWGWritable.class); FileInputFormat.setInputPaths(job, hdInputDir); FileOutputFormat.setOutputPath(job, hdOutDir); job.setInputFormatClass(SequenceFileInputFormat.class); job.setOutputFormatClass(SequenceFileOutputFormat.class); job.setNumReduceTasks(noOfDivisions); long startTime = System.currentTimeMillis(); int exitStatus = job.waitForCompletion(true) ? 0 : 1; double executionTime = (System.currentTimeMillis() - startTime) / 1000.0; LOG.info("Job Finished in " + executionTime + " seconds"); LOG.info("# #seq\t#blockS\tTtime\tinput\tdataDistTime\toutput" + noOfSequences + "\t" + noOfBlocks + "\t" + executionTime + "\t" + sequenceFileFullPath + "\t" + dataPartTime + "\t" + hdMainDir); return exitStatus; }
From source file:edu.isi.mavuno.util.MavunoUtils.java
License:Apache License
public static void createDirectory(Configuration conf, String path) throws IOException { FileSystem fs; try {// www . j av a 2 s .c o m fs = FileSystem.get(new URI(path), conf); } catch (URISyntaxException e) { throw new RuntimeException("Error creating directory -- " + path); } fs.mkdirs(new Path(path)); }
From source file:edu.iu.ccd.CCDLauncher.java
License:Apache License
private void launch(String inputDirPath, int r, double lambda, int numIterations, int numMapTasks, int numThreadsPerWorker, int numModelSlices, String workDirPath, String testFilePath) throws IOException, URISyntaxException, InterruptedException, ExecutionException, ClassNotFoundException {/*from w ww . j av a2 s. c o m*/ Configuration configuration = getConf(); FileSystem fs = FileSystem.get(configuration); Path inputDir = new Path(inputDirPath); Path workDir = new Path(workDirPath); if (fs.exists(workDir)) { fs.delete(workDir, true); fs.mkdirs(workDir); } Path modelDir = new Path(workDirPath, "model"); fs.mkdirs(modelDir); // Do not make output dir Path outputDir = new Path(workDirPath, "output"); long startTime = System.currentTimeMillis(); runCCD(inputDir, r, lambda, numIterations, numMapTasks, numThreadsPerWorker, numModelSlices, modelDir, outputDir, testFilePath, configuration); long endTime = System.currentTimeMillis(); System.out.println("Total SGD Execution Time: " + (endTime - startTime)); }
From source file:edu.iu.daal_cov.COVDaalLauncher.java
License:Apache License
private void launch(String inputDirPath, String workDirPath, int mem, int numMapTasks, int numThreadsPerWorker) throws IOException, URISyntaxException, InterruptedException, ExecutionException, ClassNotFoundException {// www .ja va 2s . c o m Configuration configuration = getConf(); FileSystem fs = FileSystem.get(configuration); Path inputDir = new Path(inputDirPath); Path workDir = new Path(workDirPath); if (fs.exists(workDir)) { fs.delete(workDir, true); fs.mkdirs(workDir); } Path modelDir = new Path(workDirPath, "model"); fs.mkdirs(modelDir); // Do not make output dir Path outputDir = new Path(workDirPath, "output"); long startTime = System.currentTimeMillis(); runCOV(inputDir, mem, numMapTasks, numThreadsPerWorker, modelDir, outputDir, configuration); long endTime = System.currentTimeMillis(); System.out.println("Total COV Execution Time: " + (endTime - startTime)); }
From source file:edu.iu.daal_kmeans.regroupallgather.KMeansDaalLauncher.java
License:Apache License
/** * Launches all the tasks in order./*from ww w. jav a 2 s. c o m*/ */ @Override public int run(String[] args) throws Exception { /* Put shared libraries into the distributed cache */ Configuration conf = this.getConf(); Initialize init = new Initialize(conf, args); /* Put shared libraries into the distributed cache */ init.loadDistributedLibs(); // load args init.loadSysArgs(); //load app args conf.setInt(HarpDAALConstants.FILE_DIM, Integer.parseInt(args[init.getSysArgNum()])); conf.setInt(HarpDAALConstants.FEATURE_DIM, Integer.parseInt(args[init.getSysArgNum() + 1])); conf.setInt(HarpDAALConstants.NUM_CENTROIDS, Integer.parseInt(args[init.getSysArgNum() + 2])); // config job System.out.println("Starting Job"); long perJobSubmitTime = System.currentTimeMillis(); System.out.println( "Start Job#" + " " + new SimpleDateFormat("HH:mm:ss.SSS").format(Calendar.getInstance().getTime())); Job kmeansJob = init.createJob("kmeansJob", KMeansDaalLauncher.class, KMeansDaalCollectiveMapper.class); // initialize centroids data JobConf thisjobConf = (JobConf) kmeansJob.getConfiguration(); FileSystem fs = FileSystem.get(conf); int nFeatures = Integer.parseInt(args[init.getSysArgNum() + 1]); int numCentroids = Integer.parseInt(args[init.getSysArgNum() + 2]); Path workPath = init.getWorkPath(); Path cenDir = new Path(workPath, "centroids"); fs.mkdirs(cenDir); if (fs.exists(cenDir)) { fs.delete(cenDir, true); } Path initCenDir = new Path(cenDir, "init_centroids"); DataGenerator.generateDenseDataSingle(numCentroids, nFeatures, 1000, 0, " ", initCenDir, fs); thisjobConf.set(HarpDAALConstants.CEN_DIR, cenDir.toString()); thisjobConf.set(HarpDAALConstants.CENTROID_FILE_NAME, "init_centroids"); //generate Data if required boolean generateData = Boolean.parseBoolean(args[init.getSysArgNum() + 3]); if (generateData) { Path inputPath = init.getInputPath(); int total_points = Integer.parseInt(args[init.getSysArgNum() + 4]); int total_files = Integer.parseInt(args[init.getSysArgNum() + 5]); String tmpDirPathName = args[init.getSysArgNum() + 6]; DataGenerator.generateDenseDataMulti(total_points, nFeatures, total_files, 2, 1, ",", inputPath, tmpDirPathName, fs); } // finish job boolean jobSuccess = kmeansJob.waitForCompletion(true); System.out.println( "End Job#" + " " + new SimpleDateFormat("HH:mm:ss.SSS").format(Calendar.getInstance().getTime())); System.out.println( "| Job#" + " Finished in " + (System.currentTimeMillis() - perJobSubmitTime) + " miliseconds |"); if (!jobSuccess) { kmeansJob.killJob(); System.out.println("kmeansJob failed"); } return 0; }
From source file:edu.iu.daal_kmeans.regroupallgather.KMUtil.java
License:Apache License
/** * Generate centroids and upload to the cDir * /*from ww w . j a v a2 s. c o m*/ * @param numCentroids * @param vectorSize * @param configuration * @param random * @param cenDir * @param fs * @throws IOException */ static void generateCentroids(int numCentroids, int vectorSize, Configuration configuration, Path cenDir, FileSystem fs) throws IOException { Random random = new Random(); double[] data = null; if (fs.exists(cenDir)) fs.delete(cenDir, true); if (!fs.mkdirs(cenDir)) { throw new IOException("Mkdirs failed to create " + cenDir.toString()); } data = new double[numCentroids * vectorSize]; for (int i = 0; i < data.length; i++) { data[i] = random.nextDouble() * 1000; } Path initClustersFile = new Path(cenDir, Constants.CENTROID_FILE_NAME); System.out.println("Generate centroid data." + initClustersFile.toString()); FSDataOutputStream out = fs.create(initClustersFile, true); BufferedWriter bw = new BufferedWriter(new OutputStreamWriter(out)); for (int i = 0; i < data.length; i++) { if ((i % vectorSize) == (vectorSize - 1)) { bw.write(data[i] + ""); bw.newLine(); } else { bw.write(data[i] + " "); } } bw.flush(); bw.close(); System.out.println("Wrote centroids data to file"); }
From source file:edu.iu.daal_linreg.LinRegDaalLauncher.java
License:Apache License
private void launch(String inputDirPath, String testDirPath, String testGroundTruthDirPath, String workDirPath, int mem, int batchSize, int numMapTasks, int numThreadsPerWorker) throws IOException, URISyntaxException, InterruptedException, ExecutionException, ClassNotFoundException { Configuration configuration = getConf(); FileSystem fs = FileSystem.get(configuration); Path inputDir = new Path(inputDirPath); Path workDir = new Path(workDirPath); if (fs.exists(workDir)) { fs.delete(workDir, true);/*from w w w.j av a 2 s . co m*/ fs.mkdirs(workDir); } Path modelDir = new Path(workDirPath, "model"); fs.mkdirs(modelDir); // Do not make output dir Path outputDir = new Path(workDirPath, "output"); long startTime = System.currentTimeMillis(); runLinReg(inputDir, testDirPath, testGroundTruthDirPath, mem, batchSize, numMapTasks, numThreadsPerWorker, modelDir, outputDir, configuration); long endTime = System.currentTimeMillis(); System.out.println("Total Linear Regression Execution Time: " + (endTime - startTime)); }
From source file:edu.iu.daal_mom.MOMDaalLauncher.java
License:Apache License
private void launch(String inputDirPath, String workDirPath, int mem, int numMapTasks, int numThreadsPerWorker) throws IOException, URISyntaxException, InterruptedException, ExecutionException, ClassNotFoundException {/* w ww . j ava2s . c o m*/ Configuration configuration = getConf(); FileSystem fs = FileSystem.get(configuration); Path inputDir = new Path(inputDirPath); Path workDir = new Path(workDirPath); if (fs.exists(workDir)) { fs.delete(workDir, true); fs.mkdirs(workDir); } Path modelDir = new Path(workDirPath, "model"); fs.mkdirs(modelDir); // Do not make output dir Path outputDir = new Path(workDirPath, "output"); long startTime = System.currentTimeMillis(); runMOM(inputDir, mem, numMapTasks, numThreadsPerWorker, modelDir, outputDir, configuration); long endTime = System.currentTimeMillis(); System.out.println("Total MOM Execution Time: " + (endTime - startTime)); }