List of usage examples for org.apache.hadoop.fs.FileSystem#mkdirs
public boolean mkdirs(Path f) throws IOException
From source file:edu.iu.daal_naive.NaiveDaalLauncher.java
License:Apache License
private void launch(String inputDirPath, String testDirPath, String testGroundTruthDirPath, String workDirPath, int mem, int vecsize, int num_class, int num_test, int numMapTasks, int numThreadsPerWorker, boolean generateData, int num_train_points, int numfiles) throws IOException, URISyntaxException, InterruptedException, ExecutionException, ClassNotFoundException { Configuration configuration = getConf(); FileSystem fs = FileSystem.get(configuration); Path inputDir = new Path(inputDirPath); Path testDir = new Path(testDirPath); Path testGroundTruthDir = new Path(testGroundTruthDirPath); Path workDir = new Path(workDirPath); if (fs.exists(workDir)) { fs.delete(workDir, true);/*from ww w. j a va 2s.co m*/ fs.mkdirs(workDir); } Path modelDir = new Path(workDirPath, "model"); fs.mkdirs(modelDir); // Do not make output dir Path outputDir = new Path(workDirPath, "output"); long startTime = System.currentTimeMillis(); //test and generate training datasets if (generateData) { System.out.println("Generate Naive Baytes Training datasets."); NaiveUtil.generateData(num_train_points, num_test, vecsize, numfiles, num_class, fs, "/tmp/naive", inputDir, testDir, testGroundTruthDir); } runNaive(inputDir, testDirPath, testGroundTruthDirPath, mem, vecsize, num_class, num_test, numMapTasks, numThreadsPerWorker, modelDir, outputDir, configuration); long endTime = System.currentTimeMillis(); System.out.println("Total Naive Execution Time: " + (endTime - startTime)); }
From source file:edu.iu.data_aux.Initialize.java
License:Apache License
public boolean loadSysArgs() throws IOException, URISyntaxException, InterruptedException, ExecutionException, ClassNotFoundException {//{{{ if (this.args.length < this.sys_args_num) { System.err.println("Wrong Command line args number"); ToolRunner.printGenericCommandUsage(System.err); return false; }//from w w w.j a v a 2 s. c o m // init system args this.num_mapper = Integer.parseInt(args[0]); this.num_thread = Integer.parseInt(args[1]); this.mem = Integer.parseInt(args[2]); this.iterations = Integer.parseInt(args[3]); this.inputDir = args[4]; this.workDir = args[5]; //create HDFS directories FileSystem fs = FileSystem.get(this.conf); this.inputPath = new Path(this.inputDir); this.workPath = new Path(this.workDir); if (fs.exists(workPath)) { fs.delete(workPath, true); fs.mkdirs(workPath); } this.outputPath = new Path(this.workDir, "output"); //config Constants value this.conf.setInt(HarpDAALConstants.NUM_MAPPERS, this.num_mapper); this.conf.setInt(HarpDAALConstants.NUM_THREADS, this.num_thread); this.conf.setInt(HarpDAALConstants.NUM_ITERATIONS, this.iterations); return true; }
From source file:edu.iu.kmeans.DataGen.java
License:Apache License
/** * Generate centroids and upload to the cDir * //from w ww.j a v a 2 s . c om * @param numCentroids * @param vectorSize * @param configuration * @param random * @param cDir * @param fs * @throws IOException */ static void generateCentroids(int numCentroids, int vectorSize, Configuration configuration, Path cDir, FileSystem fs, int startJobID) throws IOException { Random random = new Random(); double[] data = null; if (fs.exists(cDir)) fs.delete(cDir, true); if (!fs.mkdirs(cDir)) { throw new IOException("Mkdirs failed to create " + cDir.toString()); } data = new double[numCentroids * vectorSize]; for (int i = 0; i < data.length; i++) { // data[i] = 1000; data[i] = random.nextDouble() * 1000; } Path initClustersFile = new Path(cDir, KMeansConstants.CENTROID_FILE_PREFIX + startJobID); System.out.println("Generate centroid data." + initClustersFile.toString()); FSDataOutputStream out = fs.create(initClustersFile, true); BufferedWriter bw = new BufferedWriter(new OutputStreamWriter(out)); for (int i = 0; i < data.length; i++) { // out.writeDouble(data[i]); bw.write(data[i] + " "); if ((i % vectorSize) == (vectorSize - 1)) { bw.write(data[i] + ""); bw.newLine(); } else { bw.write(data[i] + " "); } } bw.flush(); bw.close(); // out.flush(); // out.sync(); // out.close(); System.out.println("Wrote centroids data to file"); }
From source file:edu.iu.kmeans.KMeansMapCollective.java
License:Apache License
private void launch(int numOfDataPoints, int numCentroids, int vectorSize, int numPointFiles, int numMapTasks, int numIterations, int iterationPerJob, int startJobID, String workDir, String localPointFilesDir, boolean generateData) throws IOException, URISyntaxException, InterruptedException, ExecutionException, ClassNotFoundException {//w w w .ja v a 2 s .co m Configuration configuration = getConf(); Path workDirPath = new Path(workDir); FileSystem fs = FileSystem.get(configuration); Path dataDir = new Path(workDirPath, "data"); Path cenDir = new Path(workDirPath, "centroids"); Path outDir = new Path(workDirPath, "out"); if (fs.exists(outDir)) { fs.delete(outDir, true); } fs.mkdirs(outDir); if (generateData) { System.out.println("Generate data."); DataGen.generateData(numOfDataPoints, numCentroids, vectorSize, numPointFiles, configuration, fs, dataDir, cenDir, localPointFilesDir, startJobID); } DataGen.generateCentroids(numCentroids, vectorSize, configuration, cenDir, fs, startJobID); long startTime = System.currentTimeMillis(); runKMeansAllReduce(numOfDataPoints, numCentroids, vectorSize, numIterations, iterationPerJob, startJobID, numPointFiles, numMapTasks, configuration, workDirPath, dataDir, cenDir, outDir); long endTime = System.currentTimeMillis(); System.out.println("Total K-means Execution Time: " + (endTime - startTime)); }
From source file:edu.iu.kmeans.regroupallgather.KMeansLauncher.java
License:Apache License
/**
 * Resets the "centroids" and "out" locations under the work directory,
 * optionally generates the point data, generates the initial centroids, then
 * runs the K-means job and prints its elapsed time in milliseconds.
 *
 * <p>The "centroids" directory is recreated empty. An existing "out" directory
 * is removed but not recreated here. Only the job run itself is timed.
 *
 * @param workDir            root work directory
 * @param localPointFilesDir local directory passed through to the data generator
 * @param generateData       when {@code true}, point data is generated first
 * @throws IOException if a file system operation fails
 */
private void launch(int numOfDataPoints, int numCentroids, int vectorSize, int numPointFiles, int numMapTasks,
        int numThreads, int numIterations, String workDir, String localPointFilesDir, boolean generateData)
        throws IOException, URISyntaxException, InterruptedException, ExecutionException,
        ClassNotFoundException {
    final Configuration jobConf = getConf();
    final Path workRoot = new Path(workDir);
    final FileSystem fileSystem = FileSystem.get(jobConf);
    final Path pointsPath = new Path(workRoot, "data");

    // Centroid directory: drop any previous content, then recreate it.
    final Path centroidsPath = new Path(workRoot, "centroids");
    final boolean staleCentroids = fileSystem.exists(centroidsPath);
    if (staleCentroids) {
        fileSystem.delete(centroidsPath, true);
    }
    fileSystem.mkdirs(centroidsPath);

    // Output directory: only cleared, not created.
    final Path resultPath = new Path(workRoot, "out");
    final boolean staleOutput = fileSystem.exists(resultPath);
    if (staleOutput) {
        fileSystem.delete(resultPath, true);
    }

    if (generateData) {
        System.out.println("Generate data.");
        KMUtil.generateData(numOfDataPoints, numCentroids, vectorSize, numPointFiles, jobConf, fileSystem,
                pointsPath, centroidsPath, localPointFilesDir);
    }
    KMUtil.generateCentroids(numCentroids, vectorSize, jobConf, centroidsPath, fileSystem);

    final long begin = System.currentTimeMillis();
    runKMeansAllReduce(numOfDataPoints, numCentroids, vectorSize, numPointFiles, numMapTasks, numThreads,
            numIterations, pointsPath, centroidsPath, resultPath, jobConf);
    final long elapsedMillis = System.currentTimeMillis() - begin;

    System.out.println("Total K-means Execution Time: " + elapsedMillis);
}
From source file:edu.iu.kmeans.regroupallgather.KMUtil.java
License:Apache License
/** * Generate centroids and upload to the cDir * /*from w w w. j a va 2 s . c om*/ * @param numCentroids * @param vectorSize * @param configuration * @param random * @param cenDir * @param fs * @throws IOException */ static void generateCentroids(int numCentroids, int vectorSize, Configuration configuration, Path cenDir, FileSystem fs) throws IOException { Random random = new Random(); double[] data = null; if (fs.exists(cenDir)) fs.delete(cenDir, true); if (!fs.mkdirs(cenDir)) { throw new IOException("Mkdirs failed to create " + cenDir.toString()); } data = new double[numCentroids * vectorSize]; for (int i = 0; i < data.length; i++) { // data[i] = 1000; data[i] = random.nextDouble() * 1000; } Path initClustersFile = new Path(cenDir, Constants.CENTROID_FILE_NAME); System.out.println("Generate centroid data." + initClustersFile.toString()); FSDataOutputStream out = fs.create(initClustersFile, true); BufferedWriter bw = new BufferedWriter(new OutputStreamWriter(out)); for (int i = 0; i < data.length; i++) { if ((i % vectorSize) == (vectorSize - 1)) { bw.write(data[i] + ""); bw.newLine(); } else { bw.write(data[i] + " "); } } bw.flush(); bw.close(); System.out.println("Wrote centroids data to file"); }
From source file:edu.iu.kmeans.rotation.KMeansLauncher.java
License:Apache License
private void launch(int numOfDataPoints, int numCentroids, int vectorSize, int numPointFiles, int numMapTasks, int numThreads, int numIterations, String workDir, String localPointFilesDir, boolean generateData) throws IOException, URISyntaxException, InterruptedException, ExecutionException, ClassNotFoundException {//ww w . j a va2 s .c o m Configuration configuration = getConf(); Path workDirPath = new Path(workDir); FileSystem fs = FileSystem.get(configuration); Path dataDir = new Path(workDirPath, "data"); Path cenDir = new Path(workDirPath, "centroids"); if (fs.exists(cenDir)) { fs.delete(cenDir, true); } fs.mkdirs(cenDir); Path outDir = new Path(workDirPath, "out"); if (fs.exists(outDir)) { fs.delete(outDir, true); } if (generateData) { System.out.println("Generate data."); KMUtil.generateData(numOfDataPoints, numCentroids, vectorSize, numPointFiles, configuration, fs, dataDir, cenDir, localPointFilesDir); } long startTime = System.currentTimeMillis(); runKMeansAllReduce(numOfDataPoints, numCentroids, vectorSize, numPointFiles, numMapTasks, numThreads, numIterations, dataDir, cenDir, outDir, configuration); long endTime = System.currentTimeMillis(); System.out.println("Total K-means Execution Time: " + (endTime - startTime)); }
From source file:edu.iu.kmeans.sgxsimu.KMeansLauncher.java
License:Apache License
/** * Launches all the tasks in order.//from w w w . j ava 2s . c o m */ @Override public int run(String[] args) throws Exception { /* Put shared libraries into the distributed cache */ Configuration conf = this.getConf(); Initialize init = new Initialize(conf, args); // load args init.loadSysArgs(); init.loadDistributedLibs(); //load app args conf.setInt(HarpDAALConstants.FILE_DIM, Integer.parseInt(args[init.getSysArgNum()])); conf.setInt(HarpDAALConstants.FEATURE_DIM, Integer.parseInt(args[init.getSysArgNum() + 1])); conf.setInt(HarpDAALConstants.NUM_CENTROIDS, Integer.parseInt(args[init.getSysArgNum() + 2])); conf.setInt(Constants.ENCLAVE_TOTAL, Integer.parseInt(args[init.getSysArgNum() + 3])); conf.setInt(Constants.ENCLAVE_PER_THD, Integer.parseInt(args[init.getSysArgNum() + 4])); conf.setInt(Constants.ENCLAVE_TASK, Integer.parseInt(args[init.getSysArgNum() + 5])); conf.setBoolean(Constants.ENABLE_SIMU, Boolean.parseBoolean(args[init.getSysArgNum() + 6])); // config job System.out.println("Starting Job"); long perJobSubmitTime = System.currentTimeMillis(); System.out.println( "Start Job#" + " " + new SimpleDateFormat("HH:mm:ss.SSS").format(Calendar.getInstance().getTime())); Job kmeansJob = init.createJob("kmeansJob", KMeansLauncher.class, KMeansCollectiveMapper.class); // initialize centroids data JobConf thisjobConf = (JobConf) kmeansJob.getConfiguration(); FileSystem fs = FileSystem.get(conf); int nFeatures = Integer.parseInt(args[init.getSysArgNum() + 1]); int numCentroids = Integer.parseInt(args[init.getSysArgNum() + 2]); Path workPath = init.getWorkPath(); Path cenDir = new Path(workPath, "centroids"); fs.mkdirs(cenDir); if (fs.exists(cenDir)) { fs.delete(cenDir, true); } Path initCenDir = new Path(cenDir, "init_centroids"); DataGenerator.generateDenseDataSingle(numCentroids, nFeatures, 1000, 0, " ", initCenDir, fs); thisjobConf.set(HarpDAALConstants.CEN_DIR, cenDir.toString()); thisjobConf.set(HarpDAALConstants.CENTROID_FILE_NAME, "init_centroids"); 
//generate Data if required boolean generateData = Boolean.parseBoolean(args[init.getSysArgNum() + 7]); if (generateData) { Path inputPath = init.getInputPath(); int total_points = Integer.parseInt(args[init.getSysArgNum() + 8]); int total_files = Integer.parseInt(args[init.getSysArgNum() + 9]); String tmpDirPathName = args[init.getSysArgNum() + 10]; DataGenerator.generateDenseDataMulti(total_points, nFeatures, total_files, 2, 1, ",", inputPath, tmpDirPathName, fs); } // finish job boolean jobSuccess = kmeansJob.waitForCompletion(true); System.out.println( "End Job#" + " " + new SimpleDateFormat("HH:mm:ss.SSS").format(Calendar.getInstance().getTime())); System.out.println( "| Job#" + " Finished in " + (System.currentTimeMillis() - perJobSubmitTime) + " miliseconds |"); if (!jobSuccess) { kmeansJob.killJob(); System.out.println("kmeansJob failed"); } return 0; }
From source file:edu.iu.lda.LDALauncher.java
License:Apache License
private void launch(String docDirPath, int numTopics, double alpha, double beta, int numIterations, int minBound, int maxBound, int numMapTasks, int numThreadsPerWorker, double scheduleRatio, int mem, String workDirPath, boolean printModel) throws IOException, URISyntaxException, InterruptedException, ExecutionException, ClassNotFoundException { Configuration configuration = getConf(); FileSystem fs = FileSystem.get(configuration); Path docDir = new Path(docDirPath); Path workDir = new Path(workDirPath); if (fs.exists(workDir)) { fs.delete(workDir, true);//from ww w .j a v a2s . co m fs.mkdirs(workDir); } Path modelDir = new Path(workDirPath, "model"); fs.mkdirs(modelDir); // Do not make output dir Path outputDir = new Path(workDirPath, "output"); long startTime = System.currentTimeMillis(); runLDA(docDir, numTopics, alpha, beta, numIterations, minBound, maxBound, numMapTasks, numThreadsPerWorker, scheduleRatio, mem, printModel, modelDir, outputDir, configuration); long endTime = System.currentTimeMillis(); System.out.println("Total LDA Execution Time: " + (endTime - startTime)); }
From source file:edu.iu.lda.LDAMPCollectiveMapper.java
License:Apache License
private void printWordTableMap(Table<TopicCountList>[] wordTableMap, String folderPath, int selfID, Configuration congfiguration) throws IOException { FileSystem fs = FileSystem.get(congfiguration); Path folder = new Path(folderPath); if (!fs.exists(folder)) { fs.mkdirs(folder); }/*ww w. j av a 2 s. c o m*/ Path file = new Path(folderPath + "/" + selfID); PrintWriter writer = new PrintWriter(new BufferedWriter(new OutputStreamWriter(fs.create(file)))); for (Table<TopicCountList> wTable : wordTableMap) { for (Partition<TopicCountList> wPartition : wTable.getPartitions()) { int wordID = wPartition.id(); LongArrayList wRow = wPartition.get().getTopicCount(); // Print word writer.print(wordID); // Print topic count for (int i = 0; i < wRow.size(); i++) { long t = wRow.getLong(i); writer.print(" " + (int) t + ":" + (int) (t >>> 32)); } writer.println(); } } writer.flush(); writer.close(); }