Example usage for org.apache.hadoop.fs FileSystem mkdirs

List of usage examples for org.apache.hadoop.fs FileSystem mkdirs

Introduction

On this page you can find example usage of the mkdirs method of org.apache.hadoop.fs.FileSystem.

Prototype

public boolean mkdirs(Path f) throws IOException 

Source Link

Document

Calls mkdirs(Path, FsPermission) with the default permission.

Usage

From source file:edu.iu.daal_naive.NaiveDaalLauncher.java

License:Apache License

/**
 * Prepares the work directory, optionally generates datasets, then runs the job through
 * {@code runNaive} and prints the elapsed wall-clock time in milliseconds.
 *
 * <p>An already existing work directory is deleted recursively and recreated. The {@code model}
 * sub-directory is always created; the {@code output} path is only computed and handed to
 * {@code runNaive}, it is not created here.
 *
 * @param inputDirPath           input directory, passed to data generation and to {@code runNaive}
 * @param testDirPath            test directory, passed to data generation and to {@code runNaive}
 * @param testGroundTruthDirPath ground-truth directory, passed to data generation and to {@code runNaive}
 * @param workDirPath            work directory holding the {@code model} and {@code output} sub-paths
 * @param generateData           whether {@code NaiveUtil.generateData} is invoked before the run
 *
 * <p>All remaining parameters are forwarded unchanged to {@code NaiveUtil.generateData} and/or
 * {@code runNaive}.
 */
private void launch(String inputDirPath, String testDirPath, String testGroundTruthDirPath, String workDirPath,
        int mem, int vecsize, int num_class, int num_test, int numMapTasks, int numThreadsPerWorker,
        boolean generateData, int num_train_points, int numfiles) throws IOException, URISyntaxException,
        InterruptedException, ExecutionException, ClassNotFoundException {
    final Configuration conf = getConf();
    final FileSystem hdfs = FileSystem.get(conf);

    final Path trainInput = new Path(inputDirPath);
    final Path testInput = new Path(testDirPath);
    final Path groundTruthInput = new Path(testGroundTruthDirPath);
    final Path workRoot = new Path(workDirPath);

    // Start from an empty work directory when one is left over from an earlier run.
    final boolean workRootPresent = hdfs.exists(workRoot);
    if (workRootPresent) {
        hdfs.delete(workRoot, true);
        hdfs.mkdirs(workRoot);
    }

    final Path modelPath = new Path(workDirPath, "model");
    hdfs.mkdirs(modelPath);

    // The output directory is deliberately not created here.
    final Path outputPath = new Path(workDirPath, "output");

    // The timer covers data generation as well as the job itself.
    final long begin = System.currentTimeMillis();

    if (generateData) {
        System.out.println("Generate Naive Baytes Training datasets.");
        NaiveUtil.generateData(num_train_points, num_test, vecsize, numfiles, num_class, hdfs, "/tmp/naive",
                trainInput, testInput, groundTruthInput);
    }

    runNaive(trainInput, testDirPath, testGroundTruthDirPath, mem, vecsize, num_class, num_test, numMapTasks,
            numThreadsPerWorker, modelPath, outputPath, conf);

    final long elapsedMillis = System.currentTimeMillis() - begin;
    System.out.println("Total Naive Execution Time: " + elapsedMillis);
}

From source file:edu.iu.data_aux.Initialize.java

License:Apache License

/**
 * Reads the leading system arguments from {@code args}, prepares the HDFS paths and copies the
 * mapper/thread/iteration counts into the configuration.
 *
 * <p>Argument layout: {@code [0]} mappers, {@code [1]} threads, {@code [2]} memory,
 * {@code [3]} iterations, {@code [4]} input directory, {@code [5]} work directory.
 * An existing work directory is deleted recursively and recreated; the {@code output} path under
 * it is only computed, not created.
 *
 * @return {@code false} after printing usage when fewer than {@code sys_args_num} arguments are
 *         present, {@code true} otherwise
 * @throws IOException if a file system operation fails
 */
public boolean loadSysArgs() throws IOException, URISyntaxException, InterruptedException, ExecutionException,
        ClassNotFoundException {
    final boolean enoughArgs = this.args.length >= this.sys_args_num;
    if (!enoughArgs) {
        System.err.println("Wrong Command line args number");
        ToolRunner.printGenericCommandUsage(System.err);
        return false;
    }

    // System arguments, in positional order.
    final String[] argv = this.args;
    this.num_mapper = Integer.parseInt(argv[0]);
    this.num_thread = Integer.parseInt(argv[1]);
    this.mem = Integer.parseInt(argv[2]);
    this.iterations = Integer.parseInt(argv[3]);
    this.inputDir = argv[4];
    this.workDir = argv[5];

    // HDFS locations; a stale work directory is wiped and recreated.
    final FileSystem hdfs = FileSystem.get(this.conf);
    this.inputPath = new Path(this.inputDir);
    this.workPath = new Path(this.workDir);
    final boolean staleWorkPath = hdfs.exists(this.workPath);
    if (staleWorkPath) {
        hdfs.delete(this.workPath, true);
        hdfs.mkdirs(this.workPath);
    }
    this.outputPath = new Path(this.workDir, "output");

    // Publish the parsed values through the configuration.
    this.conf.setInt(HarpDAALConstants.NUM_MAPPERS, this.num_mapper);
    this.conf.setInt(HarpDAALConstants.NUM_THREADS, this.num_thread);
    this.conf.setInt(HarpDAALConstants.NUM_ITERATIONS, this.iterations);

    return true;
}

From source file:edu.iu.kmeans.DataGen.java

License:Apache License

/**
 * Generates random centroids and writes them as text to a file inside {@code cDir}.
 *
 * <p>{@code cDir} is deleted recursively if it exists and then recreated. The output file is named
 * {@code KMeansConstants.CENTROID_FILE_PREFIX + startJobID}. Each line holds one centroid:
 * {@code vectorSize} values in the range [0, 1000), separated by single spaces.
 *
 * @param numCentroids  number of centroids to generate
 * @param vectorSize    number of values per centroid (values per output line)
 * @param configuration not used by this method
 * @param cDir          directory that receives the centroid file
 * @param fs            file system on which {@code cDir} lives
 * @param startJobID    suffix appended to the centroid file name
 * @throws IOException if the directory cannot be created or the file cannot be written
 */
static void generateCentroids(int numCentroids, int vectorSize, Configuration configuration, Path cDir,
        FileSystem fs, int startJobID) throws IOException {
    Random random = new Random();
    if (fs.exists(cDir)) {
        fs.delete(cDir, true);
    }
    if (!fs.mkdirs(cDir)) {
        throw new IOException("Mkdirs failed to create " + cDir.toString());
    }
    double[] data = new double[numCentroids * vectorSize];
    for (int i = 0; i < data.length; i++) {
        data[i] = random.nextDouble() * 1000;
    }
    Path initClustersFile = new Path(cDir, KMeansConstants.CENTROID_FILE_PREFIX + startJobID);
    System.out.println("Generate centroid data." + initClustersFile.toString());
    // try-with-resources closes (and thereby flushes) the writer even when a write fails.
    try (FSDataOutputStream out = fs.create(initClustersFile, true);
            BufferedWriter bw = new BufferedWriter(new OutputStreamWriter(out))) {
        for (int i = 0; i < data.length; i++) {
            // Write each value exactly once; the last value of a centroid ends the line.
            if ((i % vectorSize) == (vectorSize - 1)) {
                bw.write(data[i] + "");
                bw.newLine();
            } else {
                bw.write(data[i] + " ");
            }
        }
    }
    System.out.println("Wrote centroids data to file");
}

From source file:edu.iu.kmeans.KMeansMapCollective.java

License:Apache License

/**
 * Prepares the {@code out} directory under the work directory, optionally generates point data,
 * always generates centroids, then runs {@code runKMeansAllReduce} and prints its elapsed
 * wall-clock time in milliseconds.
 *
 * <p>The {@code data}, {@code centroids} and {@code out} paths are all resolved relative to
 * {@code workDir}. Only the run itself is timed; data and centroid generation happen before the
 * timer starts.
 *
 * @param workDir      root work directory
 * @param generateData whether {@code DataGen.generateData} is invoked before centroid generation
 *
 * <p>All remaining parameters are forwarded unchanged to {@code DataGen} and/or
 * {@code runKMeansAllReduce}.
 */
private void launch(int numOfDataPoints, int numCentroids, int vectorSize, int numPointFiles, int numMapTasks,
        int numIterations, int iterationPerJob, int startJobID, String workDir, String localPointFilesDir,
        boolean generateData) throws IOException, URISyntaxException, InterruptedException, ExecutionException,
        ClassNotFoundException {
    final Configuration conf = getConf();
    final Path workRoot = new Path(workDir);
    final FileSystem hdfs = FileSystem.get(conf);

    final Path pointsDir = new Path(workRoot, "data");
    final Path centroidsDir = new Path(workRoot, "centroids");
    final Path resultDir = new Path(workRoot, "out");

    // Always start with a fresh, existing output directory.
    final boolean staleResultDir = hdfs.exists(resultDir);
    if (staleResultDir) {
        hdfs.delete(resultDir, true);
    }
    hdfs.mkdirs(resultDir);

    if (generateData) {
        System.out.println("Generate data.");
        DataGen.generateData(numOfDataPoints, numCentroids, vectorSize, numPointFiles, conf, hdfs,
                pointsDir, centroidsDir, localPointFilesDir, startJobID);
    }
    DataGen.generateCentroids(numCentroids, vectorSize, conf, centroidsDir, hdfs, startJobID);

    final long begin = System.currentTimeMillis();
    runKMeansAllReduce(numOfDataPoints, numCentroids, vectorSize, numIterations, iterationPerJob, startJobID,
            numPointFiles, numMapTasks, conf, workRoot, pointsDir, centroidsDir, resultDir);
    final long elapsedMillis = System.currentTimeMillis() - begin;
    System.out.println("Total K-means Execution Time: " + elapsedMillis);
}

From source file:edu.iu.kmeans.regroupallgather.KMeansLauncher.java

License:Apache License

/**
 * Resets the {@code centroids} and {@code out} locations under the work directory, optionally
 * generates point data, always generates centroids, then runs {@code runKMeansAllReduce} and
 * prints its elapsed wall-clock time in milliseconds.
 *
 * <p>The {@code centroids} directory is recreated empty; a leftover {@code out} directory is only
 * removed, it is not recreated here. Only the run itself is timed.
 *
 * @param workDir      root work directory
 * @param generateData whether {@code KMUtil.generateData} is invoked before centroid generation
 *
 * <p>All remaining parameters are forwarded unchanged to {@code KMUtil} and/or
 * {@code runKMeansAllReduce}.
 */
private void launch(int numOfDataPoints, int numCentroids, int vectorSize, int numPointFiles, int numMapTasks,
        int numThreads, int numIterations, String workDir, String localPointFilesDir, boolean generateData)
        throws IOException, URISyntaxException, InterruptedException, ExecutionException,
        ClassNotFoundException {
    final Configuration conf = getConf();
    final Path workRoot = new Path(workDir);
    final FileSystem hdfs = FileSystem.get(conf);
    final Path pointsDir = new Path(workRoot, "data");

    // Centroid directory: wipe any previous content, then create it.
    final Path centroidsDir = new Path(workRoot, "centroids");
    final boolean staleCentroids = hdfs.exists(centroidsDir);
    if (staleCentroids) {
        hdfs.delete(centroidsDir, true);
    }
    hdfs.mkdirs(centroidsDir);

    // Output directory: only remove leftovers.
    final Path resultDir = new Path(workRoot, "out");
    final boolean staleResults = hdfs.exists(resultDir);
    if (staleResults) {
        hdfs.delete(resultDir, true);
    }

    if (generateData) {
        System.out.println("Generate data.");
        KMUtil.generateData(numOfDataPoints, numCentroids, vectorSize, numPointFiles, conf, hdfs,
                pointsDir, centroidsDir, localPointFilesDir);
    }
    KMUtil.generateCentroids(numCentroids, vectorSize, conf, centroidsDir, hdfs);

    final long begin = System.currentTimeMillis();
    runKMeansAllReduce(numOfDataPoints, numCentroids, vectorSize, numPointFiles, numMapTasks, numThreads,
            numIterations, pointsDir, centroidsDir, resultDir, conf);
    final long elapsedMillis = System.currentTimeMillis() - begin;
    System.out.println("Total K-means Execution Time: " + elapsedMillis);
}

From source file:edu.iu.kmeans.regroupallgather.KMUtil.java

License:Apache License

/**
 * Generate centroids and upload to the cDir
 * /*from w  w  w. j  a va 2  s . c  om*/
 * @param numCentroids
 * @param vectorSize
 * @param configuration
 * @param random
 * @param cenDir
 * @param fs
 * @throws IOException
 */
static void generateCentroids(int numCentroids, int vectorSize, Configuration configuration, Path cenDir,
        FileSystem fs) throws IOException {
    Random random = new Random();
    double[] data = null;
    if (fs.exists(cenDir))
        fs.delete(cenDir, true);
    if (!fs.mkdirs(cenDir)) {
        throw new IOException("Mkdirs failed to create " + cenDir.toString());
    }
    data = new double[numCentroids * vectorSize];
    for (int i = 0; i < data.length; i++) {
        // data[i] = 1000;
        data[i] = random.nextDouble() * 1000;
    }
    Path initClustersFile = new Path(cenDir, Constants.CENTROID_FILE_NAME);
    System.out.println("Generate centroid data." + initClustersFile.toString());
    FSDataOutputStream out = fs.create(initClustersFile, true);
    BufferedWriter bw = new BufferedWriter(new OutputStreamWriter(out));
    for (int i = 0; i < data.length; i++) {
        if ((i % vectorSize) == (vectorSize - 1)) {
            bw.write(data[i] + "");
            bw.newLine();
        } else {
            bw.write(data[i] + " ");
        }
    }
    bw.flush();
    bw.close();
    System.out.println("Wrote centroids data to file");
}

From source file:edu.iu.kmeans.rotation.KMeansLauncher.java

License:Apache License

/**
 * Resets the {@code centroids} and {@code out} locations under the work directory, optionally
 * generates point data, then runs {@code runKMeansAllReduce} and prints its elapsed wall-clock
 * time in milliseconds.
 *
 * <p>The {@code centroids} directory is recreated empty; a leftover {@code out} directory is only
 * removed, it is not recreated here. Only the run itself is timed.
 *
 * @param workDir      root work directory
 * @param generateData whether {@code KMUtil.generateData} is invoked before the run
 *
 * <p>All remaining parameters are forwarded unchanged to {@code KMUtil.generateData} and/or
 * {@code runKMeansAllReduce}.
 */
private void launch(int numOfDataPoints, int numCentroids, int vectorSize, int numPointFiles, int numMapTasks,
        int numThreads, int numIterations, String workDir, String localPointFilesDir, boolean generateData)
        throws IOException, URISyntaxException, InterruptedException, ExecutionException,
        ClassNotFoundException {
    final Configuration conf = getConf();
    final Path workRoot = new Path(workDir);
    final FileSystem hdfs = FileSystem.get(conf);
    final Path pointsDir = new Path(workRoot, "data");

    // Centroid directory: wipe any previous content, then create it.
    final Path centroidsDir = new Path(workRoot, "centroids");
    final boolean staleCentroids = hdfs.exists(centroidsDir);
    if (staleCentroids) {
        hdfs.delete(centroidsDir, true);
    }
    hdfs.mkdirs(centroidsDir);

    // Output directory: only remove leftovers.
    final Path resultDir = new Path(workRoot, "out");
    final boolean staleResults = hdfs.exists(resultDir);
    if (staleResults) {
        hdfs.delete(resultDir, true);
    }

    if (generateData) {
        System.out.println("Generate data.");
        KMUtil.generateData(numOfDataPoints, numCentroids, vectorSize, numPointFiles, conf, hdfs,
                pointsDir, centroidsDir, localPointFilesDir);
    }

    final long begin = System.currentTimeMillis();
    runKMeansAllReduce(numOfDataPoints, numCentroids, vectorSize, numPointFiles, numMapTasks, numThreads,
            numIterations, pointsDir, centroidsDir, resultDir, conf);
    final long elapsedMillis = System.currentTimeMillis() - begin;
    System.out.println("Total K-means Execution Time: " + elapsedMillis);
}

From source file:edu.iu.kmeans.sgxsimu.KMeansLauncher.java

License:Apache License

/**
 * Launches all the tasks in order: loads the system and application arguments, creates the job,
 * generates the initial centroids (and, if requested, the input data), then waits for the job.
 *
 * <p>Application arguments follow the system arguments, i.e. they start at index
 * {@code init.getSysArgNum()}: file dimension, feature dimension, number of centroids, three
 * enclave settings, the simulation flag, the generate-data flag and, when data is generated,
 * total points, total files and a temporary directory name.
 *
 * @param args command line arguments (system arguments first, application arguments after)
 * @return {@code -1} when the system arguments could not be loaded, {@code 0} otherwise
 * @throws Exception if argument parsing, a file system operation or the job itself fails
 */
@Override
public int run(String[] args) throws Exception {

    /* Put shared libraries into the distributed cache */
    Configuration conf = this.getConf();

    Initialize init = new Initialize(conf, args);

    // Stop early on a bad argument count; reading the application arguments below would
    // otherwise fail with an index-out-of-bounds error.
    if (!init.loadSysArgs()) {
        return -1;
    }

    init.loadDistributedLibs();

    // Application arguments start right after the system arguments.
    final int appArgBase = init.getSysArgNum();
    final int nFeatures = Integer.parseInt(args[appArgBase + 1]);
    final int numCentroids = Integer.parseInt(args[appArgBase + 2]);

    conf.setInt(HarpDAALConstants.FILE_DIM, Integer.parseInt(args[appArgBase]));
    conf.setInt(HarpDAALConstants.FEATURE_DIM, nFeatures);
    conf.setInt(HarpDAALConstants.NUM_CENTROIDS, numCentroids);
    conf.setInt(Constants.ENCLAVE_TOTAL, Integer.parseInt(args[appArgBase + 3]));
    conf.setInt(Constants.ENCLAVE_PER_THD, Integer.parseInt(args[appArgBase + 4]));
    conf.setInt(Constants.ENCLAVE_TASK, Integer.parseInt(args[appArgBase + 5]));
    conf.setBoolean(Constants.ENABLE_SIMU, Boolean.parseBoolean(args[appArgBase + 6]));

    // config job
    System.out.println("Starting Job");
    long perJobSubmitTime = System.currentTimeMillis();
    System.out.println(
            "Start Job#" + " " + new SimpleDateFormat("HH:mm:ss.SSS").format(Calendar.getInstance().getTime()));
    Job kmeansJob = init.createJob("kmeansJob", KMeansLauncher.class, KMeansCollectiveMapper.class);

    // initialize centroids data
    JobConf thisjobConf = (JobConf) kmeansJob.getConfiguration();
    FileSystem fs = FileSystem.get(conf);
    Path workPath = init.getWorkPath();
    Path cenDir = new Path(workPath, "centroids");
    // Remove leftovers first, then create the directory, so it exists and is empty afterwards.
    if (fs.exists(cenDir)) {
        fs.delete(cenDir, true);
    }
    fs.mkdirs(cenDir);

    Path initCenDir = new Path(cenDir, "init_centroids");
    DataGenerator.generateDenseDataSingle(numCentroids, nFeatures, 1000, 0, " ", initCenDir, fs);
    thisjobConf.set(HarpDAALConstants.CEN_DIR, cenDir.toString());
    thisjobConf.set(HarpDAALConstants.CENTROID_FILE_NAME, "init_centroids");

    // generate Data if required
    boolean generateData = Boolean.parseBoolean(args[appArgBase + 7]);
    if (generateData) {
        Path inputPath = init.getInputPath();
        int total_points = Integer.parseInt(args[appArgBase + 8]);
        int total_files = Integer.parseInt(args[appArgBase + 9]);
        String tmpDirPathName = args[appArgBase + 10];

        DataGenerator.generateDenseDataMulti(total_points, nFeatures, total_files, 2, 1, ",", inputPath,
                tmpDirPathName, fs);
    }

    // finish job
    boolean jobSuccess = kmeansJob.waitForCompletion(true);
    System.out.println(
            "End Job#" + " " + new SimpleDateFormat("HH:mm:ss.SSS").format(Calendar.getInstance().getTime()));
    System.out.println(
            "| Job#" + " Finished in " + (System.currentTimeMillis() - perJobSubmitTime) + " miliseconds |");
    if (!jobSuccess) {
        kmeansJob.killJob();
        System.out.println("kmeansJob failed");
    }

    // NOTE(review): a failed job still yields exit code 0; kept as-is so existing callers see
    // the same result, but consider returning a non-zero code here.
    return 0;
}

From source file:edu.iu.lda.LDALauncher.java

License:Apache License

/**
 * Prepares the work directory, then runs {@code runLDA} and prints its elapsed wall-clock time in
 * milliseconds.
 *
 * <p>An already existing work directory is deleted recursively and recreated. The {@code model}
 * sub-directory is always created; the {@code output} path is only computed and handed to
 * {@code runLDA}, it is not created here.
 *
 * @param docDirPath  document directory, passed to {@code runLDA}
 * @param workDirPath work directory holding the {@code model} and {@code output} sub-paths
 *
 * <p>All remaining parameters are forwarded unchanged to {@code runLDA}.
 */
private void launch(String docDirPath, int numTopics, double alpha, double beta, int numIterations,
        int minBound, int maxBound, int numMapTasks, int numThreadsPerWorker, double scheduleRatio, int mem,
        String workDirPath, boolean printModel) throws IOException, URISyntaxException, InterruptedException,
        ExecutionException, ClassNotFoundException {
    final Configuration conf = getConf();
    final FileSystem hdfs = FileSystem.get(conf);
    final Path documents = new Path(docDirPath);
    final Path workRoot = new Path(workDirPath);

    // Start from an empty work directory when one is left over from an earlier run.
    final boolean workRootPresent = hdfs.exists(workRoot);
    if (workRootPresent) {
        hdfs.delete(workRoot, true);
        hdfs.mkdirs(workRoot);
    }

    final Path modelPath = new Path(workDirPath, "model");
    hdfs.mkdirs(modelPath);

    // The output directory is deliberately not created here.
    final Path outputPath = new Path(workDirPath, "output");

    final long begin = System.currentTimeMillis();
    runLDA(documents, numTopics, alpha, beta, numIterations, minBound, maxBound, numMapTasks, numThreadsPerWorker,
            scheduleRatio, mem, printModel, modelPath, outputPath, conf);
    final long elapsedMillis = System.currentTimeMillis() - begin;
    System.out.println("Total LDA Execution Time: " + elapsedMillis);
}

From source file:edu.iu.lda.LDAMPCollectiveMapper.java

License:Apache License

/**
 * Writes every partition of the given word tables to the file {@code folderPath/selfID}.
 *
 * <p>The folder is created if it does not exist. Each partition produces one line: the partition
 * id, followed for every entry of its topic-count list by {@code " " + low + ":" + high}, where
 * {@code low} is the lower 32 bits and {@code high} the upper 32 bits of the stored long.
 *
 * @param wordTableMap   tables whose partitions are printed
 * @param folderPath     folder that receives the output file
 * @param selfID         used as the output file name
 * @param congfiguration configuration used to obtain the file system
 * @throws IOException if the folder or file cannot be created, or if writing fails
 */
private void printWordTableMap(Table<TopicCountList>[] wordTableMap, String folderPath, int selfID,
        Configuration congfiguration) throws IOException {
    FileSystem fs = FileSystem.get(congfiguration);
    Path folder = new Path(folderPath);
    if (!fs.exists(folder)) {
        fs.mkdirs(folder);
    }
    Path file = new Path(folderPath + "/" + selfID);
    // try-with-resources closes (and thereby flushes) the writer even when iteration fails.
    try (PrintWriter writer = new PrintWriter(new BufferedWriter(new OutputStreamWriter(fs.create(file))))) {
        for (Table<TopicCountList> wTable : wordTableMap) {
            for (Partition<TopicCountList> wPartition : wTable.getPartitions()) {
                int wordID = wPartition.id();
                LongArrayList wRow = wPartition.get().getTopicCount();
                // Print word
                writer.print(wordID);
                // Print topic count
                for (int i = 0; i < wRow.size(); i++) {
                    long t = wRow.getLong(i);
                    writer.print(" " + (int) t + ":" + (int) (t >>> 32));
                }
                writer.println();
            }
        }
        // PrintWriter never throws on write errors; surface them instead of leaving a
        // silently truncated file behind.
        if (writer.checkError()) {
            throw new IOException("Failed to write word table to " + file);
        }
    }
}