Example usage for org.apache.hadoop.fs FileSystem mkdirs

List of usage examples for org.apache.hadoop.fs FileSystem mkdirs

Introduction

On this page you can find example usage for org.apache.hadoop.fs FileSystem mkdirs.

Prototype

public boolean mkdirs(Path f) throws IOException 

Source Link

Document

Calls #mkdirs(Path, FsPermission) with the default permission.
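
Before the examples, here is a minimal sketch of both overloads. It is illustrative only: the class name, the paths, and the octal permission are assumptions, not taken from any example on this page.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.permission.FsPermission;

public class MkdirsExample {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);

        // One-argument form: creates the directory and any missing
        // parents with the default permission.
        Path dir = new Path("/tmp/mkdirs-example"); // illustrative path
        if (!fs.mkdirs(dir)) {
            throw new IOException("Mkdirs failed to create " + dir);
        }

        // Two-argument form: the one-argument overload delegates here;
        // an explicit permission can be supplied instead.
        fs.mkdirs(new Path(dir, "restricted"), new FsPermission((short) 0700));
    }
}

Note that mkdirs also returns true when the directory already exists, so the examples below that check its return value are guarding against creation failures, not against pre-existing directories.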

Usage

From source file:edu.arizona.cs.hadoop.fs.irods.output.HirodsFileOutputCommitter.java

License:Apache License

private void moveTaskOutputsToIRODS(TaskAttemptContext context, FileSystem outfs, Path outDir,
        FileSystem workfs, Path workOutput) throws IOException {
    context.progress();
    if (workfs.isFile(workOutput)) {
        Path finalOutputPath = getFinalPath(outDir, workOutput, this.workPath);
        FSDataOutputStream irods_os = null;
        FSDataInputStream temp_is = null;
        try {
            // commit to iRODS
            irods_os = outfs.create(finalOutputPath, true);
            temp_is = workfs.open(workOutput);

            byte[] buffer = new byte[100 * 1024];
            int bytes_read = 0;

            while ((bytes_read = temp_is.read(buffer)) != -1) {
                irods_os.write(buffer, 0, bytes_read);
            }
        } finally {
            if (temp_is != null) {
                try {
                    temp_is.close();
                } catch (IOException ex) {
                    // ignore exceptions
                }
            }

            // remove temporary file
            try {
                workfs.delete(workOutput, true);
            } catch (IOException ex) {
                // ignore exceptions
            }

            if (irods_os != null) {
                irods_os.close();
            }
        }

        LOG.debug("Moved " + workOutput + " to " + finalOutputPath);
    } else if (workfs.getFileStatus(workOutput).isDir()) {
        FileStatus[] paths = workfs.listStatus(workOutput);
        Path finalOutputPath = getFinalPath(outDir, workOutput, this.workPath);
        outfs.mkdirs(finalOutputPath);
        if (paths != null) {
            for (FileStatus path : paths) {
                moveTaskOutputsToIRODS(context, outfs, outDir, workfs, path.getPath());
            }
        }
    }
}

From source file:edu.emory.bmi.medicurator.storage.HdfsStorage.java

License:Apache License

/**
 * Save data from an InputStream to a relative path.
 * Required parent directories will be created if they do not exist.
 * @param path String path
 * @param in InputStream
 * @return boolean
 */
public boolean saveToPath(String path, InputStream in) {
    try {
        Configuration conf = new Configuration();
        FileSystem hdfs = FileSystem.get(new URI(Constants.HDFS_URI), conf);
        Path dst = new Path(hdfsPath + path);

        //Create a new file and write data to it.
        hdfs.mkdirs(dst.getParent());
        FSDataOutputStream out = hdfs.create(dst, true);
        byte[] b = new byte[1024];
        int numBytes = 0;
        while ((numBytes = in.read(b)) > 0) {
            out.write(b, 0, numBytes);
        }
        //Close all the file descriptors
        out.close();
        hdfs.close();
        return true;
    } catch (Exception e) {
        System.out.println("[ERROR]  Exception occurs when HdfsStorage saveToPath (" + path + ")" + e);
    }
    return false;
}
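
As a side note, the same write can be sketched with try-with-resources and Hadoop's IOUtils so the output stream is closed even when the copy fails partway. This is a hypothetical variant, reusing the hdfsPath field and Constants.HDFS_URI from the example above, not code from the original project.

public boolean saveToPathVariant(String path, InputStream in) {
    try {
        Configuration conf = new Configuration();
        FileSystem hdfs = FileSystem.get(new URI(Constants.HDFS_URI), conf);
        Path dst = new Path(hdfsPath + path);

        // Ensure the parent directories exist before creating the file.
        hdfs.mkdirs(dst.getParent());

        // try-with-resources closes the stream on success and failure;
        // IOUtils.copyBytes replaces the manual buffer loop.
        try (FSDataOutputStream out = hdfs.create(dst, true)) {
            org.apache.hadoop.io.IOUtils.copyBytes(in, out, 4096, false);
        }

        // The cached FileSystem instance is deliberately left open here,
        // since FileSystem.get may return a handle shared by other callers.
        return true;
    } catch (Exception e) {
        System.out.println("[ERROR] Exception in saveToPathVariant(" + path + "): " + e);
        return false;
    }
}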

From source file:edu.indiana.soic.ts.mapreduce.pwd.PairWiseDistance.java

License:Open Source License

public int execJob(Configuration conf, String sequenceFileFullPath, String sequenceFile, String distDir)
        throws Exception {
    /* input parameters */
    LOG.info(sequenceFileFullPath);
    Job job = new Job(conf, "Pairwise-calc-" + sequenceFile);

    /* create the base dir for this job. Delete and recreates if it exists */
    Path hdMainDir = new Path(distDir + "/" + sequenceFile);
    FileSystem fs = FileSystem.get(conf);
    fs.delete(hdMainDir, true);
    Path hdInputDir = new Path(hdMainDir, "data");
    if (!fs.mkdirs(hdInputDir)) {
        throw new IOException("Mkdirs failed to create " + hdInputDir.toString());
    }

    int noOfSequences = getNoOfSequences(sequenceFileFullPath, fs);
    int noOfDivisions = (int) Math.ceil(noOfSequences / (double) blockSize);
    int noOfBlocks = (noOfDivisions * (noOfDivisions + 1)) / 2;
    LOG.info("No of divisions :" + noOfDivisions + "\nNo of blocks :" + noOfBlocks + "\nBlock size :"
            + blockSize);

    // Retrieve the configuration from the job to set the properties.
    // Setting properties on the original conf does not work (possible
    // Hadoop bug).
    Configuration jobConf = job.getConfiguration();

    // Input dir in HDFS. Create this in newly created job base dir
    Path inputDir = new Path(hdMainDir, "input");
    if (!fs.mkdirs(inputDir)) {
        throw new IOException("Mkdirs failed to create " + inputDir.toString());
    }

    long dataPartitionStartTime = System.nanoTime();
    partitionData(sequenceFileFullPath, noOfSequences, blockSize, fs, noOfDivisions, jobConf, inputDir);

    distributeData(blockSize, conf, fs, hdInputDir, noOfDivisions);

    long dataPartTime = (System.nanoTime() - dataPartitionStartTime) / 1000000;
    LOG.info("Data Partition & Scatter Completed in (ms):" + dataPartTime);

    // Output dir in HDFS
    Path hdOutDir = new Path(hdMainDir, "out");

    jobConf.setInt(Constants.BLOCK_SIZE, blockSize);
    jobConf.setInt(Constants.NO_OF_DIVISIONS, noOfDivisions);
    jobConf.setInt(Constants.NO_OF_SEQUENCES, noOfSequences);
    jobConf.set(Constants.DIST_FUNC, distFunc);

    job.setJarByClass(PairWiseDistance.class);
    job.setMapperClass(SWGMap.class);
    job.setReducerClass(SWGReduce.class);
    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(SWGWritable.class);
    FileInputFormat.setInputPaths(job, hdInputDir);
    FileOutputFormat.setOutputPath(job, hdOutDir);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setNumReduceTasks(noOfDivisions);

    long startTime = System.currentTimeMillis();
    int exitStatus = job.waitForCompletion(true) ? 0 : 1;
    double executionTime = (System.currentTimeMillis() - startTime) / 1000.0;
    LOG.info("Job Finished in " + executionTime + " seconds");
    LOG.info("# #seq\t#blockS\tTtime\tinput\tdataDistTime\toutput" + noOfSequences + "\t" + noOfBlocks + "\t"
            + executionTime + "\t" + sequenceFileFullPath + "\t" + dataPartTime + "\t" + hdMainDir);

    return exitStatus;
}

From source file:edu.isi.mavuno.util.MavunoUtils.java

License:Apache License

public static void createDirectory(Configuration conf, String path) throws IOException {
    FileSystem fs;
    try {
        fs = FileSystem.get(new URI(path), conf);
    } catch (URISyntaxException e) {
        throw new RuntimeException("Error creating directory -- " + path, e);
    }
    fs.mkdirs(new Path(path));
}

From source file:edu.iu.ccd.CCDLauncher.java

License:Apache License

private void launch(String inputDirPath, int r, double lambda, int numIterations, int numMapTasks,
        int numThreadsPerWorker, int numModelSlices, String workDirPath, String testFilePath)
        throws IOException, URISyntaxException, InterruptedException, ExecutionException,
        ClassNotFoundException {
    Configuration configuration = getConf();
    FileSystem fs = FileSystem.get(configuration);
    Path inputDir = new Path(inputDirPath);
    Path workDir = new Path(workDirPath);
    if (fs.exists(workDir)) {
        fs.delete(workDir, true);
        fs.mkdirs(workDir);
    }
    Path modelDir = new Path(workDirPath, "model");
    fs.mkdirs(modelDir);
    // Do not make output dir
    Path outputDir = new Path(workDirPath, "output");
    long startTime = System.currentTimeMillis();
    runCCD(inputDir, r, lambda, numIterations, numMapTasks, numThreadsPerWorker, numModelSlices, modelDir,
            outputDir, testFilePath, configuration);
    long endTime = System.currentTimeMillis();
    System.out.println("Total SGD Execution Time: " + (endTime - startTime));
}

From source file:edu.iu.daal_cov.COVDaalLauncher.java

License:Apache License

private void launch(String inputDirPath, String workDirPath, int mem, int numMapTasks, int numThreadsPerWorker)
        throws IOException, URISyntaxException, InterruptedException, ExecutionException,
        ClassNotFoundException {

    Configuration configuration = getConf();
    FileSystem fs = FileSystem.get(configuration);
    Path inputDir = new Path(inputDirPath);
    Path workDir = new Path(workDirPath);
    if (fs.exists(workDir)) {
        fs.delete(workDir, true);
        fs.mkdirs(workDir);
    }
    Path modelDir = new Path(workDirPath, "model");
    fs.mkdirs(modelDir);
    // Do not make output dir
    Path outputDir = new Path(workDirPath, "output");
    long startTime = System.currentTimeMillis();

    runCOV(inputDir, mem, numMapTasks, numThreadsPerWorker, modelDir, outputDir, configuration);

    long endTime = System.currentTimeMillis();
    System.out.println("Total COV Execution Time: " + (endTime - startTime));
}

From source file:edu.iu.daal_kmeans.regroupallgather.KMeansDaalLauncher.java

License:Apache License

/**
 * Launches all the tasks in order.
 */
@Override
public int run(String[] args) throws Exception {

    /* Put shared libraries into the distributed cache */
    Configuration conf = this.getConf();

    Initialize init = new Initialize(conf, args);

    /* Put shared libraries into the distributed cache */
    init.loadDistributedLibs();

    // load args
    init.loadSysArgs();

    //load app args
    conf.setInt(HarpDAALConstants.FILE_DIM, Integer.parseInt(args[init.getSysArgNum()]));
    conf.setInt(HarpDAALConstants.FEATURE_DIM, Integer.parseInt(args[init.getSysArgNum() + 1]));
    conf.setInt(HarpDAALConstants.NUM_CENTROIDS, Integer.parseInt(args[init.getSysArgNum() + 2]));

    // config job
    System.out.println("Starting Job");
    long perJobSubmitTime = System.currentTimeMillis();
    System.out.println(
            "Start Job#" + " " + new SimpleDateFormat("HH:mm:ss.SSS").format(Calendar.getInstance().getTime()));
    Job kmeansJob = init.createJob("kmeansJob", KMeansDaalLauncher.class, KMeansDaalCollectiveMapper.class);

    // initialize centroids data
    JobConf thisjobConf = (JobConf) kmeansJob.getConfiguration();
    FileSystem fs = FileSystem.get(conf);
    int nFeatures = Integer.parseInt(args[init.getSysArgNum() + 1]);
    int numCentroids = Integer.parseInt(args[init.getSysArgNum() + 2]);
    Path workPath = init.getWorkPath();
    Path cenDir = new Path(workPath, "centroids");
    if (fs.exists(cenDir)) {
        fs.delete(cenDir, true);
    }
    fs.mkdirs(cenDir);

    Path initCenDir = new Path(cenDir, "init_centroids");
    DataGenerator.generateDenseDataSingle(numCentroids, nFeatures, 1000, 0, " ", initCenDir, fs);
    thisjobConf.set(HarpDAALConstants.CEN_DIR, cenDir.toString());
    thisjobConf.set(HarpDAALConstants.CENTROID_FILE_NAME, "init_centroids");

    //generate Data if required
    boolean generateData = Boolean.parseBoolean(args[init.getSysArgNum() + 3]);
    if (generateData) {
        Path inputPath = init.getInputPath();
        int total_points = Integer.parseInt(args[init.getSysArgNum() + 4]);
        int total_files = Integer.parseInt(args[init.getSysArgNum() + 5]);
        String tmpDirPathName = args[init.getSysArgNum() + 6];

        DataGenerator.generateDenseDataMulti(total_points, nFeatures, total_files, 2, 1, ",", inputPath,
                tmpDirPathName, fs);
    }

    // finish job
    boolean jobSuccess = kmeansJob.waitForCompletion(true);
    System.out.println(
            "End Job#" + " " + new SimpleDateFormat("HH:mm:ss.SSS").format(Calendar.getInstance().getTime()));
    System.out.println(
            "| Job#" + " Finished in " + (System.currentTimeMillis() - perJobSubmitTime) + " milliseconds |");
    if (!jobSuccess) {
        kmeansJob.killJob();
        System.out.println("kmeansJob failed");
    }

    return 0;
}

From source file:edu.iu.daal_kmeans.regroupallgather.KMUtil.java

License:Apache License

/**
 * Generate centroids and upload them to cenDir.
 *
 * @param numCentroids
 * @param vectorSize
 * @param configuration
 * @param cenDir
 * @param fs
 * @throws IOException
 */
static void generateCentroids(int numCentroids, int vectorSize, Configuration configuration, Path cenDir,
        FileSystem fs) throws IOException {
    Random random = new Random();
    double[] data = null;
    if (fs.exists(cenDir))
        fs.delete(cenDir, true);
    if (!fs.mkdirs(cenDir)) {
        throw new IOException("Mkdirs failed to create " + cenDir.toString());
    }
    data = new double[numCentroids * vectorSize];
    for (int i = 0; i < data.length; i++) {
        data[i] = random.nextDouble() * 1000;
    }
    Path initClustersFile = new Path(cenDir, Constants.CENTROID_FILE_NAME);
    System.out.println("Generate centroid data." + initClustersFile.toString());
    FSDataOutputStream out = fs.create(initClustersFile, true);
    BufferedWriter bw = new BufferedWriter(new OutputStreamWriter(out));
    for (int i = 0; i < data.length; i++) {
        if ((i % vectorSize) == (vectorSize - 1)) {
            bw.write(data[i] + "");
            bw.newLine();
        } else {
            bw.write(data[i] + " ");
        }
    }
    bw.flush();
    bw.close();
    System.out.println("Wrote centroids data to file");
}

From source file:edu.iu.daal_linreg.LinRegDaalLauncher.java

License:Apache License

private void launch(String inputDirPath, String testDirPath, String testGroundTruthDirPath, String workDirPath,
        int mem, int batchSize, int numMapTasks, int numThreadsPerWorker) throws IOException,
        URISyntaxException, InterruptedException, ExecutionException, ClassNotFoundException {

    Configuration configuration = getConf();
    FileSystem fs = FileSystem.get(configuration);
    Path inputDir = new Path(inputDirPath);
    Path workDir = new Path(workDirPath);
    if (fs.exists(workDir)) {
        fs.delete(workDir, true);
        fs.mkdirs(workDir);
    }
    Path modelDir = new Path(workDirPath, "model");
    fs.mkdirs(modelDir);
    // Do not make output dir
    Path outputDir = new Path(workDirPath, "output");
    long startTime = System.currentTimeMillis();

    runLinReg(inputDir, testDirPath, testGroundTruthDirPath, mem, batchSize, numMapTasks, numThreadsPerWorker,
            modelDir, outputDir, configuration);

    long endTime = System.currentTimeMillis();
    System.out.println("Total Linear Regression Execution Time: " + (endTime - startTime));
}

From source file:edu.iu.daal_mom.MOMDaalLauncher.java

License:Apache License

private void launch(String inputDirPath, String workDirPath, int mem, int numMapTasks, int numThreadsPerWorker)
        throws IOException, URISyntaxException, InterruptedException, ExecutionException,
        ClassNotFoundException {

    Configuration configuration = getConf();
    FileSystem fs = FileSystem.get(configuration);
    Path inputDir = new Path(inputDirPath);
    Path workDir = new Path(workDirPath);
    if (fs.exists(workDir)) {
        fs.delete(workDir, true);
        fs.mkdirs(workDir);
    }
    Path modelDir = new Path(workDirPath, "model");
    fs.mkdirs(modelDir);
    // Do not make output dir
    Path outputDir = new Path(workDirPath, "output");
    long startTime = System.currentTimeMillis();

    runMOM(inputDir, mem, numMapTasks, numThreadsPerWorker, modelDir, outputDir, configuration);

    long endTime = System.currentTimeMillis();
    System.out.println("Total MOM Execution Time: " + (endTime - startTime));
}