Example usage for org.apache.hadoop.fs FileSystem mkdirs

List of usage examples for org.apache.hadoop.fs FileSystem mkdirs

Introduction

On this page you can find example usages of org.apache.hadoop.fs.FileSystem.mkdirs.

Prototype

public boolean mkdirs(Path f) throws IOException 

Source Link

Document

Calls #mkdirs(Path, FsPermission) with the default permission.

Usage

From source file:com.yolodata.tbana.testutils.FileSystemTestUtils.java

License:Open Source License

/**
 * Creates a directory below {@code directory} and returns its path as reported by the
 * file system.
 *
 * @param fileSystem    file system on which the directory is created
 * @param directory     parent location; its string form is handed to
 *                      {@code HadoopFileTestUtils.createPath}
 * @param directoryName name passed to {@code HadoopFileTestUtils.createPath} together with
 *                      the parent (presumably the new directory's name; verify against the helper)
 * @return the path taken from the file status of the created directory
 * @throws IOException if creating the directory or reading its status fails
 */
public static Path createEmptyDir(FileSystem fileSystem, Path directory, String directoryName)
        throws IOException {
    final String parentLocation = directory.toString();
    final Path target = HadoopFileTestUtils.createPath(parentLocation, directoryName);

    // The test helper fails fast when the file system reports that creation did not succeed.
    final boolean created = fileSystem.mkdirs(target);
    assertTrue(created);

    return fileSystem.getFileStatus(target).getPath();
}

From source file:com.yss.util.YarnUtil.java

License:Open Source License

/**
 * Writes the Storm and YARN configuration files into the application's configuration
 * directory on the given file system.
 *
 * <p>The Storm configuration is dumped as YAML to
 * {@code <home>/<appHome>/STORM_CONF_PATH_STRING}; {@code yarn-site.xml} is written next to it.
 * Both files are written through writers that are closed even when serialization fails.
 *
 * @param fs        file system to write to
 * @param appHome   application home, resolved relative to {@code fs.getHomeDirectory()}
 * @param stormConf Storm configuration map; it is passed through {@code rmNulls} before dumping
 * @param yarnConf  YARN configuration written out as XML
 * @return the directory that contains both configuration files
 * @throws IOException if a directory or file cannot be created or written
 */
@SuppressWarnings("rawtypes")
public static Path createConfigurationFileInFs(FileSystem fs, String appHome, Map stormConf,
        YarnConfiguration yarnConf) throws IOException {
    // dump stringwriter's content into FS conf/storm.yaml
    Path confDst = new Path(fs.getHomeDirectory(), appHome + Path.SEPARATOR + STORM_CONF_PATH_STRING);
    Path dirDst = confDst.getParent();
    fs.mkdirs(dirDst);

    // Sanitize the map before the destination file is opened so that a failure here
    // does not leave an empty storm.yaml behind.
    rmNulls(stormConf);

    //storm.yaml
    Yaml yaml = new Yaml();
    try (FSDataOutputStream out = fs.create(confDst);
            OutputStreamWriter writer = new OutputStreamWriter(out)) {
        yaml.dump(stormConf, writer);
    }

    //yarn-site.xml
    Path yarnSiteXml = new Path(dirDst, "yarn-site.xml");
    try (FSDataOutputStream out = fs.create(yarnSiteXml);
            OutputStreamWriter writer = new OutputStreamWriter(out)) {
        yarnConf.writeXml(writer);
    }
    return dirDst;
}

From source file:corner.hadoop.services.impl.HdfsAccessorProxy.java

License:Apache License

/**
 * Creates the directory denoted by {@code dirPath} on the file system that owns that path.
 *
 * @param dirPath location of the directory to create
 * @return the result of {@code FileSystem.mkdirs} for the path
 * @throws IOException if the file system cannot be obtained or the call fails
 */
@Override
public boolean mkdirs(String dirPath) throws IOException {
    final Path target = new Path(dirPath);
    // The file system is resolved from the path itself using this object's configuration.
    return target.getFileSystem(getConf()).mkdirs(target);
}

From source file:datafu.hourglass.demo.GenerateIds.java

License:Apache License

/**
 * Writes one Avro file of 1000 ID records for the given date.
 *
 * <p>The file is created as {@code <outputPath>/<formatted date>/part-00000.avro}. Any existing
 * directory for that date is deleted first. Each record's {@code id} is {@code startId} when
 * {@code startId == endId}, otherwise {@code startId} plus a random offset drawn from
 * {@code random.nextInt(endId - startId + 1)}.
 *
 * @param fs         file system to write to
 * @param outputPath root output directory; created when it does not exist
 * @param date       date used to name the sub-directory (formatted with {@code dateFormat})
 * @throws IOException if a file-system operation or writing the Avro data fails
 */
private void createDataForDate(FileSystem fs, Path outputPath, Date date) throws IOException {
    // make sure output path exists
    if (!fs.exists(outputPath)) {
        fs.mkdirs(outputPath);
    }

    Path datePath = new Path(outputPath, dateFormat.format(date));

    System.out.println("Writing to " + datePath.toString() + " with range " + startId + " to " + endId);

    // delete directory if it already exists
    if (fs.exists(datePath)) {
        fs.delete(datePath, true);
    }

    OutputStream outputStream = fs.create(new Path(datePath, "part-00000.avro"));
    try {
        DataFileWriter<GenericRecord> dataWriter = new DataFileWriter<GenericRecord>(
                new GenericDatumWriter<GenericRecord>());
        try {
            dataWriter.create(EVENT_SCHEMA, outputStream);

            GenericRecord record = new GenericData.Record(EVENT_SCHEMA);
            // create 1000 random IDs
            for (int i = 0; i < 1000; i++) {
                long val;
                if (startId == endId) {
                    val = startId;
                } else {
                    val = (long) (startId + random.nextInt(endId - startId + 1));
                }
                record.put("id", val);
                dataWriter.append(record);
            }
        } finally {
            // Close the writer even when appending fails so buffered data is not left dangling.
            dataWriter.close();
        }
    } finally {
        // Always release the underlying stream, including when the writer could not be set up.
        outputStream.close();
    }
}

From source file:datafu.hourglass.jobs.StagedOutputJob.java

License:Apache License

/**
 * Run the job and wait for it to complete.  Output will be temporarily stored under the staging path.
 * If the job is successful it will be moved to the final location.
 *
 * <p>While the job runs, a shutdown hook is registered that calls {@code killJob()}; it is
 * removed once the job has returned. On failure the staged path is deleted on a best-effort
 * basis.
 *
 * @param verbose passed through to the superclass implementation
 * @return {@code true} if the job succeeded and the staged output was renamed to the final
 *         output path; {@code false} otherwise
 * @throws IOException            if the job or a file-system operation on the success path fails
 * @throws InterruptedException   propagated from the superclass implementation
 * @throws ClassNotFoundException propagated from the superclass implementation
 */
@Override
public boolean waitForCompletion(boolean verbose)
        throws IOException, InterruptedException, ClassNotFoundException {
    final Path actualOutputPath = FileOutputFormat.getOutputPath(this);
    final Path stagedPath = new Path(String.format("%s/%s/staged", _stagingPrefix, System.currentTimeMillis()));

    // Redirect the job's output to the staging location for the duration of the run.
    FileOutputFormat.setOutputPath(this, stagedPath);

    final Thread hook = new Thread(new Runnable() {
        @Override
        public void run() {
            try {
                killJob();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    });

    Runtime.getRuntime().addShutdownHook(hook);

    final boolean retVal = super.waitForCompletion(verbose);
    Runtime.getRuntime().removeShutdownHook(hook);

    if (retVal) {
        FileSystem fs = actualOutputPath.getFileSystem(getConfiguration());

        // Creating and then deleting the final path leaves its parent directories in place
        // for the rename below.
        fs.mkdirs(actualOutputPath);

        _log.info(String.format("Deleting data at old path[%s]", actualOutputPath));
        fs.delete(actualOutputPath, true);

        _log.info(String.format("Moving from staged path[%s] to final resting place[%s]", stagedPath,
                actualOutputPath));
        boolean renamed = fs.rename(stagedPath, actualOutputPath);

        if (renamed && _writeCounters) {
            writeCounters(fs);
        }

        return renamed;
    } else {
        FileSystem fs = actualOutputPath.getFileSystem(getConfiguration());
        _log.info(String.format("Job failed, deleting staged path[%s]", stagedPath));
        try {
            fs.delete(stagedPath, true);
        } catch (IOException e) {
            // Cleanup is best effort: the job has already failed, so report the problem
            // instead of hiding it, but do not replace the job result with this exception.
            _log.warn(String.format("Failed to delete staged path[%s]", stagedPath), e);
        }
    }

    _log.warn("retVal was false for some reason...");
    return retVal;
}

From source file:de.gesundkrank.wikipedia.hadoop.io.WikiDumpLoader.java

License:Open Source License

/**
 * Returns {@link FileStatus} of the latest dump in the HDFS
 *
 * @param fs       HDFS/*from   ww  w.ja v a2  s.c  om*/
 * @param basepath Base path of hdfs wikidumps
 * @return
 */
private FileStatus checkLocalDumps(FileSystem fs, Path basepath) {

    long lastLocalChange = 0;
    FileStatus lastLocalDump = null;
    try {
        if (!fs.exists(basepath)) {
            fs.mkdirs(basepath);
            return null;
        }

        FileStatus[] stati = fs.listStatus(basepath);

        for (FileStatus status : stati) {
            long fileChange = status.getModificationTime();
            if (fileChange > lastLocalChange) {
                lastLocalDump = status;
            }
        }
    } catch (IOException e) {
        logger.error(e);
    }
    return lastLocalDump;

}

From source file:de.mpii.fsm.driver.FsmDriver.java

License:Apache License

/**
 * (non-Javadoc)/* w ww . ja  va2s.  c  o m*/
 * @see org.apache.hadoop.util.Tool#run(java.lang.String[])
 * 
 * Add the appropriate options here. Execute the MG-FSM algorithm 
 * according to the parameters specified at run time.
 * 
 * @param String[] args 
 * @return int
 */
@Override
public int run(String[] args) throws Exception {
    /* Here parameters that will be available to the user 
     * during run time are specified and intialized. */

    /* Hadooop-config options */
    addOutputOption();

    /*User-interesting options*/
    addOption("input", "i", "(Optional) Specify the path from where the input is to be read"
            + "\n NOTE: This option can not be used with -(r)esume option.", null);

    addOption("support", "s", "(Optional) Minimum support (sigma) " + "\nDefault Value: 1\n",
            FsmConfig.SIGMA_DEFAULT_STRING);

    addOption("gamma", "g", "(Optional) Maximum allowed for mining frequent sequences (gamma)" + " by MG-FSM "
            + "\nDefault Value: 2\n", FsmConfig.GAMMA_DEFAULT_STRING);

    addOption("lambda", "l",
            "(Optional) Maximum length for mining frequent sequences (lambda)" + "\nDefault Value: 5\n",
            FsmConfig.LAMBDA_DEFAULT_STRING);

    addOption("execMode", "m", "Method of execution viz. s -(s)equential or d -(d)istributed"
            + "\nDefault Value: (s)-sequential\n", FsmConfig.DEFAULT_EXEC_MODE);

    addOption("type", "t",
            "(Optional) Specify the mining mode." + "\nExpected values for input:"
                    + "\n1. a -(a)ll\n2. m -(m)aximal \n3. c -(c)losed" + "\nDefault Value : a -(a)ll\n",
            FsmConfig.DEFAULT_TYPE);

    /* keepFiles default value is null.
     * It will be set to a temporary location, in case
     * no path is specified.*/
    addOption("keepFiles", "k",
            "(Optional) Keep the intermediary files " + "for later use or runs. The files stored are:"
                    + "\n1. Dictionary \n2. Encoded Sequences \n "
                    + "Specify the intermediate path where to keep these files :",
            null);

    /* resume points to the location where the 
     * intermediary files are located*/
    addOption("resume", "r", "(Optional) Resume running further " + "runs of the MG-FSM algorithm on"
            + " already encoded transaction file located in the folder specified in input.\n", null);

    /*Developer-interesting options*/
    addOption("partitionSize", "p",
            "(Optional) Explicitly specify the partition size." + "\nDefault Value: 10000",
            FsmConfig.DEFAULT_PARTITION_SIZE);

    addOption("indexing", "id",
            "(Optional) Specify the indexing mode." + "\nExpected values for input:"
                    + "\n1. none\n2. minmax \n3. full" + "\nDefault Value : full\n",
            FsmConfig.DEFAULT_INDEXING_METHOD);

    /* split flag is false by default*/
    addFlag("split", "sp",
            "(Optional) Explicitly specify " + "whether or not to allow split by setting this flag.");

    addOption("numReducers", "N", "(Optional) Number of reducers to be used by MG-FSM. Default value: 90 ",
            "90");

    /*------------------------------------------------------------
     * ERROR CHECKS
     *------------------------------------------------------------*/

    /* Parse the arguments received from 
     * the user during run-time.*/
    if (parseArguments(args) == null) {
        System.out.println("\n------------\n" + " E R R O R " + "\n------------\n");
        System.out.println("One of the mandatory options is NOT specified");
        System.out.println("e.g. the input option MUST be specified.");
        //Return a non-zero exit status to indicate failure
        return 1;
    }

    Parameters params = new Parameters();
    if (hasOption("tempDir")) {
        String tempDirPath = getOption("tempDir");
        params.set("tempDir", tempDirPath);
    }
    if (hasOption("input")) {
        String inputString = getOption("input");
        params.set("input", inputString);
    } else {
        params.set("input", null);
    }
    if (hasOption("support")) {
        String supportString = getOption("support");
        /* 
         * Checks & constraints on the value that can
         * be assigned to support, gamma, & lambda.
         * 
         * NOTE: refer [1]
         */
        if (Integer.parseInt(supportString) < 1) {
            System.out.println("Value of support should be greater than or equal to 1");
            //Return a non-zero exit status to indicate failure
            return (1);
        }
        params.set("support", supportString);

    }
    if (hasOption("gamma")) {
        String gammaString = getOption("gamma");

        if (Integer.parseInt(gammaString) < 0) {
            System.out.println("Value of gap should be greater than or equal to 0");
            //Return a non-zero exit status to indicate failure
            return (1);
        }
        params.set("gamma", gammaString);
    }
    if (hasOption("lambda")) {
        String lambdaString = getOption("lambda");

        if (Integer.parseInt(lambdaString) < 2) {
            System.out.println("Value of length should be greater than or equal to 2");
            //Return a non-zero exit status to indicate failure
            return (1);
        }
        params.set("lambda", lambdaString);
    }
    if (hasOption("execMode")) {
        String modeString = getOption("execMode");
        params.set("execMode", modeString);
    }
    if (hasOption("type")) {
        String modeString = getOption("type");
        params.set("type", modeString);
    }
    if (hasOption("indexing")) {
        String indexingString = getOption("indexing");
        params.set("indexing", indexingString);
    }
    if (hasOption("partitionSize")) {
        String partitionString = getOption("partitionSize");
        params.set("partitionSize", partitionString);
    }
    if (hasOption("split")) {
        params.set("split", "true");
    } else {
        params.set("split", "false");
    }
    if (hasOption("keepFiles")) {
        String keepFilesString = getOption("keepFiles");
        params.set("keepFiles", keepFilesString);
    } else {
        params.set("keepFiles", null);
    }
    if (hasOption("resume")) {
        String resumeString = getOption("resume");
        params.set("resume", resumeString);
    } else {
        params.set("resume", null);
    }

    if (hasOption("numReducers")) {
        String numReducersString = getOption("numReducers");
        params.set("numReducers", numReducersString);
    } else {
        params.set("numReducers", null);
    }

    Path inputDir = null;
    Path outputDir = getOutputPath();

    /* ---------------------------------------------------------------------
     * ERROR CHECKS ON COMBINATION OF OPTIONS SUPPLIED TO THE DRIVER
     * --------------------------------------------------------------------*/

    //Complain if the '-(t)ype' is equal to '-(m)aximal' or '-(c)losed' and 
    //the 'tempDir' is not specified
    /*if((params.get("tempDir")==null||params.get("tempDir").contentEquals("temp"))&&
       ((params.get("type").toCharArray()[0]=='m')||(params.get("type").toCharArray()[0]=='c'))){
      System.out
         .println("If -(t)ype is -(m)aximal or -(c)losed then a -tempDir path must be specified");
    }*/
    if ((params.get("resume") != null) && (params.get("keepFiles") != null)) {
        System.out.println("-(r)esume & -(k)eepFiles are mutually exclusive options");
        System.out.println("Exiting...");
        //Return a non-zero exit status to indicate failure
        return (1);
    }
    if ((params.get("input") != null) && (params.get("resume") != null)) {
        System.out.println("-(r)esume & -(i)nput are mutually exclusive options");
        System.out.println("Exiting...");
        //Return a non-zero exit status to indicate failure
        return (1);
    }
    if ((params.get("input") == null) && (params.get("resume") == null)) {
        System.out.println("At least one option from -(i)nput or -(r)esume must be specified");
        System.out.println("Exiting...");
        //Return a non-zero exit status to indicate failure
        return (1);
    } else {
        if (params.get("input") != null) {
            inputDir = new Path(params.get("input"));
        } else {
            inputDir = new Path(params.get("resume"));
        }
    }
    /* ---------------------------------------------------------------------
     * Checks to make sure the i/o paths
     * exist and are consistent.
     * --------------------------------------------------------------------
     */
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);

    //If the output paths exist clean them up
    if (fs.exists(outputDir)) {
        System.out.println("Deleting existing output path");
        fs.delete(outputDir, true);
    }
    //Create the necessary output paths afresh now
    fs.mkdirs(outputDir);

    //Complain if the input path doesn't exist
    if (!fs.exists(inputDir)) {
        System.out.println("\n------------\n" + " E R R O R " + "\n------------\n");

        System.out.println("Input path does not exist OR input option not specified. Exiting...");
        //Return a non-zero exit status to indicate failure
        return (1);
    }

    if (inputDir.toString().compareTo(outputDir.toString()) == 0) {
        System.out.println("\n------------\n" + " E R R O R " + "\n------------\n");

        System.out.println("The input and output path can NOT be same."
                + "\nThe output path is deleted prior to running the Hadoop jobs."
                + "\nHence, the input would be also deleted if paths are same." + "\nExiting...");
        //Return a non-zero exit status to indicate failure
        return (1);
    }

    params.set("input", inputDir.toString());
    params.set("output", outputDir.toString());

    /*---------------------------------------------------------------------
     * END OF ERROR CHECKS
     * --------------------------------------------------------------------*/

    /* Execute the FSM Job depending upon the parameters specified. */
    String executionMethod = getOption("execMode");

    //Set the resume and keepFiles flags in the commonConfig.
    //Also, set the intermediateOutput path accordingly.
    if (params.get("resume") != null)
        commonConfig.setResumeOption(true);
    else
        commonConfig.setResumeOption(false);

    if (params.get("keepFiles") != null) {
        commonConfig.setKeepFilesOption(true);
        Path intermediateDir = new Path(params.get("keepFiles"));
        if (fs.exists(intermediateDir)) {
            fs.delete(intermediateDir, true);
        }
        commonConfig.setIntermediatePath(params.get("keepFiles"));
    } else {
        File intermediateOutputPath = File.createTempFile("MG_FSM_INTRM_OP_", "");

        //Below JDK 7 we are only allowed to create temporary files.
        //Hence, turn the file into a directory in temporary folder.
        intermediateOutputPath.delete();
        intermediateOutputPath.mkdir();

        commonConfig.setIntermediatePath(intermediateOutputPath.getAbsolutePath().toString());

        System.out.println("The intermediate output will be written \n" + "to this temporary path :"
                + intermediateOutputPath);

        commonConfig.setKeepFilesOption(false);
    }

    //Set the 'tempDir' if its null
    if (params.get("tempDir") == null || params.get("tempDir").contentEquals("temp")) {

        File tempOutputPath = File.createTempFile("MG_FSM_TEMP_OP_", "");

        tempOutputPath.delete();
        //tempOutputPath.mkdir();

        commonConfig.setTmpPath(tempOutputPath.getAbsolutePath().toString());

        System.out.println("The temporary output associated with the internal map -reduce\n"
                + "jobs will be written to this temporary path :" + commonConfig.getTmpPath());
    } else {
        commonConfig.setTmpPath(params.get("tempDir"));
    }

    //Set the input and output paths of the commonConfig
    commonConfig.setInputPath(params.get("input"));
    commonConfig.setOutputPath(params.get("output"));
    commonConfig.setDictionaryPath(
            commonConfig.getIntermediatePath().concat("/" + Constants.OUTPUT_DICTIONARY_FILE_PATH));

    //Supply the rest of the algorithm specific options to commonConfig
    commonConfig.setSigma(Integer.parseInt(params.get("support")));
    commonConfig.setGamma(Integer.parseInt(params.get("gamma")));
    commonConfig.setLambda(Integer.parseInt(params.get("lambda")));

    commonConfig.setPartitionSize(Long.parseLong(params.get("partitionSize")));
    commonConfig.setAllowSplits(Boolean.parseBoolean(params.get("splits")));

    if (params.get("numReducers") != null) {
        commonConfig.setNumberOfReducers(Integer.parseInt(params.get("numReducers")));
    }

    switch (params.get("type").toCharArray()[0]) {
    case 'a': {
        commonConfig.setType(FsmConfig.Type.ALL);
        break;
    }
    case 'm': {
        commonConfig.setType(FsmConfig.Type.MAXIMAL);
        break;
    }
    case 'c': {
        commonConfig.setType(FsmConfig.Type.CLOSED);
        break;
    }
    default: {
        commonConfig.setType(FsmConfig.Type.ALL);
        break;
    }
    }

    switch (params.get("indexing").toCharArray()[0]) {
    case 'n': {
        commonConfig.setIndexingMethod(FsmConfig.IndexingMethod.NONE);
        break;
    }
    case 'm': {
        commonConfig.setIndexingMethod(FsmConfig.IndexingMethod.MINMAX);
        break;
    }
    case 'f': {
        commonConfig.setIndexingMethod(FsmConfig.IndexingMethod.FULL);
        break;
    }
    default: {
        commonConfig.setIndexingMethod(FsmConfig.IndexingMethod.FULL);
        break;
    }
    }

    //SEQUENTIAL EXECUTION MODE

    if ("s".equalsIgnoreCase(executionMethod)) {
        SequentialMode mySequentialMiner;

        mySequentialMiner = new SequentialMode(commonConfig);

        // If we are dealing with a fresh set of transactions 
        // we need to do encode & then mine.

        if (!commonConfig.isResumeOption()) {
            mySequentialMiner.createDictionary(commonConfig.getInputPath());
            mySequentialMiner.createIdToItemMap();
            //If the input path is a corpus 
            //runSeqJob will recursively call encodeAndMine()
            //on all the files to bring together a encoded sequences file
            //and consequently call the sequences miner on each of these
            //encoded sequences
            mySequentialMiner.runSeqJob(new File(commonConfig.getInputPath()));
        }
        /* 
         * If the transactions are encoded from previous runs, then run
         * the following set of functions for reading the encoded transactions
         * and then directly mine them for frequent sequences.  
         */
        else {
            mySequentialMiner.setIdToItemMap(new Dictionary().readDictionary(
                    commonConfig.getInputPath().concat("/" + Constants.OUTPUT_DICTIONARY_FILE_PATH)));

            mySequentialMiner.encodeAndMine(mySequentialMiner.getCommonConfig().getInputPath());
        }
    }

    //DISTRIBUTED EXECUTION MODE
    else if ("d".equalsIgnoreCase(executionMethod)) {

        DistributedMode myDistributedMiner = new DistributedMode(commonConfig);
        /*Execute the appropriate job based on whether we need to 
         * encode the input sequences or not.
         */
        if (!commonConfig.isResumeOption())
            myDistributedMiner.runJobs();
        else
            myDistributedMiner.resumeJobs();

    }
    //END OF EXECUTING FSM JOB
    //Return a zero exit status to indicate successful completion
    return 0;
}

From source file:distributed.hadoop.HDFSUtils.java

License:Open Source License

/**
 * Ensures that our staging directory exists in HDFS, creating it when it is missing.
 *
 * @param config the HDFSConfig containing connection details
 * @throws IOException if a problem occurs
 */
protected static void createTmpDistributedCacheDirIfNecessary(HDFSConfig config) throws IOException {
    final Configuration hadoopConf = new Configuration();
    config.configureForHadoop(hadoopConf, null);

    final FileSystem fileSystem = FileSystem.get(hadoopConf);
    final Path stagingDir = new Path(resolvePath(WEKA_TEMP_DISTRIBUTED_CACHE_FILES, null));

    // Nothing to do when the directory is already present.
    if (fileSystem.exists(stagingDir)) {
        return;
    }
    fileSystem.mkdirs(stagingDir);
}

From source file:dz.lab.hdfs.CopyMove.java

/**
 * Creates the directory {@code /tmp/playArea/newDir} on the given file system and prints
 * the value returned by {@code mkdirs}.
 *
 * @param fs file system on which the directory is created
 * @throws IOException if the directory cannot be created
 */
public static void createDirectory(FileSystem fs) throws IOException {
    final Path target = new Path("/tmp/playArea/newDir");
    System.out.println("Created: " + fs.mkdirs(target));
}

From source file:edu.arizona.cs.hadoop.fs.irods.output.HirodsFileOutputCommitter.java

License:Apache License

/**
 * Create the temporary directory that is the root of all of the task work
 * directories. Nothing is done when either the output path or the temp path is unset;
 * a failed creation is logged as an error rather than thrown.
 *
 * @param context the job's context
 * @throws IOException if the file system cannot be obtained or the mkdirs call fails
 */
public void setupJob(JobContext context) throws IOException {
    if (this.outputPath == null || this.tempPath == null) {
        return;
    }

    final Path tmpDir = new Path(this.tempPath, HirodsFileOutputCommitter.TEMP_DIR_NAME);
    final FileSystem fileSys = tmpDir.getFileSystem(context.getConfiguration());

    final boolean created = fileSys.mkdirs(tmpDir);
    if (!created) {
        LOG.error("Mkdirs failed to create " + tmpDir.toString());
    }
}