Example usage for org.apache.hadoop.fs FileSystem mkdirs

List of usage examples for org.apache.hadoop.fs FileSystem mkdirs

Introduction

On this page you can find example usage of org.apache.hadoop.fs.FileSystem#mkdirs.

Prototype

public boolean mkdirs(Path f) throws IOException 

Document

Calls #mkdirs(Path, FsPermission) with default permission.
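
A minimal, self-contained sketch of the call follows; the configuration, class name, and paths are illustrative placeholders, not taken from any of the projects below.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.permission.FsPermission;

public class MkdirsExample {
    public static void main(String[] args) throws IOException {
        // Picks up fs.defaultFS from the configuration (the local
        // filesystem unless a cluster configuration is on the classpath).
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);

        // mkdirs behaves like "mkdir -p": missing parents are created,
        // and it returns true if the directory exists afterwards.
        Path dir = new Path("/tmp/mkdirs-example"); // placeholder path
        if (!fs.mkdirs(dir)) {
            throw new IOException("Could not create " + dir);
        }

        // The overload this method delegates to, with an explicit
        // permission instead of the default.
        fs.mkdirs(new Path("/tmp/mkdirs-example/private"), FsPermission.valueOf("-rwxr-x---"));
    }
}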

Usage

From source file: etl.cmd.test.MiniOozieTestCase.java

License: Apache License
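
This test setup recreates the Oozie sharelib directory on HDFS: any existing copy is deleted, mkdirs creates a fresh one, and an embedded Oozie instance is then started.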

@Override
protected void setUp() throws Exception {
    System.setProperty("hadoop20", "true");
    super.setUp();

    FileSystem fs = getFileSystem();
    String remoteShareLibFolder = "/user/" + getOozieUser() + "/share/lib";
    fs.delete(new Path(remoteShareLibFolder), true);
    fs.mkdirs(new Path(remoteShareLibFolder));

    LocalOozie.start();
}

From source file: etl.cmd.test.XFsTestCase.java

License: Apache License
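
This helper prepares a per-testcase working directory: after clearing any previous directory, it calls mkdirs, fails fast when creation does not succeed, and then sets the owner and permissions.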

private Path initFileSystem(FileSystem fs) throws Exception {
    Path path = new Path(fs.getWorkingDirectory(), java.util.UUID.randomUUID().toString());
    Path testDirInFs = fs.makeQualified(path);
    System.out.println(XLog.format("Setting FS testcase work dir[{0}]", testDirInFs));
    if (fs.exists(testDirInFs)) {
        setAllPermissions(fs, testDirInFs);
    }
    fs.delete(testDirInFs, true);
    if (!fs.mkdirs(path)) {
        throw new IOException(XLog.format("Could not create FS testcase dir [{0}]", testDirInFs));
    }
    fs.setOwner(testDirInFs, getTestUser(), getTestGroup());
    fs.setPermission(testDirInFs, FsPermission.valueOf("-rwxrwx--x"));
    return testDirInFs;
}

From source file: etl.cmd.test.XTestCase.java

License: Apache License
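
This method boots an embedded DFS and MapReduce minicluster for tests, using mkdirs to pre-create the baseline HDFS directories (target/test-data, /user, /tmp, and the MapReduce system directory) before adjusting their permissions.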

private void setUpEmbeddedHadoop(String testCaseDir) throws Exception {
    if (dfsCluster == null && mrCluster == null) {
        if (System.getProperty("hadoop.log.dir") == null) {
            System.setProperty("hadoop.log.dir", testCaseDir);
        }
        String oozieUser = getOozieUser();
        JobConf conf = createDFSConfig();
        String[] userGroups = new String[] { getTestGroup(), getTestGroup2() };
        UserGroupInformation.createUserForTesting(oozieUser, userGroups);
        UserGroupInformation.createUserForTesting(getTestUser(), userGroups);
        UserGroupInformation.createUserForTesting(getTestUser2(), userGroups);
        UserGroupInformation.createUserForTesting(getTestUser3(), new String[] { "users" });

        try {
            MiniDFSCluster.Builder builder = new MiniDFSCluster.Builder(conf);
            dfsCluster = builder.build();
            FileSystem fileSystem = dfsCluster.getFileSystem();
            fileSystem.mkdirs(new Path("target/test-data"));
            fileSystem.mkdirs(new Path("target/test-data" + "/minicluster/mapred"));
            fileSystem.mkdirs(new Path("/user"));
            fileSystem.mkdirs(new Path("/tmp"));
            fileSystem.mkdirs(new Path("/hadoop/mapred/system"));
            fileSystem.setPermission(new Path("target/test-data"), FsPermission.valueOf("-rwxrwxrwx"));
            fileSystem.setPermission(new Path("target/test-data" + "/minicluster"),
                    FsPermission.valueOf("-rwxrwxrwx"));
            fileSystem.setPermission(new Path("target/test-data" + "/minicluster/mapred"),
                    FsPermission.valueOf("-rwxrwxrwx"));
            fileSystem.setPermission(new Path("/user"), FsPermission.valueOf("-rwxrwxrwx"));
            fileSystem.setPermission(new Path("/tmp"), FsPermission.valueOf("-rwxrwxrwx"));
            fileSystem.setPermission(new Path("/hadoop/mapred/system"), FsPermission.valueOf("-rwx------"));

            mrCluster = MiniMRClientClusterFactory.create(this.getClass(), 1, conf);
            Configuration jobConf = mrCluster.getConfig();
            System.setProperty(OOZIE_TEST_JOB_TRACKER, jobConf.get("mapreduce.jobtracker.address"));
            String rmAddress = jobConf.get("yarn.resourcemanager.address");
            log.info("Job tracker: " + rmAddress);
            if (rmAddress != null) {
                System.setProperty(OOZIE_TEST_JOB_TRACKER, rmAddress);
            }
            System.setProperty(OOZIE_TEST_NAME_NODE, jobConf.get("fs.defaultFS"));
            ProxyUsers.refreshSuperUserGroupsConfiguration(conf);
        } catch (Exception ex) {
            shutdownMiniCluster();
            throw ex;
        }
        new MiniClusterShutdownMonitor().start();
    }
}

From source file: etl.cmd.test.XTestCase.java

License: Apache License
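
A second-cluster variant of the setup above: only a DFS cluster is started, and mkdirs again pre-creates the baseline directories before their permissions are relaxed.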

private void setUpEmbeddedHadoop2() throws Exception {
    if (dfsCluster != null && dfsCluster2 == null) {
        // Trick the DFS location for MiniDFSCluster, since it doesn't accept the location as input
        String testBuildDataSaved = System.getProperty("test.build.data", "build/test/data");
        try {
            System.setProperty("test.build.data", FilenameUtils.concat(testBuildDataSaved, "2"));
            // Only DFS cluster is created based upon current need
            MiniDFSCluster.Builder builder = new MiniDFSCluster.Builder(createDFSConfig());
            dfsCluster2 = builder.build();
            FileSystem fileSystem = dfsCluster2.getFileSystem();
            fileSystem.mkdirs(new Path("target/test-data"));
            fileSystem.mkdirs(new Path("/user"));
            fileSystem.mkdirs(new Path("/tmp"));
            fileSystem.setPermission(new Path("target/test-data"), FsPermission.valueOf("-rwxrwxrwx"));
            fileSystem.setPermission(new Path("/user"), FsPermission.valueOf("-rwxrwxrwx"));
            fileSystem.setPermission(new Path("/tmp"), FsPermission.valueOf("-rwxrwxrwx"));
            System.setProperty(OOZIE_TEST_NAME_NODE2, fileSystem.getConf().get("fs.defaultFS"));
        } catch (Exception ex) {
            shutdownMiniCluster2();
            throw ex;
        } finally {
            // Restore previous value
            System.setProperty("test.build.data", testBuildDataSaved);
        }
    }
}

From source file: eu.edisonproject.classification.tfidf.mapreduce.CompetencesDistanceDriver.java

License: Apache License
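
This MapReduce driver stages local competence files into HDFS when not running against the local filesystem: if the target directory is missing, mkdirs creates it and the .csv files are copied in before being added to the distributed cache.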

@Override
public int run(String[] args) {
    try {
        Configuration conf = HBaseConfiguration.create();
        //additional output using TextOutputFormat.
        conf.set("file.names", args[3]);

        Job job = Job.getInstance(conf);
        //TableMapReduceUtil.addDependencyJars(job); 
        job.setJarByClass(CompetencesDistanceDriver.class);
        // This line must be changed
        job.setJobName("Words Group By Title Driver");

        Path inPath = new Path(args[0]);
        Path outPath = new Path(args[1]);

        Path competencesPath = new Path(args[2]);
        Path competencesPathHDFS = competencesPath;
        FileSystem fs = FileSystem.get(conf);

        if (!conf.get(FileSystem.FS_DEFAULT_NAME_KEY).startsWith("file")) {
            competencesPathHDFS = new Path(competencesPath.getName());
            if (!fs.exists(competencesPathHDFS)) {
                fs.mkdirs(competencesPathHDFS);
                File[] stats = new File(competencesPath.toString()).listFiles();
                for (File stat : stats) {
                    Path filePath = new Path(stat.getAbsolutePath());
                    if (FilenameUtils.getExtension(filePath.getName()).endsWith("csv")) {
                        Path dest = new Path(competencesPathHDFS.toUri() + "/" + filePath.getName());
                        fs.copyFromLocalFile(filePath, dest);
                    }
                }
            }
        }
        job.addCacheFile(competencesPathHDFS.toUri());

        FileInputFormat.setInputPaths(job, inPath);

        FileOutputFormat.setOutputPath(job, outPath);
        fs.delete(outPath, true);

        job.setMapperClass(CompetencesDistanceMapper.class);

        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);

        job.setReducerClass(CompetencesDistanceReducer.class);
        //            job.setOutputFormatClass(TableOutputFormat.class);
        //            job.getConfiguration().set(TableOutputFormat.OUTPUT_TABLE, "jobpostcompetence");
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        String[] fileNames = args[3].split(",");
        for (String n : fileNames) {
            MultipleOutputs.addNamedOutput(job, n, TextOutputFormat.class, Text.class, Text.class);
        }

        return (job.waitForCompletion(true) ? 0 : 1);
    } catch (IOException | IllegalStateException | IllegalArgumentException | InterruptedException
            | ClassNotFoundException ex) {
        Logger.getLogger(CompetencesDistanceDriver.class.getName()).log(Level.SEVERE, null, ex);
    }
    // Reached only when the job failed with an exception; signal failure.
    return 1;
}

From source file: eu.edisonproject.classification.tfidf.mapreduce.TermWordFrequency.java

License: Apache License
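
This driver stages a dictionary, a stopword list, and a folder of documents into HDFS; mkdirs creates the documents directory (registered with deleteOnExit) before the local .txt files are copied into it, and the job then reads the dictionary through NLineInputFormat.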

@Override
public int run(String[] args) throws Exception {
    Configuration jobconf = getConf();
    Job job = Job.getInstance(jobconf);
    FileSystem fs = FileSystem.get(jobconf);
    fs.delete(new Path(args[1]), true);
    Path dictionary = new Path(args[0]);
    Path dictionaryHdfs = dictionary;

    Path localDocs = new Path(args[2]);
    Path hdfsDocs = localDocs;

    Path stopwordsLocal = new Path(args[3]);
    Path stopwordsHDFS = stopwordsLocal;
    if (!jobconf.get(FileSystem.FS_DEFAULT_NAME_KEY).startsWith("file")) {
        dictionaryHdfs = new Path(dictionary.getName());
        if (!fs.exists(dictionaryHdfs)) {
            fs.copyFromLocalFile(dictionary, dictionaryHdfs);
        }
        hdfsDocs = new Path(localDocs.getName());
        fs.mkdirs(hdfsDocs);
        fs.deleteOnExit(hdfsDocs);

        File[] stats = new File(localDocs.toString()).listFiles();

        for (File stat : stats) {
            Path filePath = new Path(stat.getAbsolutePath());
            if (FilenameUtils.getExtension(filePath.getName()).endsWith("txt")) {
                Path dest = new Path(hdfsDocs.toUri() + "/" + filePath.getName());
                fs.copyFromLocalFile(filePath, dest);
            }
        }
        stopwordsHDFS = new Path(stopwordsLocal.getName());
        if (!fs.exists(stopwordsHDFS)) {
            fs.copyFromLocalFile(stopwordsLocal, stopwordsHDFS);
        }
    }

    FileStatus stopwordsStatus = fs.getFileStatus(stopwordsHDFS);
    stopwordsHDFS = stopwordsStatus.getPath();
    job.addCacheFile(stopwordsHDFS.toUri());

    job.addCacheFile(hdfsDocs.toUri());

    job.setJarByClass(TermWordFrequency.class);
    job.setJobName("Word Frequency Term Driver");

    FileInputFormat.setInputPaths(job, dictionaryHdfs);
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    //        job.setInputFormatClass(TextInputFormat.class);
    job.setInputFormatClass(NLineInputFormat.class);
    NLineInputFormat.addInputPath(job, dictionaryHdfs);
    NLineInputFormat.setNumLinesPerSplit(job, Integer.valueOf(args[4]));
    NLineInputFormat.setMaxInputSplitSize(job, 500);

    job.setMapperClass(TermWordFrequencyMapper.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Integer.class);
    job.setReducerClass(TermWordFrequencyReducer.class);

    return (job.waitForCompletion(true) ? 0 : 1);

}

From source file: eu.edisonproject.training.tfidf.mapreduce.TermWordFrequency.java

License: Apache License
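
A variant of the previous driver from the training module: here mkdirs creates the HDFS documents directory mainly so its qualified path can be resolved; the directory is then cleared and repopulated by copying in the local .txt files.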

@Override
public int run(String[] args) throws Exception {
    Configuration jobconf = getConf();

    FileSystem fs = FileSystem.get(jobconf);
    fs.delete(new Path(args[1]), true);
    Path in = new Path(args[0]);
    Path inHdfs = in;
    if (!jobconf.get(FileSystem.FS_DEFAULT_NAME_KEY).startsWith("file")) {
        inHdfs = new Path(in.getName());
        fs.delete(inHdfs, true);
        fs.copyFromLocalFile(in, inHdfs);
        fs.deleteOnExit(inHdfs);
        FileStatus inHdfsStatus = fs.getFileStatus(inHdfs);
        //            Logger.getLogger(TermWordFrequency.class.getName()).log(Level.INFO, "Copied: {0} to: {1}", new Object[]{in.toUri(), inHdfsStatus.getPath().toUri()});
    }

    Job job = Job.getInstance(jobconf);
    Path stopwordsLocal = new Path(args[3]);
    stopwords = new Path(stopwordsLocal.getName());
    fs.delete(stopwords, true);
    fs.copyFromLocalFile(stopwordsLocal, stopwords);
    fs.deleteOnExit(stopwords);

    FileStatus stopwordsStatus = fs.getFileStatus(stopwords);
    stopwords = stopwordsStatus.getPath();
    job.addCacheFile(stopwords.toUri());

    Path localDocs = new Path(args[2]);
    Path hdfsDocs = new Path(localDocs.getName());
    fs.mkdirs(hdfsDocs);
    hdfsDocs = fs.getFileStatus(hdfsDocs).getPath();
    fs.delete(hdfsDocs, true);
    //        FileStatus[] stats = fs.listStatus(localDocs);
    File[] stats = new File(localDocs.toString()).listFiles();

    for (File stat : stats) {
        //        for (FileStatus stat : stats) {
        Path filePath = new Path(stat.getAbsolutePath());
        if (FilenameUtils.getExtension(filePath.getName()).endsWith("txt")) {
            Path dest = new Path(hdfsDocs.toUri() + "/" + filePath.getName());
            fs.copyFromLocalFile(filePath, dest);
        }
    }

    job.addCacheFile(hdfsDocs.toUri());

    job.setJarByClass(TermWordFrequency.class);
    job.setJobName("Word Frequency Term Driver");

    FileInputFormat.setInputPaths(job, inHdfs);
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    //         job.setInputFormatClass(TextInputFormat.class);
    job.setInputFormatClass(NLineInputFormat.class);
    NLineInputFormat.addInputPath(job, inHdfs);
    NLineInputFormat.setNumLinesPerSplit(job, Integer.valueOf(args[4]));
    NLineInputFormat.setMaxInputSplitSize(job, 500);
    Logger.getLogger(TermWordFrequency.class.getName()).log(Level.INFO, "Num. of lines: {0}",
            NLineInputFormat.getNumLinesPerSplit(job));

    job.setMapperClass(TermWordFrequencyMapper.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Integer.class);
    job.setReducerClass(TermWordFrequencyReducer.class);

    return (job.waitForCompletion(true) ? 0 : 1);

}

From source file: eu.scape_project.tb.lsdr.seqfileutility.hadoop.HadoopJob.java

License: Apache License
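
This driver prepares input for a sequence-file creation job: mkdirs creates a timestamped HDFS input directory, a file listing local paths is written into it, and after the job runs the expected part-r-00000 output is verified.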

/**
 * Run the Hadoop job
 *
 * @param strings Command line arguments
 * @return Success indicator
 * @throws Exception
 */
@Override
public int run(String[] strings) throws Exception {
    try {
        String hdfsInputDir = null;
        FileSystem hdfs = FileSystem.get(conf);

        // hdfs input path is given as command parameter
        if (pc.getHdfsInputPath() != null) {
            hdfsInputDir = pc.getHdfsInputPath();
            // hdfs input file is created
        } else {
            hdfsInputDir = "input/" + System.currentTimeMillis() + "sfu/";

            String[] extensions = null;
            if (pc.getExtStr() != null) {
                StringTokenizer st = new StringTokenizer(pc.getExtStr(), ",");
                extensions = new String[st.countTokens()];
                int i = 0;
                while (st.hasMoreTokens()) {
                    extensions[i] = st.nextToken();
                    i++;
                }
            }

            hdfs.mkdirs(new Path(hdfsInputDir));

            String hdfsInputPath = hdfsInputDir + "inputpaths.txt";
            Path path = new Path(hdfsInputPath);

            FSDataOutputStream outputStream = hdfs.create(path);

            List<String> dirs = StringUtils.getStringListFromString(pc.getDirsStr(), ",");
            for (String dir : dirs) {
                File directory = new File(dir);
                if (directory.isDirectory()) {
                    // Alternatively, the java traverse method can be used
                    // for creating the file paths:
                    //traverse(directory, outputStream);
                    writeFilePaths(directory, outputStream);
                } else {
                    logger.warn("Parameter \"" + dir + "\" is not a directory " + "(skipped)");
                }
            }
            outputStream.close();
            if (hdfs.exists(path)) {
                logger.info(
                        "Input paths created in \"" + hdfs.getHomeDirectory() + "/" + path.toString() + "\"");
            } else {
                logger.error("Input paths have not been created in hdfs.");
                return 1;
            }
        }
        String hadoopJobName = "Hadoop_sequence_file_creation";
        if (pc.getHadoopJobName() != null && !pc.getHadoopJobName().equals(""))
            hadoopJobName = pc.getHadoopJobName();
        Job job = new Job(conf, hadoopJobName);

        job.setJarByClass(SequenceFileUtility.class);
        job.setMapperClass(SmallFilesSequenceFileMapper.class);
        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(BytesWritable.class);
        job.setOutputFormatClass(SequenceFileOutputFormat.class);
        TextInputFormat.addInputPath(job, new Path(hdfsInputDir));

        // todo: support absolute paths
        String hdfsOutputDir = pc.getOutputDirectory() != null ? pc.getOutputDirectory()
                : "output/" + System.currentTimeMillis() + "sfu/";

        SequenceFileOutputFormat.setOutputPath(job, new Path(hdfsOutputDir));
        SequenceFileOutputFormat.setOutputCompressionType(job, CompressionType.get(pc.getCompressionType()));

        int success = job.waitForCompletion(true) ? 0 : 1;
        boolean seqFileExists = hdfs.exists(new Path(hdfsOutputDir + "part-r-00000"));
        if (success == 0 && seqFileExists) {
            logger.info("Sequence file created: \""
                    //+ hdfs.getHomeDirectory() + "/"
                    + new Path(hdfsOutputDir).toString() + "/part-r-00000" + "\"");
            pc.setOutputDirectory(hdfsOutputDir);
            return 0;
        } else {
            logger.error("Sequence file not created in hdfs");
            return 1;
        }
    } catch (Exception e) {
        logger.error("Exception occurred", e);
    }
    // Reached only after a caught exception; signal failure.
    return 1;
}

From source file: eu.stratosphere.hadoopcompatibility.FileOutputCommitterWrapper.java

License: Apache License
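
In this Hadoop-compatibility wrapper for Stratosphere, job setup creates the temporary output directory with mkdirs and logs an error when the call reports failure.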

public void setupJob(JobConf conf) throws IOException {
    Path outputPath = FileOutputFormat.getOutputPath(conf);
    if (outputPath != null) {
        Path tmpDir = new Path(outputPath, FileOutputCommitter.TEMP_DIR_NAME);
        FileSystem fileSys = tmpDir.getFileSystem(conf);
        if (!fileSys.mkdirs(tmpDir)) {
            LOG.error("Mkdirs failed to create " + tmpDir.toString());
        }
    }
}

From source file: eu.stratosphere.hadoopcompatibility.FileOutputCommitterWrapper.java

License: Apache License
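
From the same committer wrapper: when a task output is a directory, mkdirs recreates it under the final output location before the directory's contents are moved recursively.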

private void moveTaskOutputs(JobConf conf, TaskAttemptID taskAttemptID, FileSystem fs, Path jobOutputDir,
        Path taskOutput) throws IOException {
    if (fs.isFile(taskOutput)) {
        Path finalOutputPath = getFinalPath(jobOutputDir, taskOutput,
                getTempTaskOutputPath(conf, taskAttemptID));
        if (!fs.rename(taskOutput, finalOutputPath)) {
            if (!fs.delete(finalOutputPath, true)) {
                throw new IOException("Failed to delete earlier output of task: " + taskAttemptID);
            }
            if (!fs.rename(taskOutput, finalOutputPath)) {
                throw new IOException("Failed to save output of task: " + taskAttemptID);
            }
        }
        LOG.debug("Moved " + taskOutput + " to " + finalOutputPath);
    } else if (fs.getFileStatus(taskOutput).isDir()) {
        FileStatus[] paths = fs.listStatus(taskOutput);
        Path finalOutputPath = getFinalPath(jobOutputDir, taskOutput,
                getTempTaskOutputPath(conf, taskAttemptID));
        fs.mkdirs(finalOutputPath);
        if (paths != null) {
            for (FileStatus path : paths) {
                moveTaskOutputs(conf, taskAttemptID, fs, jobOutputDir, path.getPath());
            }
        }
    }
}