Example usage for org.apache.hadoop.fs FileSystem mkdirs

List of usage examples for org.apache.hadoop.fs FileSystem mkdirs

Introduction

On this page you can find example usage for org.apache.hadoop.fs FileSystem mkdirs.

Prototype

public boolean mkdirs(Path f) throws IOException 

Document

Call #mkdirs(Path, FsPermission) with default permission.
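
A minimal sketch of the call itself, before the real-world examples below. The configuration and directory paths here are illustrative assumptions, not taken from any of the source files; mkdirs behaves like mkdir -p, creating the directory together with any missing parents and returning true on success (including when the directory already exists).

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.permission.FsPermission;

public class MkdirsSketch {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        Path dir = new Path("/tmp/example/output"); // hypothetical path
        FileSystem fs = dir.getFileSystem(conf);

        // Creates the directory and any missing parents; returns true on success.
        if (!fs.mkdirs(dir)) {
            throw new IOException("Unable to create directory: " + dir);
        }

        // The two-argument overload sets an explicit permission instead of the default.
        fs.mkdirs(new Path("/tmp/example/secured"), new FsPermission((short) 0750));
    }
}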

Usage

From source file: finderbots.recommenders.hadoop.ActionSplitterJob.java

License: Apache License

public void split(Path baseInputDir, Path baseOutputDir) throws IOException {
    FileSystem fs = baseInputDir.getFileSystem(getConf());
    Path action1DirPath = new Path(baseOutputDir, options.getAction1Dir());
    Path action2DirPath = new Path(baseOutputDir, options.getAction2Dir());
    Path actionOtherDirPath = new Path(baseOutputDir, options.getActionOtherDir());
    Path action1FilePath = new Path(action1DirPath, options.getAction1File());
    Path action2FilePath = new Path(action2DirPath, options.getAction2File());
    Path actionOtherFilePath = new Path(actionOtherDirPath, options.getActionOtherFile());
    FSDataOutputStream action1File;
    FSDataOutputStream action2File;
    FSDataOutputStream actionOtherFile;

    if (!fs.exists(baseOutputDir)) {
        LOGGER.info("Preference output dir:" + baseOutputDir.toString() + " does not exist. creating it.");
        fs.mkdirs(baseOutputDir);
    }

    if (fs.exists(action1DirPath))
        fs.delete(action1DirPath, true);
    if (fs.exists(action2DirPath))
        fs.delete(action2DirPath, true);
    if (fs.exists(actionOtherDirPath))
        fs.delete(actionOtherDirPath, true);

    // cleaned out prefs if they existed, now create a place to put the new ones
    fs.mkdirs(action1DirPath);
    fs.mkdirs(action2DirPath);
    fs.mkdirs(actionOtherDirPath);
    action1File = fs.create(action1FilePath);
    action2File = fs.create(action2FilePath);
    actionOtherFile = fs.create(actionOtherFilePath);

    List<FSDataInputStream> actionFiles = getActionFiles(baseInputDir);

    Integer uniqueUserIDCounter = 0;
    Integer uniqueItemIDCounter = 0;
    for (FSDataInputStream stream : actionFiles) {
        BufferedReader bin = new BufferedReader(new InputStreamReader(stream));
        String actionLogLine;
        while ((actionLogLine = bin.readLine()) != null) {//get user to make a rec for
            String[] columns = actionLogLine.split(options.getInputDelimiter());
            if (options.getTimestampColumn() != -1) { // ignoring for now but may be useful
                String timestamp = columns[options.getTimestampColumn()].trim();
            }
            String externalUserIDString = columns[options.getUserIdColumn()].trim();
            String externalItemIDString = columns[options.getItemIdColumn()].trim();
            String actionString = columns[options.getActionColumn()].trim();

            // create a bi-directional index of external->internal ids
            String internalUserID;
            String internalItemID;
            if (this.userIndex.containsKey(externalUserIDString)) {// already in the user index
                internalUserID = this.userIndex.get(externalUserIDString);
            } else {
                internalUserID = uniqueUserIDCounter.toString();
                this.userIndex.forcePut(externalUserIDString, internalUserID);
                uniqueUserIDCounter += 1;
                if (uniqueUserIDCounter % 10000 == 0)
                    LOGGER.debug(
                            "Splitter processed: " + Integer.toString(uniqueUserIDCounter) + " unique users.");
            }
            if (this.itemIndex.containsKey(externalItemIDString)) {// already in the item index
                internalItemID = this.itemIndex.get(externalItemIDString);
            } else {
                internalItemID = uniqueItemIDCounter.toString();
                this.itemIndex.forcePut(externalItemIDString, internalItemID);
                uniqueItemIDCounter += 1;
            }
            if (actionString.equals(options.getAction1())) {
                action1File.writeBytes(internalUserID + options.getOutputDelimiter() + internalItemID
                        + options.getOutputDelimiter() + "1.0\n");
            } else if (actionString.equals(options.getAction2())) {
                action2File.writeBytes(internalUserID + options.getOutputDelimiter() + internalItemID
                        + options.getOutputDelimiter() + "1.0\n");
            } else {
                actionOtherFile.writeBytes(actionLogLine);//write what's not recognized
            }
        }
    }
    action1File.close();
    action2File.close();
    actionOtherFile.close();
    int i = 0;//breakpoint after close to inspect files
}

From source file: fr.ens.biologie.genomique.eoulsan.data.protocols.HDFSPathDataProtocol.java

License: LGPL

@Override
public void mkdirs(final DataFile dir) throws IOException {

    final Path path = getPath(dir);

    if (path == null) {
        throw new NullPointerException("Path to create is null");
    }
    if (this.conf == null) {
        throw new NullPointerException("The configuration object is null");
    }

    final FileSystem fs = path.getFileSystem(this.conf);

    if (fs == null) {
        throw new IOException("Unable to create the directory, The FileSystem is null");
    }

    if (!fs.mkdirs(path)) {
        throw new IOException("Unable to create the directory: " + dir);
    }
}

From source file: fr.ens.biologie.genomique.eoulsan.MainHadoop.java

License: LGPL

@Override
protected Handler getLogHandler(final URI logFile) throws IOException {

    if (logFile == null) {
        throw new NullPointerException("The log file is null");
    }

    final Path loggerPath = new Path(logFile);
    final FileSystem loggerFs = loggerPath.getFileSystem(this.conf);

    final Path parentPath = loggerPath.getParent();

    // Create parent directory if necessary
    if (!loggerFs.exists(loggerPath.getParent())) {
        if (!loggerFs.mkdirs(loggerPath.getParent())) {
            throw new IOException("Unable to create directory " + parentPath + " for log file:" + logFile);
        }
    }

    return new StreamHandler(loggerFs.create(loggerPath), Globals.LOG_FORMATTER);
}

From source file: fr.ens.biologie.genomique.eoulsan.modules.mgmt.hadoop.DistCp.java

License: LGPL

/**
 * Initialize DFSCopyFileMapper specific job-configuration.
 * @param conf : The dfs/mapred configuration.
 * @param jobConf : The handle to the jobConf object to be initialized.
 * @param args Arguments
 */
private static void setup(final Configuration conf, final JobConf jobConf, final Arguments args)
        throws IOException {
    jobConf.set(DST_DIR_LABEL, args.dst.toUri().toString());

    // set boolean values
    final boolean update = args.flags.contains(Options.UPDATE);
    final boolean overwrite = !update && args.flags.contains(Options.OVERWRITE);
    jobConf.setBoolean(Options.UPDATE.propertyname, update);
    jobConf.setBoolean(Options.OVERWRITE.propertyname, overwrite);
    jobConf.setBoolean(Options.IGNORE_READ_FAILURES.propertyname,
            args.flags.contains(Options.IGNORE_READ_FAILURES));
    jobConf.setBoolean(Options.PRESERVE_STATUS.propertyname, args.flags.contains(Options.PRESERVE_STATUS));

    final String randomId = getRandomId();
    JobClient jClient = new JobClient(jobConf);
    Path jobDirectory = new Path(jClient.getSystemDir(), NAME + "_" + randomId);
    jobConf.set(JOB_DIR_LABEL, jobDirectory.toString());

    long maxBytesPerMap = conf.getLong(BYTES_PER_MAP_LABEL, BYTES_PER_MAP);

    FileSystem dstfs = args.dst.getFileSystem(conf);
    boolean dstExists = dstfs.exists(args.dst);
    boolean dstIsDir = false;
    if (dstExists) {
        dstIsDir = dstfs.getFileStatus(args.dst).isDir();
    }

    // default logPath
    Path logPath = args.log;
    if (logPath == null) {
        String filename = "_distcp_logs_" + randomId;
        if (!dstExists || !dstIsDir) {
            Path parent = args.dst.getParent();
            if (null == parent) {
                // If dst is '/' on S3, it might not exist yet, but dst.getParent()
                // will return null. In this case, use '/' as its own parent to
                // prevent
                // NPE errors below.
                parent = args.dst;
            }
            if (!dstfs.exists(parent)) {
                dstfs.mkdirs(parent);
            }
            logPath = new Path(parent, filename);
        } else {
            logPath = new Path(args.dst, filename);
        }
    }
    FileOutputFormat.setOutputPath(jobConf, logPath);

    // create src list, dst list
    FileSystem jobfs = jobDirectory.getFileSystem(jobConf);

    Path srcfilelist = new Path(jobDirectory, "_distcp_src_files");
    jobConf.set(SRC_LIST_LABEL, srcfilelist.toString());
    SequenceFile.Writer src_writer = SequenceFile.createWriter(jobfs, jobConf, srcfilelist, LongWritable.class,
            FilePair.class, SequenceFile.CompressionType.NONE);

    Path dstfilelist = new Path(jobDirectory, "_distcp_dst_files");
    SequenceFile.Writer dst_writer = SequenceFile.createWriter(jobfs, jobConf, dstfilelist, Text.class,
            Text.class, SequenceFile.CompressionType.NONE);

    Path dstdirlist = new Path(jobDirectory, "_distcp_dst_dirs");
    jobConf.set(DST_DIR_LIST_LABEL, dstdirlist.toString());
    SequenceFile.Writer dir_writer = SequenceFile.createWriter(jobfs, jobConf, dstdirlist, Text.class,
            FilePair.class, SequenceFile.CompressionType.NONE);

    // handle the case where the destination directory doesn't exist
    // and we've only a single src directory OR we're updating/overwriting
    // the contents of the destination directory.
    final boolean special = (args.srcs.size() == 1 && !dstExists) || update || overwrite;
    int srcCount = 0, cnsyncf = 0, dirsyn = 0;
    long fileCount = 0L, byteCount = 0L, cbsyncs = 0L;
    try {
        for (Iterator<Path> srcItr = args.srcs.iterator(); srcItr.hasNext();) {
            final Path src = srcItr.next();
            FileSystem srcfs = src.getFileSystem(conf);
            FileStatus srcfilestat = srcfs.getFileStatus(src);
            Path root = special && srcfilestat.isDir() ? src : src.getParent();
            if (srcfilestat.isDir()) {
                ++srcCount;
            }

            Stack<FileStatus> pathstack = new Stack<>();
            for (pathstack.push(srcfilestat); !pathstack.empty();) {
                FileStatus cur = pathstack.pop();
                FileStatus[] children = srcfs.listStatus(cur.getPath());
                for (int i = 0; i < children.length; i++) {
                    boolean skipfile = false;
                    final FileStatus child = children[i];
                    final String dst = makeRelative(root, child.getPath());
                    ++srcCount;

                    if (child.isDir()) {
                        pathstack.push(child);
                    } else {
                        // skip file if the src and the dst files are the same.
                        skipfile = update && sameFile(srcfs, child, dstfs, new Path(args.dst, dst));
                        // skip file if it exceed file limit or size limit
                        skipfile |= fileCount == args.filelimit || byteCount + child.getLen() > args.sizelimit;

                        if (!skipfile) {
                            ++fileCount;
                            byteCount += child.getLen();

                            // if (LOG.isTraceEnabled()) {
                            // LOG.trace("adding file " + child.getPath());
                            // }

                            ++cnsyncf;
                            cbsyncs += child.getLen();
                            if (cnsyncf > SYNC_FILE_MAX || cbsyncs > maxBytesPerMap) {
                                src_writer.sync();
                                dst_writer.sync();
                                cnsyncf = 0;
                                cbsyncs = 0L;
                            }
                        }
                    }

                    if (!skipfile) {
                        src_writer.append(new LongWritable(child.isDir() ? 0 : child.getLen()),
                                new FilePair(child, dst));
                    }

                    dst_writer.append(new Text(dst), new Text(child.getPath().toString()));
                }

                if (cur.isDir()) {
                    String dst = makeRelative(root, cur.getPath());
                    dir_writer.append(new Text(dst), new FilePair(cur, dst));
                    if (++dirsyn > SYNC_FILE_MAX) {
                        dirsyn = 0;
                        dir_writer.sync();
                    }
                }
            }
        }
    } finally {
        checkAndClose(src_writer);
        checkAndClose(dst_writer);
        checkAndClose(dir_writer);
    }

    FileStatus dststatus = null;
    try {
        dststatus = dstfs.getFileStatus(args.dst);
    } catch (FileNotFoundException fnfe) {
        getLogger().info(args.dst + " does not exist.");
    }

    // create dest path dir if copying > 1 file
    if (dststatus == null) {
        if (srcCount > 1 && !dstfs.mkdirs(args.dst)) {
            throw new IOException("Failed to create" + args.dst);
        }
    }

    final Path sorted = new Path(jobDirectory, "_distcp_sorted");
    checkDuplication(jobfs, dstfilelist, sorted, conf);

    if (dststatus != null && args.flags.contains(Options.DELETE)) {
        deleteNonexisting(dstfs, dststatus, sorted, jobfs, jobDirectory, jobConf, conf);
    }

    Path tmpDir = new Path(
            (dstExists && !dstIsDir) || (!dstExists && srcCount == 1) ? args.dst.getParent() : args.dst,
            "_distcp_tmp_" + randomId);
    jobConf.set(TMP_DIR_LABEL, tmpDir.toUri().toString());

    // Explicitly create the tmpDir to ensure that it can be cleaned
    // up by fullyDelete() later.
    tmpDir.getFileSystem(conf).mkdirs(tmpDir);

    getLogger().info("srcCount=" + srcCount);
    jobConf.setInt(SRC_COUNT_LABEL, srcCount);
    jobConf.setLong(TOTAL_SIZE_LABEL, byteCount);
    setMapCount(byteCount, jobConf);
}

From source file: fr.ens.biologie.genomique.eoulsan.modules.mgmt.upload.DataFileDistCp.java

License: LGPL

public void copy(final Map<DataFile, DataFile> entries) throws IOException {

    if (entries == null || entries.size() == 0) {
        return;
    }

    final Configuration conf = this.conf;
    final Path tmpInputDir = PathUtils.createTempPath(this.jobPath, "distcp-in-", "", conf);
    final Path tmpOutputDir = PathUtils.createTempPath(this.jobPath, "distcp-out-", "", conf);

    //
    // Create entries for distcp
    //

    final FileSystem fs = tmpInputDir.getFileSystem(conf);
    fs.mkdirs(tmpInputDir);

    // Sort files by size
    final List<DataFile> inFiles = Lists.newArrayList(entries.keySet());
    sortInFilesByDescSize(inFiles);

    // Set the format for the id of the copy task
    final NumberFormat nf = NumberFormat.getInstance();
    nf.setMinimumIntegerDigits(Integer.toString(inFiles.size()).length());
    nf.setGroupingUsed(false);

    int count = 0;
    for (DataFile inFile : inFiles) {

        count++;

        final DataFile outFile = entries.get(inFile);

        final Path f = new Path(tmpInputDir, "distcp-" + nf.format(count) + ".cp");

        getLogger().info("Task copy " + inFile + " in " + f.toString());

        BufferedWriter bw = new BufferedWriter(new OutputStreamWriter(fs.create(f), CHARSET));

        bw.write(inFile.getSource() + "\t" + outFile.getSource() + "\n");
        bw.close();
    }

    final Job job = createJobConf(conf, tmpInputDir, tmpOutputDir);

    try {
        job.waitForCompletion(false);
    } catch (InterruptedException | ClassNotFoundException e) {
        throw new EoulsanRuntimeException("Error while distcp: " + e.getMessage(), e);
    }

    // Remove tmp directory
    PathUtils.fullyDelete(tmpInputDir, conf);
    PathUtils.fullyDelete(tmpOutputDir, conf);

    if (!job.isSuccessful()) {
        throw new IOException("Unable to copy files using DataFileDistCp.");
    }

}

From source file: fr.ens.biologie.genomique.eoulsan.util.hadoop.PathUtils.java

License: LGPL

/**
 * Create a directory. If the parent directories don't exist, create them too.
 * @param path Path of the directory to create
 * @param conf Configuration
 * @return true if the directory is successfully created
 * @throws IOException if an error occurs while creating the directory
 */
public static final boolean mkdirs(final Path path, final Configuration conf) throws IOException {

    if (path == null) {
        throw new NullPointerException("The path of the directory to create is null.");
    }

    if (conf == null) {
        throw new NullPointerException("The configuration is null");
    }

    final FileSystem fs = path.getFileSystem(conf);
    return fs.mkdirs(path);
}
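
For completeness, a hypothetical caller of the helper above; the directory path and the plain Configuration are assumptions made only for this sketch.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;

import fr.ens.biologie.genomique.eoulsan.util.hadoop.PathUtils;

public class PathUtilsMkdirsUsage {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        Path reportsDir = new Path("/user/eoulsan/reports"); // illustrative path

        // The helper rejects null arguments and otherwise delegates to FileSystem#mkdirs.
        if (!PathUtils.mkdirs(reportsDir, conf)) {
            throw new IOException("Unable to create directory: " + reportsDir);
        }
    }
}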

From source file: fr.jetoile.hadoopunit.component.OozieBootstrapTest.java

License: Apache License

@Test
public void oozieShouldStart() throws Exception {

    LOGGER.info("OOZIE: Test Submit Workflow Start");

    FileSystem hdfsFs = ((HdfsBootstrap) HadoopBootstrap.INSTANCE.getService(Component.HDFS))
            .getHdfsFileSystemHandle();
    OozieClient oozieClient = ((OozieBootstrap) HadoopBootstrap.INSTANCE.getService(Component.OOZIE))
            .getOozieClient();

    Path appPath = new Path(hdfsFs.getHomeDirectory(), "testApp");
    hdfsFs.mkdirs(new Path(appPath, "lib"));
    Path workflow = new Path(appPath, "workflow.xml");

    //write workflow.xml
    String wfApp = "<workflow-app xmlns='uri:oozie:workflow:0.1' name='test-wf'>" + "    <start to='end'/>"
            + "    <end name='end'/>" + "</workflow-app>";

    Writer writer = new OutputStreamWriter(hdfsFs.create(workflow));
    writer.write(wfApp);
    writer.close();

    //write job.properties
    Properties conf = oozieClient.createConfiguration();
    conf.setProperty(OozieClient.APP_PATH, workflow.toString());
    conf.setProperty(OozieClient.USER_NAME, UserGroupInformation.getCurrentUser().getUserName());

    //submit and check
    final String jobId = oozieClient.submit(conf);
    WorkflowJob wf = oozieClient.getJobInfo(jobId);
    assertNotNull(wf);
    assertEquals(WorkflowJob.Status.PREP, wf.getStatus());

    LOGGER.info("OOZIE: Workflow: {}", wf.toString());
    hdfsFs.close();
    assertThat("true").isEqualTo("true");

}

From source file: fr.jetoile.hadoopunit.integrationtest.IntegrationBootstrapTest.java

License: Apache License

@Test
public void oozieShouldStart() throws Exception {
    LOGGER.info("OOZIE: Test Submit Workflow Start");

    org.apache.hadoop.conf.Configuration conf = new org.apache.hadoop.conf.Configuration();
    conf.set("fs.default.name", "hdfs://127.0.0.1:" + configuration.getInt(Config.HDFS_NAMENODE_PORT_KEY));

    URI uri = URI.create("hdfs://127.0.0.1:" + configuration.getInt(Config.HDFS_NAMENODE_PORT_KEY));

    FileSystem hdfsFs = FileSystem.get(uri, conf);

    OozieClient oozieClient = new OozieClient("http://" + configuration.getString(OozieBootstrap.OOZIE_HOST)
            + ":" + configuration.getInt(OozieBootstrap.OOZIE_PORT) + "/oozie");

    Path appPath = new Path(hdfsFs.getHomeDirectory(), "testApp");
    hdfsFs.mkdirs(new Path(appPath, "lib"));
    Path workflow = new Path(appPath, "workflow.xml");

    //write workflow.xml
    String wfApp = "<workflow-app xmlns='uri:oozie:workflow:0.1' name='test-wf'>" + "    <start to='end'/>"
            + "    <end name='end'/>" + "</workflow-app>";

    Writer writer = new OutputStreamWriter(hdfsFs.create(workflow));
    writer.write(wfApp);
    writer.close();

    //write job.properties
    Properties oozieConf = oozieClient.createConfiguration();
    oozieConf.setProperty(OozieClient.APP_PATH, workflow.toString());
    oozieConf.setProperty(OozieClient.USER_NAME, UserGroupInformation.getCurrentUser().getUserName());

    //submit and check
    final String jobId = oozieClient.submit(oozieConf);
    WorkflowJob wf = oozieClient.getJobInfo(jobId);
    Assert.assertNotNull(wf);
    assertEquals(WorkflowJob.Status.PREP, wf.getStatus());

    LOGGER.info("OOZIE: Workflow: {}", wf.toString());
    hdfsFs.close();

}

From source file: fr.jetoile.hadoopunit.integrationtest.ManualIntegrationBootstrapTest.java

License: Apache License

@Test
public void oozieShouldStart() throws Exception {

    LOGGER.info("OOZIE: Test Submit Workflow Start");

    org.apache.hadoop.conf.Configuration conf = new org.apache.hadoop.conf.Configuration();
    conf.set("fs.default.name", "hdfs://127.0.0.1:" + configuration.getInt(Config.HDFS_NAMENODE_PORT_KEY));

    URI uri = URI.create("hdfs://127.0.0.1:" + configuration.getInt(Config.HDFS_NAMENODE_PORT_KEY));

    FileSystem hdfsFs = FileSystem.get(uri, conf);

    OozieClient oozieClient = new OozieClient("http://" + configuration.getString(OozieBootstrap.OOZIE_HOST)
            + ":" + configuration.getInt(OozieBootstrap.OOZIE_PORT) + "/oozie");

    Path appPath = new Path(hdfsFs.getHomeDirectory(), "testApp");
    hdfsFs.mkdirs(new Path(appPath, "lib"));
    Path workflow = new Path(appPath, "workflow.xml");

    //write workflow.xml
    String wfApp = "<workflow-app xmlns='uri:oozie:workflow:0.1' name='test-wf'>" + "    <start to='end'/>"
            + "    <end name='end'/>" + "</workflow-app>";

    Writer writer = new OutputStreamWriter(hdfsFs.create(workflow));
    writer.write(wfApp);
    writer.close();

    //write job.properties
    Properties oozieConf = oozieClient.createConfiguration();
    oozieConf.setProperty(OozieClient.APP_PATH, workflow.toString());
    oozieConf.setProperty(OozieClient.USER_NAME, UserGroupInformation.getCurrentUser().getUserName());

    //submit and check
    final String jobId = oozieClient.submit(oozieConf);
    WorkflowJob wf = oozieClient.getJobInfo(jobId);
    Assert.assertNotNull(wf);
    assertEquals(WorkflowJob.Status.PREP, wf.getStatus());

    LOGGER.info("OOZIE: Workflow: {}", wf.toString());
    hdfsFs.close();

}

From source file: fr.jetoile.hadoopunit.integrationtest.SparkIntegrationTest.java

License: Apache License

@Before
public void before() throws IOException, URISyntaxException {
    FileSystem fileSystem = HdfsUtils.INSTANCE.getFileSystem();

    fileSystem.mkdirs(new Path("/khanh/test"));
    fileSystem.mkdirs(new Path("/khanh/test_parquet"));
    fileSystem.copyFromLocalFile(
            new Path(SparkIntegrationTest.class.getClassLoader().getResource("test.csv").toURI()),
            new Path("/khanh/test/test.csv"));

    new HiveSetup(HiveConnectionUtils.INSTANCE.getDestination(), Operations.sequenceOf(CREATE_TABLES)).launch();
}