Example usage for org.apache.hadoop.fs FileSystem mkdirs

List of usage examples for org.apache.hadoop.fs FileSystem mkdirs

Introduction

On this page you can find usage examples for org.apache.hadoop.fs FileSystem mkdirs.

Prototype

public boolean mkdirs(Path f) throws IOException 

Document

Call #mkdirs(Path, FsPermission) with default permission.
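
Before the collected examples, here is a minimal sketch of how the method is typically called. The configuration, paths, and permission value are illustrative assumptions, not taken from the examples on this page.

// Requires org.apache.hadoop.conf.Configuration, org.apache.hadoop.fs.FileSystem,
// org.apache.hadoop.fs.Path and org.apache.hadoop.fs.permission.FsPermission.
Configuration conf = new Configuration();
FileSystem fs = FileSystem.get(conf);

// Single-argument overload: create the directory (and any missing parents) with default permission.
Path dir = new Path("/tmp/example-dir"); // illustrative path
if (!fs.mkdirs(dir)) {
    throw new IOException("Failed to create " + dir);
}

// Two-argument overload: create the directory with an explicit permission.
fs.mkdirs(new Path("/tmp/example-dir-restricted"), new FsPermission((short) 0700));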

Usage

From source file:com.phantom.hadoop.examples.pi.Util.java

License:Apache License

/** Create a directory. */
static boolean createNonexistingDirectory(FileSystem fs, Path dir) throws IOException {
    if (fs.exists(dir)) {
        Util.err.println("dir (= " + dir + ") already exists.");
        return false;
    } else if (!fs.mkdirs(dir)) {
        throw new IOException("Cannot create working directory " + dir);
    }
    fs.setPermission(dir, new FsPermission((short) 0777));
    return true;
}

From source file:com.phantom.hadoop.examples.QuasiMonteCarlo.java

License:Apache License

/**
 * Run a map/reduce job for estimating Pi.
 *
 * @return the estimated value of Pi
 */
public static BigDecimal estimatePi(int numMaps, long numPoints, Path tmpDir, Configuration conf)
        throws IOException, ClassNotFoundException, InterruptedException {
    Job job = new Job(conf);
    // setup job conf
    job.setJobName(QuasiMonteCarlo.class.getSimpleName());
    job.setJarByClass(QuasiMonteCarlo.class);

    job.setInputFormatClass(SequenceFileInputFormat.class);

    job.setOutputKeyClass(BooleanWritable.class);
    job.setOutputValueClass(LongWritable.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    job.setMapperClass(QmcMapper.class);

    job.setReducerClass(QmcReducer.class);
    job.setNumReduceTasks(1);

    // turn off speculative execution, because DFS doesn't handle
    // multiple writers to the same file.
    job.setSpeculativeExecution(false);

    // setup input/output directories
    final Path inDir = new Path(tmpDir, "in");
    final Path outDir = new Path(tmpDir, "out");
    FileInputFormat.setInputPaths(job, inDir);
    FileOutputFormat.setOutputPath(job, outDir);

    final FileSystem fs = FileSystem.get(conf);
    if (fs.exists(tmpDir)) {
        throw new IOException(
                "Tmp directory " + fs.makeQualified(tmpDir) + " already exists.  Please remove it first.");
    }
    if (!fs.mkdirs(inDir)) {
        throw new IOException("Cannot create input directory " + inDir);
    }

    try {
        // generate an input file for each map task
        for (int i = 0; i < numMaps; ++i) {
            final Path file = new Path(inDir, "part" + i);
            final LongWritable offset = new LongWritable(i * numPoints);
            final LongWritable size = new LongWritable(numPoints);
            final SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, file, LongWritable.class,
                    LongWritable.class, CompressionType.NONE);
            try {
                writer.append(offset, size);
            } finally {
                writer.close();
            }
            System.out.println("Wrote input for Map #" + i);
        }

        // start a map/reduce job
        System.out.println("Starting Job");
        final long startTime = System.currentTimeMillis();
        job.waitForCompletion(true);
        final double duration = (System.currentTimeMillis() - startTime) / 1000.0;
        System.out.println("Job Finished in " + duration + " seconds");

        // read outputs
        Path inFile = new Path(outDir, "reduce-out");
        LongWritable numInside = new LongWritable();
        LongWritable numOutside = new LongWritable();
        SequenceFile.Reader reader = new SequenceFile.Reader(fs, inFile, conf);
        try {
            reader.next(numInside, numOutside);
        } finally {
            reader.close();
        }

        // compute estimated value
        final BigDecimal numTotal = BigDecimal.valueOf(numMaps).multiply(BigDecimal.valueOf(numPoints));
        return BigDecimal.valueOf(4).setScale(20).multiply(BigDecimal.valueOf(numInside.get())).divide(numTotal,
                RoundingMode.HALF_UP);
    } finally {
        fs.delete(tmpDir, true);
    }
}

From source file:com.pinterest.hdfsbackup.distcp.DistCp.java

License:Apache License

/**
 * Initialize DFSCopyFileMapper specific job-configuration.
 * @param conf : The dfs/mapred configuration.
 * @param jobConf : The handle to the jobConf object to be initialized.
 * @param args Arguments
 */
private static void setup(Configuration conf, JobConf jobConf, final Arguments args) throws IOException {
    jobConf.set(DST_DIR_LABEL, args.dst.toUri().toString());

    //set boolean values
    final boolean update = args.flags.contains(Options.UPDATE);
    final boolean overwrite = !update && args.flags.contains(Options.OVERWRITE);
    jobConf.setBoolean(Options.UPDATE.propertyname, update);
    jobConf.setBoolean(Options.OVERWRITE.propertyname, overwrite);
    jobConf.setBoolean(Options.IGNORE_READ_FAILURES.propertyname,
            args.flags.contains(Options.IGNORE_READ_FAILURES));
    jobConf.setBoolean(Options.PRESERVE_STATUS.propertyname, args.flags.contains(Options.PRESERVE_STATUS));

    final String randomId = getRandomId();
    JobClient jClient = new JobClient(jobConf);
    Path jobDirectory = new Path(jClient.getSystemDir(), NAME + "_" + randomId);
    jobConf.set(JOB_DIR_LABEL, jobDirectory.toString());

    FileSystem dstfs = args.dst.getFileSystem(conf);
    boolean dstExists = dstfs.exists(args.dst);
    boolean dstIsDir = false;
    if (dstExists) {
        dstIsDir = dstfs.getFileStatus(args.dst).isDir();
    }

    // default logPath
    Path logPath = args.log;
    if (logPath == null) {
        String filename = "_distcp_logs_" + randomId;
        if (!dstExists || !dstIsDir) {
            Path parent = args.dst.getParent();
            if (!dstfs.exists(parent)) {
                dstfs.mkdirs(parent);
            }
            logPath = new Path(parent, filename);
        } else {
            logPath = new Path(args.dst, filename);
        }
    }
    FileOutputFormat.setOutputPath(jobConf, logPath);

    // create src list, dst list
    FileSystem jobfs = jobDirectory.getFileSystem(jobConf);

    Path srcfilelist = new Path(jobDirectory, "_distcp_src_files");
    jobConf.set(SRC_LIST_LABEL, srcfilelist.toString());
    SequenceFile.Writer src_writer = SequenceFile.createWriter(jobfs, jobConf, srcfilelist, LongWritable.class,
            FilePair.class, SequenceFile.CompressionType.NONE);

    Path dstfilelist = new Path(jobDirectory, "_distcp_dst_files");
    SequenceFile.Writer dst_writer = SequenceFile.createWriter(jobfs, jobConf, dstfilelist, Text.class,
            Text.class, SequenceFile.CompressionType.NONE);

    Path dstdirlist = new Path(jobDirectory, "_distcp_dst_dirs");
    jobConf.set(DST_DIR_LIST_LABEL, dstdirlist.toString());
    SequenceFile.Writer dir_writer = SequenceFile.createWriter(jobfs, jobConf, dstdirlist, Text.class,
            FilePair.class, SequenceFile.CompressionType.NONE);

    // handle the case where the destination directory doesn't exist
    // and we've only a single src directory OR we're updating/overwriting
    // the contents of the destination directory.
    final boolean special = (args.srcs.size() == 1 && !dstExists) || update || overwrite;
    int srcCount = 0, cnsyncf = 0, dirsyn = 0;
    long fileCount = 0L, byteCount = 0L, cbsyncs = 0L;
    try {
        for (Iterator<Path> srcItr = args.srcs.iterator(); srcItr.hasNext();) {
            final Path src = srcItr.next();
            FileSystem srcfs = src.getFileSystem(conf);
            FileStatus srcfilestat = srcfs.getFileStatus(src);
            Path root = special && srcfilestat.isDir() ? src : src.getParent();
            if (srcfilestat.isDir()) {
                ++srcCount;
            }

            Stack<FileStatus> pathstack = new Stack<FileStatus>();
            for (pathstack.push(srcfilestat); !pathstack.empty();) {
                FileStatus cur = pathstack.pop();
                FileStatus[] children = srcfs.listStatus(cur.getPath());
                for (int i = 0; i < children.length; i++) {
                    boolean skipfile = false;
                    final FileStatus child = children[i];
                    final String dst = makeRelative(root, child.getPath());
                    ++srcCount;

                    if (child.isDir()) {
                        pathstack.push(child);
                    } else {
                        // skip the file if the src and dst files are the same
                        skipfile = update && sameFile(srcfs, child, dstfs, new Path(args.dst, dst));
                        // skip the file if it exceeds the file limit or the size limit
                        skipfile |= fileCount == args.filelimit || byteCount + child.getLen() > args.sizelimit;

                        if (!skipfile) {
                            ++fileCount;
                            byteCount += child.getLen();

                            if (LOG.isTraceEnabled()) {
                                LOG.trace("adding file " + child.getPath());
                            }

                            ++cnsyncf;
                            cbsyncs += child.getLen();
                            if (cnsyncf > SYNC_FILE_MAX || cbsyncs > BYTES_PER_MAP) {
                                src_writer.sync();
                                dst_writer.sync();
                                cnsyncf = 0;
                                cbsyncs = 0L;
                            }
                        }
                    }

                    if (!skipfile) {
                        src_writer.append(new LongWritable(child.isDir() ? 0 : child.getLen()),
                                new FilePair(child, dst));
                    }

                    dst_writer.append(new Text(dst), new Text(child.getPath().toString()));
                }

                if (cur.isDir()) {
                    String dst = makeRelative(root, cur.getPath());
                    dir_writer.append(new Text(dst), new FilePair(cur, dst));
                    if (++dirsyn > SYNC_FILE_MAX) {
                        dirsyn = 0;
                        dir_writer.sync();
                    }
                }
            }
        }
    } finally {
        checkAndClose(src_writer);
        checkAndClose(dst_writer);
        checkAndClose(dir_writer);
    }

    FileStatus dststatus = null;
    try {
        dststatus = dstfs.getFileStatus(args.dst);
    } catch (FileNotFoundException fnfe) {
        LOG.info(args.dst + " does not exist.");
    }

    // create dest path dir if copying > 1 file
    if (dststatus == null) {
        if (srcCount > 1 && !dstfs.mkdirs(args.dst)) {
            throw new IOException("Failed to create" + args.dst);
        }
    }

    final Path sorted = new Path(jobDirectory, "_distcp_sorted");
    checkDuplication(jobfs, dstfilelist, sorted, conf);

    if (dststatus != null && args.flags.contains(Options.DELETE)) {
        deleteNonexisting(dstfs, dststatus, sorted, jobfs, jobDirectory, jobConf, conf);
    }

    Path tmpDir = new Path(
            (dstExists && !dstIsDir) || (!dstExists && srcCount == 1) ? args.dst.getParent() : args.dst,
            "_distcp_tmp_" + randomId);
    jobConf.set(TMP_DIR_LABEL, tmpDir.toUri().toString());
    LOG.info("srcCount=" + srcCount);
    jobConf.setInt(SRC_COUNT_LABEL, srcCount);
    jobConf.setLong(TOTAL_SIZE_LABEL, byteCount);
    setMapCount(byteCount, jobConf);
}

From source file:com.practicalHadoop.outputformat.MultpleDirectories.FileOutputCommitter.java

License:Apache License

/**
 * Create the temporary directory that is the root of all of the task 
 * work directories.
 * @param context the job's context
 */
public void setupJob(JobContext context) throws IOException {
    if (outputPath != null) {
        Path tmpDir = new Path(outputPath, FileOutputCommitter.TEMP_DIR_NAME);
        FileSystem fileSys = tmpDir.getFileSystem(context.getConfiguration());
        if (!fileSys.mkdirs(tmpDir)) {
            LOG.error("Mkdirs failed to create " + tmpDir.toString());
        }
    }
}

From source file:com.practicalHadoop.outputformat.MultpleDirectories.FileOutputCommitter.java

License:Apache License

/**
 * Move all of the files from the work directory to the final output
 * @param context the task context
 * @param fs the output file system
 * @param jobOutputDir the final output directory
 * @param taskOutput the work path
 * @throws IOException
 */
private void moveTaskOutputs(TaskAttemptContext context, FileSystem fs, Path jobOutputDir, Path taskOutput)
        throws IOException {
    TaskAttemptID attemptId = context.getTaskAttemptID();
    context.progress();
    if (fs.isFile(taskOutput)) {
        Path finalOutputPath = getFinalPath(jobOutputDir, taskOutput, workPath);
        if (!fs.rename(taskOutput, finalOutputPath)) {
            if (!fs.delete(finalOutputPath, true)) {
                throw new IOException("Failed to delete earlier output of task: " + attemptId);
            }
            if (!fs.rename(taskOutput, finalOutputPath)) {
                throw new IOException("Failed to save output of task: " + attemptId);
            }
        }
        LOG.debug("Moved " + taskOutput + " to " + finalOutputPath);
    } else if (fs.getFileStatus(taskOutput).isDir()) {
        FileStatus[] paths = fs.listStatus(taskOutput);
        Path finalOutputPath = getFinalPath(jobOutputDir, taskOutput, workPath);
        fs.mkdirs(finalOutputPath);
        if (paths != null) {
            for (FileStatus path : paths) {
                moveTaskOutputs(context, fs, jobOutputDir, path.getPath());
            }
        }
    }
}

From source file:com.rapleaf.hank.hadoop.DomainBuilderOutputCommitter.java

License:Apache License

public static void commitJob(String domainName, JobConf conf) throws IOException {
    Path outputPath = new Path(DomainBuilderProperties.getOutputPath(domainName, conf));
    Path tmpOutputPath = new Path(DomainBuilderProperties.getTmpOutputPath(domainName, conf));
    FileSystem fs = outputPath.getFileSystem(conf);

    // Create outputPath
    fs.mkdirs(outputPath);

    // Move temporary output to final output
    LOG.info("Moving temporary output files from: " + tmpOutputPath + " to final output path: " + outputPath);
    FileStatus[] partitions = fs.listStatus(tmpOutputPath);
    for (FileStatus partition : partitions) {
        if (partition.isDir()) {
            FileStatus[] partitionFiles = fs.listStatus(partition.getPath());
            for (FileStatus partitionFile : partitionFiles) {
                Path sourcePath = partitionFile.getPath();
                Path targetPath = new Path(new Path(outputPath, partition.getPath().getName()),
                        partitionFile.getPath().getName());
                LOG.info("Moving: " + sourcePath + " to: " + targetPath);
                if (!fs.mkdirs(targetPath.getParent())) {
                    throw new IOException("Failed at creating directory " + targetPath.getParent());
                }
                if (!fs.rename(sourcePath, targetPath)) {
                    throw new IOException("Failed at renaming " + sourcePath + " to " + targetPath);
                }
            }
        }
    }

    // Finally, cleanup
    cleanupJob(domainName, conf);
}

From source file:com.redsqirl.workflow.server.connect.hcat.HCatalogType.java

License:Open Source License

private void createDir(String path) {
    if (pathCreated.add(path)) {
        try {
            FileSystem fs = NameNodeVar.getFS();
            Path p = new Path(path);
            if (!fs.exists(p)) {
                fs.mkdirs(p);
            }
        } catch (Exception e) {
            pathCreated.remove(path);
        }
    }

}

From source file:com.redsqirl.workflow.server.connect.HDFSInterface.java

License:Open Source License

/**
 * Create a path on HDFS with properties
 *
 * @param path
 * @param properties
 * @throws RemoteException
 */
@Override
public String create(String path, Map<String, String> properties) throws RemoteException {
    String error = null;
    HdfsFileChecker fCh = new HdfsFileChecker(path);
    if (fCh.isInitialized() && !fCh.exists()) {
        if (properties.get(key_type) == null || properties.get(key_type).equalsIgnoreCase("directory")
                || properties.get(key_type).equalsIgnoreCase("file")) {
            try {
                FileSystem fs = NameNodeVar.getFS();
                boolean ok;
                if (properties.get(key_type) == null
                        || properties.get(key_type).equalsIgnoreCase("directory")) {
                    ok = fs.mkdirs(new Path(path));
                } else {
                    ok = fs.createNewFile(new Path(path));
                }
                // fs.close();
                if (ok) {
                    changeProperties(path, properties);
                } else {
                    error = LanguageManagerWF.getText("HdfsInterface.createdirfail", new Object[] { path });
                }
            } catch (IOException e) {
                error = LanguageManagerWF.getText("HdfsInterface.cannotcreate", new Object[] { path });
                logger.error(error);
                logger.error(e.getMessage());
            }
        } else {
            error = LanguageManagerWF.getText("HdfsInterface.typenotexists",
                    new Object[] { properties.get(key_type) });
        }
    } else {
        error = LanguageManagerWF.getText("HdfsInterface.pathexists", new Object[] { path });
    }
    // fCh.close();
    if (error != null) {
        logger.debug(error);
    }
    return error;
}

From source file:com.redsqirl.workflow.server.connect.HDFSInterface.java

License:Open Source License

private String copyInHDFS(Channel channel, String rfile, String lfile, SSHDataStore remoteServer)
        throws Exception {

    String error = null;
    FileSystem fs = NameNodeVar.getFS();

    Map<String, String> p = remoteServer.getProperties(rfile);
    if (p.get(SSHInterface.key_type).equals("file")) {

        String nameRdm = RandomString.getRandomName(20);
        String tmpFileStr = System.getProperty("java.io.tmpdir") + "/" + nameRdm;

        if (channel.isClosed()) {
            channel.connect();
        }
        logger.info("Copy " + rfile + " to " + tmpFileStr);
        ((ChannelSftp) channel).get(rfile, tmpFileStr);
        logger.info("Copy local " + tmpFileStr + " to HDFS " + lfile);
        fs.copyFromLocalFile(new Path(tmpFileStr), new Path(lfile));
        new File(tmpFileStr).delete();

    } else {

        if (!fs.exists(new Path(lfile))) {
            if (!fs.mkdirs(new Path(lfile))) {
                // create the directory
                error = lfile + ": Cannot create such directory";
            }
        } else if (!fs.isDirectory(new Path(lfile))) {
            //already exists as a file
            error = lfile + ": Not a directory";
        }

        if (error == null) {
            logger.info("Create the directory " + lfile);

            Map<String, Map<String, String>> files = remoteServer.getChildrenProperties(rfile);
            logger.debug(files);

            for (String path : files.keySet()) {
                Map<String, String> props = files.get(path);

                logger.debug(props.get("type") + " " + path);

                String fileName = path.replaceFirst(rfile, "");
                //String fileName = path.substring(path.lastIndexOf("/"));
                logger.debug("fileName " + fileName);

                error = copyInHDFS(channel, rfile + fileName, lfile + fileName, remoteServer);
                if (error != null) {
                    break;
                }
            }
        }

    }

    return error;
}

From source file:com.redsqirl.workflow.server.connect.jdbc.JdbcStore.java

License:Open Source License

public static String writePassword(String connectionName, JdbcDetails details) {
    String passwordPathStr = "/user/" + System.getProperty("user.name") + "/.redsqirl/jdbc_password/password_"
            + connectionName;
    Path passwordPath = new Path(passwordPathStr);

    try {
        FileSystem fileSystem = NameNodeVar.getFS();
        if (fileSystem.exists(passwordPath)) {
            BufferedReader br = new BufferedReader(new InputStreamReader(fileSystem.open(passwordPath)));
            String line = br.readLine();
            if (line == null || !line.equals(details.getPassword())) {
                fileSystem.delete(passwordPath, false);
            }
            br.close();
        }
        if (!fileSystem.exists(passwordPath) && details.getPassword() != null) {
            if (!fileSystem.exists(passwordPath.getParent())) {
                fileSystem.mkdirs(passwordPath.getParent());
                fileSystem.setPermission(passwordPath.getParent(), new FsPermission("700"));
            }
            FSDataOutputStream out = fileSystem.create(passwordPath);
            out.write(details.getPassword().getBytes());
            out.close();
            fileSystem.setPermission(passwordPath, new FsPermission("400"));
        }
    } catch (Exception e) {
        logger.error(e, e);
    }
    return passwordPathStr;
}