Example usage for org.apache.hadoop.mapred JobClient getFs

Introduction

This page collects example usages of the getFs() method of org.apache.hadoop.mapred.JobClient.

Prototype

public synchronized FileSystem getFs() throws IOException

Source Link

Document

Get a filesystem handle.

Usage

From source file:org.apache.whirr.examples.HadoopClusterExample.java

License:Apache License

/**
 * Blocks until the filesystem behind the given client reports that it has
 * left safe mode, polling once per second.
 *
 * <p>The filesystem returned by {@code client.getFs()} is cast to
 * {@link DistributedFileSystem}; any other filesystem type fails with a
 * {@link ClassCastException}.
 *
 * <p>If the waiting thread is interrupted, the interrupt flag is restored
 * and the method returns early without claiming that safe mode was left.
 *
 * @param client job client used to obtain the filesystem handle
 * @throws IOException if the filesystem cannot be obtained or queried
 */
private void waitToExitSafeMode(JobClient client) throws IOException {
    LOG.info("Waiting to exit safe mode...");
    FileSystem fs = client.getFs();
    DistributedFileSystem dfs = (DistributedFileSystem) fs;
    // SAFEMODE_GET only queries the current state; the loop ends as soon as
    // the query reports that safe mode is off, without a trailing sleep.
    while (dfs.setSafeMode(FSConstants.SafeModeAction.SAFEMODE_GET)) {
        try {
            Thread.sleep(1000);
        } catch (InterruptedException e) {
            // Preserve the interrupt for callers instead of swallowing it.
            Thread.currentThread().interrupt();
            LOG.warn("Interrupted while waiting to exit safe mode");
            return;
        }
    }
    LOG.info("Exited safe mode");
}

From source file:org.apache.whirr.service.hadoop.integration.HadoopServiceController.java

License:Apache License

/**
 * Blocks until the filesystem behind the given client reports that it has
 * left safe mode, polling once per second and printing a progress dot to
 * standard output before each wait.
 *
 * <p>The filesystem returned by {@code client.getFs()} is cast to
 * {@link DistributedFileSystem}; any other filesystem type fails with a
 * {@link ClassCastException}.
 *
 * <p>If the waiting thread is interrupted, the interrupt flag is restored
 * and the method returns early without claiming that safe mode was left.
 *
 * @param client job client used to obtain the filesystem handle
 * @throws IOException if the filesystem cannot be obtained or queried
 */
private static void waitToExitSafeMode(JobClient client) throws IOException {
    LOG.info("Waiting to exit safe mode...");
    FileSystem fs = client.getFs();
    DistributedFileSystem dfs = (DistributedFileSystem) fs;
    // SAFEMODE_GET only queries the current state; the loop ends as soon as
    // the query reports that safe mode is off, without a trailing sleep.
    while (dfs.setSafeMode(FSConstants.SafeModeAction.SAFEMODE_GET)) {
        try {
            System.out.print(".");
            Thread.sleep(1000);
        } catch (InterruptedException e) {
            // Preserve the interrupt for callers instead of swallowing it.
            Thread.currentThread().interrupt();
            LOG.warn("Interrupted while waiting to exit safe mode");
            return;
        }
    }
    LOG.info("Exited safe mode");
}

From source file:org.jd.copier.mapred.DistCp.java

License:Apache License

/**
 * Initialize DFSCopyFileMapper specific job-configuration.
 *
 * <p>Besides copying the option flags into {@code jobConf}, this method
 * creates a per-job directory under the staging area, walks every source
 * path and writes three sequence files into that directory (source file
 * list, destination file list, destination directory list), checks the
 * destination list for duplicates, optionally deletes destination entries
 * that have no source, and creates the temporary directory used by the job.
 *
 * @param conf : The dfs/mapred configuration.
 * @param jobConf : The handle to the jobConf object to be initialized.
 * @param args Arguments
 * @return true if it is necessary to launch a job, i.e. at least one file
 *         was selected for copying.
 * @throws IOException on any filesystem failure, or if obtaining the
 *         staging directory is interrupted
 */
private static boolean setup(Configuration conf, JobConf jobConf, final Arguments args) throws IOException {
    jobConf.set(DST_DIR_LABEL, args.dst.toUri().toString());

    //set boolean values
    final boolean update = args.flags.contains(Options.UPDATE);
    final boolean skipCRCCheck = args.flags.contains(Options.SKIPCRC);
    // -update takes precedence: overwrite is only honoured without it
    final boolean overwrite = !update && args.flags.contains(Options.OVERWRITE);
    jobConf.setBoolean(Options.UPDATE.propertyname, update);
    jobConf.setBoolean(Options.SKIPCRC.propertyname, skipCRCCheck);
    jobConf.setBoolean(Options.OVERWRITE.propertyname, overwrite);
    jobConf.setBoolean(Options.IGNORE_READ_FAILURES.propertyname,
            args.flags.contains(Options.IGNORE_READ_FAILURES));
    jobConf.setBoolean(Options.PRESERVE_STATUS.propertyname, args.flags.contains(Options.PRESERVE_STATUS));

    final String randomId = getRandomId();
    JobClient jClient = new JobClient(jobConf);
    Path stagingArea;
    try {
        stagingArea = JobSubmissionFiles.getStagingDir(jClient, conf);
    } catch (InterruptedException e) {
        // Keep the interrupt visible to callers even though it is reported
        // as an IOException.
        Thread.currentThread().interrupt();
        throw new IOException(e);
    }

    Path jobDirectory = new Path(stagingArea + NAME + "_" + randomId);
    FsPermission mapredSysPerms = new FsPermission(JobSubmissionFiles.JOB_DIR_PERMISSION);
    FileSystem.mkdirs(jClient.getFs(), jobDirectory, mapredSysPerms);
    jobConf.set(JOB_DIR_LABEL, jobDirectory.toString());

    long maxBytesPerMap = conf.getLong(BYTES_PER_MAP_LABEL, BYTES_PER_MAP);

    FileSystem dstfs = args.dst.getFileSystem(conf);

    // get tokens for all the required FileSystems..
    TokenCache.obtainTokensForNamenodes(jobConf.getCredentials(), new Path[] { args.dst }, conf);

    boolean dstExists = dstfs.exists(args.dst);
    boolean dstIsDir = false;
    if (dstExists) {
        dstIsDir = dstfs.getFileStatus(args.dst).isDir();
    }

    // default logPath
    Path logPath = args.log;
    if (logPath == null) {
        String filename = "_distcp_logs_" + randomId;
        if (!dstExists || !dstIsDir) {
            Path parent = args.dst.getParent();
            if (null == parent) {
                // If dst is '/' on S3, it might not exist yet, but dst.getParent()
                // will return null. In this case, use '/' as its own parent to prevent
                // NPE errors below.
                parent = args.dst;
            }
            if (!dstfs.exists(parent)) {
                dstfs.mkdirs(parent);
            }
            logPath = new Path(parent, filename);
        } else {
            logPath = new Path(args.dst, filename);
        }
    }
    FileOutputFormat.setOutputPath(jobConf, logPath);

    // create src list, dst list
    FileSystem jobfs = jobDirectory.getFileSystem(jobConf);

    Path srcfilelist = new Path(jobDirectory, "_distcp_src_files");
    jobConf.set(SRC_LIST_LABEL, srcfilelist.toString());
    SequenceFile.Writer src_writer = SequenceFile.createWriter(jobfs, jobConf, srcfilelist, LongWritable.class,
            FilePair.class, SequenceFile.CompressionType.NONE);

    Path dstfilelist = new Path(jobDirectory, "_distcp_dst_files");
    SequenceFile.Writer dst_writer = SequenceFile.createWriter(jobfs, jobConf, dstfilelist, Text.class,
            Text.class, SequenceFile.CompressionType.NONE);

    Path dstdirlist = new Path(jobDirectory, "_distcp_dst_dirs");
    jobConf.set(DST_DIR_LIST_LABEL, dstdirlist.toString());
    SequenceFile.Writer dir_writer = SequenceFile.createWriter(jobfs, jobConf, dstdirlist, Text.class,
            FilePair.class, SequenceFile.CompressionType.NONE);

    // handle the case where the destination directory doesn't exist
    // and we've only a single src directory OR we're updating/overwriting
    // the contents of the destination directory.
    final boolean special = (args.srcs.size() == 1 && !dstExists) || update || overwrite;
    // cnsyncf / cbsyncs: files and bytes appended since the last sync marker
    // on the src/dst lists; dirsyn: directories since the last dir-list sync.
    int srcCount = 0, cnsyncf = 0, dirsyn = 0;
    long fileCount = 0L, byteCount = 0L, cbsyncs = 0L;
    try {
        for (Iterator<Path> srcItr = args.srcs.iterator(); srcItr.hasNext();) {
            final Path src = srcItr.next();
            FileSystem srcfs = src.getFileSystem(conf);
            FileStatus srcfilestat = srcfs.getFileStatus(src);
            // Relative destination names are computed against this root.
            Path root = special && srcfilestat.isDir() ? src : src.getParent();
            if (srcfilestat.isDir()) {
                ++srcCount;
            }

            // Walk of the source tree using an explicit stack.
            Stack<FileStatus> pathstack = new Stack<FileStatus>();
            for (pathstack.push(srcfilestat); !pathstack.empty();) {
                FileStatus cur = pathstack.pop();
                FileStatus[] children = srcfs.listStatus(cur.getPath());
                for (int i = 0; i < children.length; i++) {
                    boolean skipfile = false;
                    final FileStatus child = children[i];
                    final String dst = makeRelative(root, child.getPath());
                    ++srcCount;

                    if (child.isDir()) {
                        pathstack.push(child);
                    } else {
                        //skip file if the src and the dst files are the same.
                        skipfile = update
                                && sameFile(srcfs, child, dstfs, new Path(args.dst, dst), skipCRCCheck);
                        //skip file if it exceed file limit or size limit
                        skipfile |= fileCount == args.filelimit || byteCount + child.getLen() > args.sizelimit;

                        if (!skipfile) {
                            ++fileCount;
                            byteCount += child.getLen();

                            if (LOG.isTraceEnabled()) {
                                LOG.trace("adding file " + child.getPath());
                            }

                            ++cnsyncf;
                            cbsyncs += child.getLen();
                            if (cnsyncf > SYNC_FILE_MAX || cbsyncs > maxBytesPerMap) {
                                src_writer.sync();
                                dst_writer.sync();
                                cnsyncf = 0;
                                cbsyncs = 0L;
                            }
                        }
                    }

                    // Directories are always recorded (with length 0); files
                    // only when they were not skipped above.
                    if (!skipfile) {
                        src_writer.append(new LongWritable(child.isDir() ? 0 : child.getLen()),
                                new FilePair(child, dst));
                    }

                    // The destination list records every child, skipped or not.
                    dst_writer.append(new Text(dst), new Text(child.getPath().toString()));
                }

                if (cur.isDir()) {
                    String dst = makeRelative(root, cur.getPath());
                    dir_writer.append(new Text(dst), new FilePair(cur, dst));
                    if (++dirsyn > SYNC_FILE_MAX) {
                        dirsyn = 0;
                        dir_writer.sync();
                    }
                }
            }
        }
    } finally {
        checkAndClose(src_writer);
        checkAndClose(dst_writer);
        checkAndClose(dir_writer);
    }

    FileStatus dststatus = null;
    try {
        dststatus = dstfs.getFileStatus(args.dst);
    } catch (FileNotFoundException fnfe) {
        LOG.info(args.dst + " does not exist.");
    }

    // create dest path dir if copying > 1 file
    if (dststatus == null) {
        if (srcCount > 1 && !dstfs.mkdirs(args.dst)) {
            throw new IOException("Failed to create " + args.dst);
        }
    }

    final Path sorted = new Path(jobDirectory, "_distcp_sorted");
    checkDuplication(jobfs, dstfilelist, sorted, conf);

    if (dststatus != null && args.flags.contains(Options.DELETE)) {
        deleteNonexisting(dstfs, dststatus, sorted, jobfs, jobDirectory, jobConf, conf);
    }

    // The temporary directory lives next to dst when dst is (or will be) a
    // single file, and inside dst otherwise.
    Path tmpParent = args.dst;
    if ((dstExists && !dstIsDir) || (!dstExists && srcCount == 1)) {
        Path dstParent = args.dst.getParent();
        // getParent() returns null when dst is the root; as with the log path
        // above, use dst itself in that case instead of failing with an NPE.
        if (dstParent != null) {
            tmpParent = dstParent;
        }
    }
    Path tmpDir = new Path(tmpParent, "_distcp_tmp_" + randomId);
    jobConf.set(TMP_DIR_LABEL, tmpDir.toUri().toString());

    // Explicitly create the tmpDir to ensure that it can be cleaned
    // up by fullyDelete() later.
    tmpDir.getFileSystem(conf).mkdirs(tmpDir);

    LOG.info("sourcePathsCount=" + srcCount);
    LOG.info("filesToCopyCount=" + fileCount);
    LOG.info("bytesToCopyCount=" + StringUtils.humanReadableInt(byteCount));
    jobConf.setInt(SRC_COUNT_LABEL, srcCount);
    jobConf.setLong(TOTAL_SIZE_LABEL, byteCount);
    setMapCount(byteCount, jobConf);
    return fileCount > 0;
}

From source file:org.smartfrog.services.hadoop.components.cluster.ClusterStatusCheckerImpl.java

License:Open Source License

/**
 * Check the cluster status.
 *
 * <p>Depending on the component's flags this verifies that the cluster's
 * filesystem implementation can be resolved and opened, that the job
 * tracker is in the RUNNING state, and that the active and supported
 * map/reduce task counts pass the configured range and maximum checks.
 *
 * @return a cluster status string
 * @throws SFHadoopException on any problem with the checks
 */
private String checkClusterStatus() throws SmartFrogException {

    try {
        JobClient cluster = createClientOnDemand();
        ClusterStatus status = cluster.getClusterStatus();
        StringBuilder result = new StringBuilder();

        if (supportedFileSystem) {
            Path sysDir = cluster.getSystemDir();
            URI uri = sysDir.toUri();
            sfLog().info("Checking filesystem " + uri);
            ManagedConfiguration conf = (ManagedConfiguration) cluster.getConf();
            // Configuration key that maps the URI scheme to its implementation class
            String impl = "fs." + uri.getScheme() + ".impl";
            String classname = conf.get(impl);
            if (classname == null) {
                maybeDumpConfiguration(conf);
                throw new SFHadoopException("File system " + uri + " will not load "
                        + " - no configuration mapping for " + impl + " in " + conf.dump(), this, conf);
            }
            try {
                conf.getClassByName(classname);
            } catch (ClassNotFoundException e) {
                // Report the class that failed to load, not the configuration key.
                throw new SFHadoopException("File system " + uri + " will not load "
                        + " - unable to locate class " + classname + " : " + e, e, this, conf);
            }
            try {
                result.append("Filesystem: ").append(uri).append(" ; ");
                // Called only to prove the filesystem can be obtained; the
                // handle itself is not needed.
                cluster.getFs();
            } catch (IOException e) {
                throw new SFHadoopException("File system " + uri + " will not load " + e, e, this, conf);
            } catch (IllegalArgumentException e) {
                throw new SFHadoopException("Bad File system URI " + e, e, this, conf);
            }
        }
        if (jobtrackerLive) {
            sfLog().info("Checking jobTracker ");
            JobTracker.State state = status.getJobTrackerState();
            if (!state.equals(JobTracker.State.RUNNING)) {
                throw new SFHadoopException(
                        "Job Tracker at " + jobtracker + " is not running. It is in the state " + state, this);
            }
            // Append the tracker state itself rather than the whole status object.
            result.append("Job tracker is in state ").append(state);
        }
        checkRange(minActiveMapTasks, maxActiveMapTasks, status.getMapTasks(), "map task");
        checkRange(minActiveReduceTasks, maxActiveReduceTasks, status.getReduceTasks(), "reduce task");
        checkMax(maxSupportedMapTasks, status.getMaxMapTasks(), "supported max map task");
        checkMax(maxSupportedReduceTasks, status.getMaxReduceTasks(), "supported max reduce task");
        result.append(" Map Tasks = ").append(status.getMapTasks());
        result.append(" Reduce Tasks = ").append(status.getReduceTasks());
        return result.toString();
    } catch (IOException e) {
        throw new SFHadoopException("Cannot connect to " + jobtracker, e, this);
    }
}