Example usage for org.apache.hadoop.mapred JobConf getLocalDirs

List of usage examples for org.apache.hadoop.mapred JobConf getLocalDirs

Introduction

On this page you can find example usages of org.apache.hadoop.mapred JobConf getLocalDirs.

Prototype

public String[] getLocalDirs() throws IOException 

Source Link

Usage

From source file: com.mellanox.hadoop.mapred.UdaPlugin.java

License:Apache License

/**
 * Sets up the reduce-task side of the UDA plugin: determines the shuffle memory budget, launches
 * the native side, and sends it the INIT command built from the job configuration.
 *
 * <p>The shuffle memory budget is {@code mapred.rdma.shuffle.total.size} when that value is
 * positive; otherwise it is the JVM maximum heap multiplied by
 * {@code mapred.job.shuffle.input.buffer.percent} (reset to the default when outside [0, 1]).
 *
 * @param udaShuffleConsumer shared shuffle-consumer object, stored on this instance
 * @param reduceTask         reduce task being served; its task ID is sent in the INIT command
 * @param jobConf            job configuration from which all settings are read
 * @param reporter           stored as the task reporter
 * @param numMaps            number of map outputs (segments) to fetch
 * @throws IOException if reading the configuration (e.g. the local directories) fails
 */
public UdaPluginRT(UdaShuffleConsumerPluginShared udaShuffleConsumer, ReduceTask reduceTask, JobConf jobConf,
        Reporter reporter, int numMaps) throws IOException {
    super(jobConf);
    this.udaShuffleConsumer = udaShuffleConsumer;
    this.reduceTask = reduceTask;

    // default 0 means ignoring this parameter and use instead -Xmx and mapred.job.shuffle.input.buffer.percent
    String totalRdmaSizeStr = jobConf.get("mapred.rdma.shuffle.total.size", "0");
    long totalRdmaSize = StringUtils.TraditionalBinaryPrefix.string2long(totalRdmaSizeStr);
    long maxRdmaBufferSize = jobConf.getLong("mapred.rdma.buf.size", 1024);
    long minRdmaBufferSize = jobConf.getLong("mapred.rdma.buf.size.min", 16);
    long shuffleMemorySize = totalRdmaSize;

    // Human-readable summary of the memory settings, emitted once at debug level below.
    StringBuilder meminfoSb = new StringBuilder();
    meminfoSb.append("UDA: numMaps=").append(numMaps);
    meminfoSb.append(", maxRdmaBufferSize=").append(maxRdmaBufferSize).append("KB");
    meminfoSb.append(", minRdmaBufferSize=").append(minRdmaBufferSize).append("KB");
    meminfoSb.append(", rdmaShuffleTotalSize=").append(totalRdmaSize);

    if (totalRdmaSize < 0) {
        LOG.warn("Illegal parameter value: mapred.rdma.shuffle.total.size=" + totalRdmaSize);
    }

    if (totalRdmaSize <= 0) {
        // No usable explicit size: derive the budget from the JVM max heap and the shuffle percentage.
        long maxHeapSize = Runtime.getRuntime().maxMemory();
        double shuffleInputBufferPercent = jobConf.getFloat("mapred.job.shuffle.input.buffer.percent",
                DEFAULT_SHUFFLE_INPUT_PERCENT);
        if ((shuffleInputBufferPercent < 0) || (shuffleInputBufferPercent > 1)) {
            LOG.warn("UDA: mapred.job.shuffle.input.buffer.percent is out of range - set to default: "
                    + DEFAULT_SHUFFLE_INPUT_PERCENT);
            shuffleInputBufferPercent = DEFAULT_SHUFFLE_INPUT_PERCENT;
        }
        shuffleMemorySize = (long) (maxHeapSize * shuffleInputBufferPercent);

        LOG.info("Using JAVA Xmx with mapred.job.shuffle.input.buffer.percent to limit UDA shuffle memory");

        meminfoSb.append(", maxHeapSize=").append(maxHeapSize).append("B");
        meminfoSb.append(", shuffleInputBufferPercent=").append(shuffleInputBufferPercent);
        meminfoSb.append("==> shuffleMemorySize=").append(shuffleMemorySize).append("B");

        LOG.info("RDMA shuffle memory is limited to " + shuffleMemorySize / 1024 / 1024 + "MB");
    } else {
        LOG.info("Using mapred.rdma.shuffle.total.size to limit UDA shuffle memory");
        LOG.info("RDMA shuffle memory is limited to " + totalRdmaSize / 1024 / 1024 + "MB");
    }

    LOG.debug(meminfoSb.toString());
    LOG.info("UDA: user prefer rdma.buf.size=" + maxRdmaBufferSize + "KB");
    LOG.info("UDA: minimum rdma.buf.size=" + minRdmaBufferSize + "KB");

    if (jobConf.getSpeculativeExecution()) { // (getMapSpeculativeExecution() || getReduceSpeculativeExecution())
        LOG.info("UDA has limited support for map task speculative execution");
    }

    LOG.info("UDA: number of segments to fetch: " + numMaps);

    /* init variables */
    init_kv_bufs();

    launchCppSide(true, this); // true: this is RT => we should execute NetMerger

    this.j2c_queue = new J2CQueue<K, V>();
    this.mTaskReporter = reporter;
    this.mMapsNeed = numMaps;

    /* send init message; parameters are positional, so the order of the add() calls matters */
    TaskAttemptID reduceId = reduceTask.getTaskID();

    mParams.clear();
    mParams.add(Integer.toString(numMaps));
    mParams.add(reduceId.getJobID().toString());
    mParams.add(reduceId.toString());
    mParams.add(jobConf.get("mapred.netmerger.hybrid.lpq.size", "0"));
    // in Bytes - pass the raw value we got from xml file (with only conversion to bytes)
    mParams.add(Long.toString(maxRdmaBufferSize * 1024));
    // in Bytes - passed for checking if rdmaBuffer is still larger than minRdmaBuffer after alignment
    mParams.add(Long.toString(minRdmaBufferSize * 1024));
    mParams.add(jobConf.getOutputKeyClass().getName());

    // The codec name is sent only when map-output compression is on; otherwise null is sent.
    boolean compression = jobConf.getCompressMapOutput();
    String alg = null;
    if (compression) {
        alg = jobConf.get("mapred.map.output.compression.codec", null);
    }
    mParams.add(alg);

    // Compression buffer size: 256KB unless the configured codec has its own setting.
    String bufferSize = Integer.toString(256 * 1024);
    if (alg != null) {
        if (alg.contains("lzo.LzoCodec")) {
            bufferSize = jobConf.get("io.compression.codec.lzo.buffersize", bufferSize);
        } else if (alg.contains("SnappyCodec")) {
            bufferSize = jobConf.get("io.compression.codec.snappy.buffersize", bufferSize);
        }
    }
    mParams.add(bufferSize);
    mParams.add(Long.toString(shuffleMemorySize));

    // Send only the local directories that pass the disk check.
    String[] dirs = jobConf.getLocalDirs();
    ArrayList<String> dirsCanBeCreated = new ArrayList<String>();
    for (String rawDir : dirs) {
        String dir = rawDir.trim();
        try {
            DiskChecker.checkDir(new File(dir));
            dirsCanBeCreated.add(dir);
        } catch (DiskErrorException e) {
            // Best effort: an unusable directory is skipped, but leave a trace for the operator.
            LOG.warn("UDA: skipping unusable local dir '" + dir + "': " + e.getMessage());
        }
    }

    // The directory count precedes the directories themselves.
    int numDirs = dirsCanBeCreated.size();
    mParams.add(Integer.toString(numDirs));
    for (String dir : dirsCanBeCreated) {
        mParams.add(dir);
    }

    LOG.info("mParams array is " + mParams);
    LOG.info("UDA: sending INIT_COMMAND");
    String msg = UdaCmd.formCmd(UdaCmd.INIT_COMMAND, mParams);
    UdaBridge.doCommand(msg);
    this.mProgress = new Progress();
    this.mProgress.set(0.5f);
}

From source file: skewtune.mapreduce.STJobTracker.java

License:Apache License

/**
 * Creates and starts the tracker: logs in, starts the delegation-token secret manager, the RPC
 * server and the HTTP info server, waits for the system directory to be usable, then connects to
 * the job tracker and starts the background threads and worker pool.
 *
 * @param conf                  configuration to read settings from; the actual RPC and HTTP
 *                              addresses are written back into it once the servers are bound
 * @param jobtrackerIndentifier identifier stored as this tracker's identifier
 * @throws IOException          on login, server start-up, or file-system/permission failures
 *                              (an {@code AccessControlException} on the system directory is rethrown)
 * @throws InterruptedException if the thread is interrupted while waiting for the system directory
 */
@SuppressWarnings("unchecked")
STJobTracker(final JobConf conf, String jobtrackerIndentifier) throws IOException, InterruptedException {
    // find the owner of the process
    // get the desired principal to load
    String keytabFilename = conf.get(JTConfig.JT_KEYTAB_FILE);
    UserGroupInformation.setConfiguration(conf);
    if (keytabFilename != null) {
        String desiredUser = conf.get(JTConfig.JT_USER_NAME, System.getProperty("user.name"));
        UserGroupInformation.loginUserFromKeytab(desiredUser, keytabFilename);
        mrOwner = UserGroupInformation.getLoginUser();
    } else {
        mrOwner = UserGroupInformation.getCurrentUser();
    }

    supergroup = conf.get(MR_SUPERGROUP, "supergroup");
    LOG.info("Starting jobtracker with owner as " + mrOwner.getShortUserName() + " and supergroup as "
            + supergroup);

    long secretKeyInterval = conf.getLong(MRConfig.DELEGATION_KEY_UPDATE_INTERVAL_KEY,
            MRConfig.DELEGATION_KEY_UPDATE_INTERVAL_DEFAULT);
    long tokenMaxLifetime = conf.getLong(MRConfig.DELEGATION_TOKEN_MAX_LIFETIME_KEY,
            MRConfig.DELEGATION_TOKEN_MAX_LIFETIME_DEFAULT);
    long tokenRenewInterval = conf.getLong(MRConfig.DELEGATION_TOKEN_RENEW_INTERVAL_KEY,
            MRConfig.DELEGATION_TOKEN_RENEW_INTERVAL_DEFAULT);
    secretManager = new DelegationTokenSecretManager(secretKeyInterval, tokenMaxLifetime, tokenRenewInterval,
            DELEGATION_TOKEN_GC_INTERVAL);
    secretManager.startThreads();

    //
    // Grab some static constants
    //

    // Values below the allowed minimum fall back to the default rather than the minimum.
    NUM_HEARTBEATS_IN_SECOND = conf.getInt(JT_HEARTBEATS_IN_SECOND, DEFAULT_NUM_HEARTBEATS_IN_SECOND);
    if (NUM_HEARTBEATS_IN_SECOND < MIN_NUM_HEARTBEATS_IN_SECOND) {
        NUM_HEARTBEATS_IN_SECOND = DEFAULT_NUM_HEARTBEATS_IN_SECOND;
    }

    HEARTBEATS_SCALING_FACTOR = conf.getFloat(JT_HEARTBEATS_SCALING_FACTOR, DEFAULT_HEARTBEATS_SCALING_FACTOR);
    if (HEARTBEATS_SCALING_FACTOR < MIN_HEARTBEATS_SCALING_FACTOR) {
        HEARTBEATS_SCALING_FACTOR = DEFAULT_HEARTBEATS_SCALING_FACTOR;
    }

    // whether to dump or not every heartbeat message even when DEBUG is enabled
    dumpHeartbeat = conf.getBoolean(JT_HEARTBEATS_DUMP, false);

    // Keep the configuration; the bound RPC/HTTP addresses are written back into it below.
    this.conf = conf;

    // Set ports, start RPC servers, setup security policy etc.
    InetSocketAddress addr = getAddress(conf);
    this.localMachine = addr.getHostName();
    this.port = addr.getPort();

    int handlerCount = conf.getInt(JT_IPC_HANDLER_COUNT, 10);
    this.interTrackerServer = RPC.getServer(SkewTuneClientProtocol.class, this, addr.getHostName(),
            addr.getPort(), handlerCount, false, conf, secretManager);
    if (LOG.isDebugEnabled()) {
        // stringPropertyNames() avoids the raw iterator and the cast that fails on non-String keys.
        Properties p = System.getProperties();
        for (String key : p.stringPropertyNames()) {
            LOG.debug("Property '" + key + "' is " + p.getProperty(key));
        }
    }

    InetSocketAddress infoSocAddr = NetUtils
            .createSocketAddr(conf.get(JT_HTTP_ADDRESS, String.format("%s:0", this.localMachine)));
    String infoBindAddress = infoSocAddr.getHostName();
    int tmpInfoPort = infoSocAddr.getPort();
    this.startTime = System.currentTimeMillis();
    infoServer = new HttpServer("job", infoBindAddress, tmpInfoPort, tmpInfoPort == 0, conf);
    infoServer.setAttribute("job.tracker", this);
    infoServer.addServlet("jobcompletion", "/completion", JobCompletionServlet.class);
    infoServer.addServlet("taskspeculation", "/speculation", SpeculationEventServlet.class);
    infoServer.addServlet("skewreport", "/skew", SkewReportServlet.class);
    infoServer.addServlet("tasksplit", "/split/*", SplitTaskServlet.class);
    infoServer.addServlet("tasksplitV2", "/splitV2/*", SplitTaskV2Servlet.class);
    infoServer.start();

    this.trackerIdentifier = jobtrackerIndentifier;

    // The rpc/web-server ports can be ephemeral ports...
    // ... ensure we have the correct info
    this.port = interTrackerServer.getListenerAddress().getPort();
    this.conf.set(JT_IPC_ADDRESS, (this.localMachine + ":" + this.port));
    LOG.info("JobTracker up at: " + this.port);
    this.infoPort = this.infoServer.getPort();
    this.conf.set(JT_HTTP_ADDRESS, infoBindAddress + ":" + this.infoPort);
    LOG.info("JobTracker webserver: " + this.infoServer.getPort());
    this.defaultNotificationUrl = String.format("http://%s:%d/completion?jobid=$jobId&status=$jobStatus",
            infoBindAddress, this.infoPort);
    LOG.info("JobTracker completion URI: " + defaultNotificationUrl);
    this.defaultSpeculationEventUrl = String.format("http://%s:%d/speculation?jobid=$jobId", infoBindAddress,
            this.infoPort);
    LOG.info("JobTracker speculation event URI: " + defaultSpeculationEventUrl);
    this.defaultSkewReportUrl = String.format("http://%s:%d/skew", infoBindAddress, this.infoPort);
    LOG.info("JobTracker skew report event URI: " + defaultSkewReportUrl);
    this.trackerHttp = String.format("http://%s:%d", infoBindAddress, this.infoPort);

    // Retry until the system directory exists with the expected owner and permissions.
    // The loop exits only via break (directory is fine), a rethrown AccessControlException,
    // or an interrupt; every other outcome sleeps and tries again.
    while (!Thread.currentThread().isInterrupted()) {
        try {
            // if we haven't contacted the namenode go ahead and do it
            if (fs == null) {
                fs = mrOwner.doAs(new PrivilegedExceptionAction<FileSystem>() {
                    @Override
                    public FileSystem run() throws IOException {
                        return FileSystem.get(conf);
                    }
                });
            }

            // clean up the system dir, which will only work if hdfs is out
            // of safe mode
            if (systemDir == null) {
                systemDir = new Path(getSystemDir());
            }
            try {
                FileStatus systemDirStatus = fs.getFileStatus(systemDir);
                if (!systemDirStatus.getOwner().equals(mrOwner.getShortUserName())) {
                    throw new AccessControlException(
                            "The systemdir " + systemDir + " is not owned by " + mrOwner.getShortUserName());
                }
                if (!systemDirStatus.getPermission().equals(SYSTEM_DIR_PERMISSION)) {
                    // Fix the permissions, then go around again to re-check them.
                    LOG.warn("Incorrect permissions on " + systemDir + ". Setting it to "
                            + SYSTEM_DIR_PERMISSION);
                    fs.setPermission(systemDir, new FsPermission(SYSTEM_DIR_PERMISSION));
                } else {
                    break;
                }
            } catch (FileNotFoundException ignored) {
                // The system directory does not exist yet: fall through to the sleep and retry.
            }
        } catch (AccessControlException ace) {
            LOG.warn("Failed to operate on " + JTConfig.JT_SYSTEM_DIR + "(" + systemDir
                    + ") because of permissions.");
            LOG.warn("Manually delete the " + JTConfig.JT_SYSTEM_DIR + "(" + systemDir
                    + ") and then start the JobTracker.");
            LOG.warn("Bailing out ... ");
            throw ace;
        } catch (IOException ie) {
            // Other I/O problems are logged and retried after the sleep.
            LOG.info("problem cleaning system directory: " + systemDir, ie);
        }
        Thread.sleep(FS_ACCESS_RETRY_PERIOD);
    }

    if (Thread.currentThread().isInterrupted()) {
        throw new InterruptedException();
    }

    // initialize cluster variable
    cluster = new Cluster(this.conf);

    // now create a job client proxy
    jtClient = (ClientProtocol) RPC.getProxy(ClientProtocol.class, ClientProtocol.versionID,
            JobTracker.getAddress(conf), mrOwner, this.conf,
            NetUtils.getSocketFactory(conf, ClientProtocol.class));

    new SpeculativeScheduler().start();

    // initialize task event fetcher
    new TaskCompletionEventFetcher().start();

    // Same with 'localDir' except it's always on the local disk.
    asyncDiskService = new MRAsyncDiskService(FileSystem.getLocal(conf), conf.getLocalDirs());
    asyncDiskService.moveAndDeleteFromEachVolume(SUBDIR);

    // keep at least one asynchronous worker per CPU core
    int numProcs = Runtime.getRuntime().availableProcessors();
    LOG.info("# of available processors = " + numProcs);
    int maxFactor = conf.getInt(JT_MAX_ASYNC_WORKER_FACTOR, 2);
    if (maxFactor < 1) {
        // ThreadPoolExecutor rejects a maximum pool size below the core size, so clamp the factor.
        LOG.warn("Invalid " + JT_MAX_ASYNC_WORKER_FACTOR + "=" + maxFactor + "; using 1");
        maxFactor = 1;
    }
    asyncWorkers = new ThreadPoolExecutor(numProcs, numProcs * maxFactor, 30, TimeUnit.SECONDS,
            new SynchronousQueue<Runnable>(true), new ThreadPoolExecutor.CallerRunsPolicy());

    speculativeSplit = conf.getBoolean(JT_SPECULATIVE_SPLIT, false);
}