Example usage for org.apache.hadoop.mapred TaskLog getTaskLogLength

Introduction

On this page you can find example usage for org.apache.hadoop.mapred TaskLog getTaskLogLength.

Prototype

public static long getTaskLogLength(JobConf conf) 

Source Link

Document

Get the desired maximum length of task's logs.
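
Before the usage listings, here is a minimal sketch of how this method is typically combined with TaskLog.getTaskLogFile and TaskLog.captureOutAndError, as both examples below do: the returned limit is passed as the tail length to captureOutAndError so the child command's stdout/stderr are captured into the task log files and truncated. The wrapWithLogCapture helper and the hard-coded attempt id are hypothetical and only for illustration.

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.TaskAttemptID;
import org.apache.hadoop.mapred.TaskLog;

public class TaskLogLengthExample {

    // Hypothetical helper: rewrites an arbitrary child command so that its
    // stdout/stderr are redirected into the task's log files and truncated
    // to the configured maximum task log length.
    static List<String> wrapWithLogCapture(JobConf conf, List<String> cmd) throws IOException {
        TaskAttemptID taskid = TaskAttemptID.forName(conf.get("mapred.task.id"));

        // Per-attempt log files for the child process (not a cleanup attempt).
        File stdout = TaskLog.getTaskLogFile(taskid, false, TaskLog.LogName.STDOUT);
        File stderr = TaskLog.getTaskLogFile(taskid, false, TaskLog.LogName.STDERR);

        // Desired maximum length of the task's logs.
        long logLength = TaskLog.getTaskLogLength(conf);

        // Wrap the command in a stdout/stderr capture limited to logLength.
        return TaskLog.captureOutAndError(cmd, stdout, stderr, logLength);
    }

    public static void main(String[] args) throws IOException {
        JobConf conf = new JobConf();
        // Inside a running task this property is already set; the value here
        // is a synthetic attempt id so the sketch can be run standalone.
        conf.set("mapred.task.id", "attempt_200707121733_0001_m_000000_0");

        List<String> cmd = new ArrayList<String>();
        cmd.add("/bin/echo");
        cmd.add("hello");
        System.out.println(wrapWithLogCapture(conf, cmd));
    }
}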

Usage

From source file: it.crs4.pydoop.pipes.Application.java

License: Apache License

/**
 * Start the child process to handle the task for us.
 * @param conf the task's configuration
 * @param recordReader the fake record reader to update progress with
 * @param output the collector to send output to
 * @param reporter the reporter for the task
 * @param outputKeyClass the class of the output keys
 * @param outputValueClass the class of the output values
 * @throws IOException
 * @throws InterruptedException
 */
Application(JobConf conf, RecordReader<FloatWritable, NullWritable> recordReader,
        OutputCollector<K2, V2> output, Reporter reporter, Class<? extends K2> outputKeyClass,
        Class<? extends V2> outputValueClass) throws IOException, InterruptedException {
    serverSocket = new ServerSocket(0);
    Map<String, String> env = new HashMap<String, String>();
    // add TMPDIR environment variable with the value of java.io.tmpdir
    env.put("TMPDIR", System.getProperty("java.io.tmpdir"));
    env.put(Submitter.PORT, Integer.toString(serverSocket.getLocalPort()));

    TaskAttemptID taskid = TaskAttemptID.forName(conf.get(MRJobConfig.TASK_ATTEMPT_ID));

    // get the task's working directory
    String workDir = LocalJobRunner.getLocalTaskDir(conf.getUser(), taskid.getJobID().toString(),
            taskid.getTaskID().toString(), false);

    //Add token to the environment if security is enabled
    Token<JobTokenIdentifier> jobToken = TokenCache.getJobToken(conf.getCredentials());
    // This password is used as the shared secret key between this application
    // and the child pipes process
    byte[] password = jobToken.getPassword();

    String localPasswordFile = new File(workDir, "jobTokenPassword").getAbsolutePath();
    writePasswordToLocalFile(localPasswordFile, password, conf);
    env.put("hadoop.pipes.shared.secret.location", localPasswordFile);

    List<String> cmd = new ArrayList<String>();
    String interpretor = conf.get(Submitter.INTERPRETOR);
    if (interpretor != null) {
        cmd.add(interpretor);
    }
    String executable = DistributedCache.getLocalCacheFiles(conf)[0].toString();
    if (!(new File(executable).canExecute())) {
        // LinuxTaskController sets +x permissions on all distcache files already.
        // In case of DefaultTaskController, set permissions here.
        FileUtil.chmod(executable, "u+x");
    }
    cmd.add(executable);
    // wrap the command in a stdout/stderr capture
    // We are starting the map/reduce task of the pipes job; this is not a
    // cleanup attempt.
    File stdout = TaskLog.getTaskLogFile(taskid, false, TaskLog.LogName.STDOUT);
    File stderr = TaskLog.getTaskLogFile(taskid, false, TaskLog.LogName.STDERR);
    long logLength = TaskLog.getTaskLogLength(conf);
    cmd = TaskLog.captureOutAndError(null, cmd, stdout, stderr, logLength, false);

    process = runClient(cmd, env);
    clientSocket = serverSocket.accept();

    String challenge = getSecurityChallenge();
    String digestToSend = createDigest(password, challenge);
    String digestExpected = createDigest(password, digestToSend);

    handler = new OutputHandler<K2, V2>(output, reporter, recordReader, digestExpected);
    K2 outputKey = (K2) ReflectionUtils.newInstance(outputKeyClass, conf);
    V2 outputValue = (V2) ReflectionUtils.newInstance(outputValueClass, conf);
    downlink = new BinaryProtocol<K1, V1, K2, V2>(clientSocket, handler, outputKey, outputValue, conf);

    downlink.authenticate(digestToSend, challenge);
    waitForAuthentication();
    LOG.debug("Authentication succeeded");
    downlink.start();
    downlink.setJobConf(conf);
}

From source file: org.apache.avro.mapred.tether.TetheredProcess.java

License: Apache License

private Process startSubprocess(JobConf job) throws IOException, InterruptedException {
    // get the executable command
    List<String> command = new ArrayList<String>();

    String executable = "";
    if (job.getBoolean(TetherJob.TETHER_EXEC_CACHED, false)) {
        //we want to use the cached executable
        Path[] localFiles = DistributedCache.getLocalCacheFiles(job);
        if (localFiles == null) { // until MAPREDUCE-476
            URI[] files = DistributedCache.getCacheFiles(job);
            localFiles = new Path[] { new Path(files[0].toString()) };
        }
        executable = localFiles[0].toString();
        FileUtil.chmod(executable.toString(), "a+x");
    } else {
        executable = job.get(TetherJob.TETHER_EXEC);
    }

    command.add(executable);

    // Add the executable arguments. The arguments are expected to be separated
    // by newlines, so we split the argument string on newlines and add each
    // token to the command. We need to do it this way because
    // TaskLog.captureOutAndError puts quote marks around each argument, so
    // passing a single string containing all of the arguments would get it
    // quoted incorrectly.
    String args = job.get(TetherJob.TETHER_EXEC_ARGS);
    String[] aparams = args.split("\n");
    for (int i = 0; i < aparams.length; i++) {
        aparams[i] = aparams[i].trim();
        if (aparams[i].length() > 0) {
            command.add(aparams[i]);
        }
    }

    if (System.getProperty("hadoop.log.dir") == null && System.getenv("HADOOP_LOG_DIR") != null)
        System.setProperty("hadoop.log.dir", System.getenv("HADOOP_LOG_DIR"));

    // wrap the command in a stdout/stderr capture
    TaskAttemptID taskid = TaskAttemptID.forName(job.get("mapred.task.id"));
    File stdout = TaskLog.getTaskLogFile(taskid, false, TaskLog.LogName.STDOUT);
    File stderr = TaskLog.getTaskLogFile(taskid, false, TaskLog.LogName.STDERR);
    long logLength = TaskLog.getTaskLogLength(job);
    command = TaskLog.captureOutAndError(command, stdout, stderr, logLength);
    stdout.getParentFile().mkdirs();
    stderr.getParentFile().mkdirs();

    // add output server's port to env
    Map<String, String> env = new HashMap<String, String>();
    env.put("AVRO_TETHER_OUTPUT_PORT", Integer.toString(outputServer.getPort()));

    // start child process
    ProcessBuilder builder = new ProcessBuilder(command);
    System.out.println(command);
    builder.environment().putAll(env);
    return builder.start();
}