Example usage for org.apache.hadoop.mapred TaskAttemptID getTaskID

Introduction

This page collects example usages of org.apache.hadoop.mapred.TaskAttemptID.getTaskID() from open source projects.

Prototype

public TaskID getTaskID() 
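
The method returns the TaskID of the task that this attempt belongs to, i.e. the attempt id with the trailing attempt number stripped. A minimal sketch of calling it directly (the attempt-id string and class name below are illustrative):

import org.apache.hadoop.mapred.TaskAttemptID;
import org.apache.hadoop.mapred.TaskID;

public class GetTaskIDExample {
    public static void main(String[] args) {
        // Parse an attempt id from its string form (illustrative value).
        TaskAttemptID attempt = TaskAttemptID.forName("attempt_200707121733_0003_m_000005_0");

        // The enclosing task id: the attempt id without the attempt number.
        TaskID task = attempt.getTaskID();

        System.out.println(task);         // task_200707121733_0003_m_000005
        System.out.println(task.getId()); // 5, commonly used as the partition index
    }
}

As the snippets below show, the common patterns are calling toString() on the result to populate task-id configuration keys, and calling getId() to obtain the task's index within the job.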

Usage

From source file: cascading.flow.tez.util.TezUtil.java

License: Open Source License

public static void setMRProperties(ProcessorContext context, Configuration config, boolean isMapperOutput) {
    TaskAttemptID taskAttemptId = org.apache.tez.mapreduce.hadoop.mapreduce.TaskAttemptContextImpl
            .createMockTaskAttemptID(context.getApplicationId().getClusterTimestamp(),
                    context.getTaskVertexIndex(), context.getApplicationId().getId(), context.getTaskIndex(),
                    context.getTaskAttemptNumber(), isMapperOutput);

    config.set(JobContext.TASK_ATTEMPT_ID, taskAttemptId.toString());
    config.set(JobContext.TASK_ID, taskAttemptId.getTaskID().toString());
    config.setBoolean(JobContext.TASK_ISMAP, isMapperOutput);
    config.setInt(JobContext.TASK_PARTITION, taskAttemptId.getTaskID().getId());
}

From source file: com.scaleoutsoftware.soss.hserver.hadoop.ReducerWrapperMapred.java

License: Apache License

static void updateJobConf(JobConf jobConf, TaskAttemptID taskAttemptID, int partition) {

    //---------------------------------------------------------------------------------
    //Based on the localizeConfiguration(...) method from Task.java, part of Apache Hadoop 1.2.0,
    //licensed under Apache License, Version 2.0
    //----------------------------------------------------------------------------------

    jobConf.set("mapred.tip.id", taskAttemptID.getTaskID().toString());
    jobConf.set("mapred.task.id", taskAttemptID.toString());
    jobConf.setBoolean("mapred.task.is.map", false);
    jobConf.setInt("mapred.task.partition", partition);
    jobConf.set("mapred.job.id", taskAttemptID.getJobID().toString());

    //---------------------------------------------------------------------------------
    //Based on the localizeConfiguration(...) method from Task.java, part of Apache Hadoop 2.2.0,
    //licensed under Apache License, Version 2.0
    //----------------------------------------------------------------------------------
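    // TASK_ID, TASK_ATTEMPT_ID, TASK_ISMAP, TASK_PARTITION and ID here are the
    // mapreduce-API key constants, presumably statically imported from
    // org.apache.hadoop.mapreduce.MRJobConfig in the original source.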
    jobConf.set(TASK_ID, taskAttemptID.getTaskID().toString());
    jobConf.set(TASK_ATTEMPT_ID, taskAttemptID.toString());
    jobConf.setBoolean(TASK_ISMAP, false);
    jobConf.setInt(TASK_PARTITION, partition);
    jobConf.set(ID, taskAttemptID.getJobID().toString());
    //----------------------------------------------------------------------------------
}

From source file: it.crs4.pydoop.pipes.Application.java

License: Apache License

/**
 * Start the child process to handle the task for us.
 * @param conf the task's configuration
 * @param recordReader the fake record reader to update progress with
 * @param output the collector to send output to
 * @param reporter the reporter for the task
 * @param outputKeyClass the class of the output keys
 * @param outputValueClass the class of the output values
 * @throws IOException
 * @throws InterruptedException
 */
Application(JobConf conf, RecordReader<FloatWritable, NullWritable> recordReader,
        OutputCollector<K2, V2> output, Reporter reporter, Class<? extends K2> outputKeyClass,
        Class<? extends V2> outputValueClass) throws IOException, InterruptedException {
    serverSocket = new ServerSocket(0);
    Map<String, String> env = new HashMap<String, String>();
    // add TMPDIR environment variable with the value of java.io.tmpdir
    env.put("TMPDIR", System.getProperty("java.io.tmpdir"));
    env.put(Submitter.PORT, Integer.toString(serverSocket.getLocalPort()));

    TaskAttemptID taskid = TaskAttemptID.forName(conf.get(MRJobConfig.TASK_ATTEMPT_ID));

    // get the task's working directory
    String workDir = LocalJobRunner.getLocalTaskDir(conf.getUser(), taskid.getJobID().toString(),
            taskid.getTaskID().toString(), false);

    //Add token to the environment if security is enabled
    Token<JobTokenIdentifier> jobToken = TokenCache.getJobToken(conf.getCredentials());
    // This password is used as shared secret key between this application and
    // child pipes process
    byte[] password = jobToken.getPassword();

    String localPasswordFile = new File(workDir, "jobTokenPassword").getAbsolutePath();
    writePasswordToLocalFile(localPasswordFile, password, conf);
    env.put("hadoop.pipes.shared.secret.location", localPasswordFile);

    List<String> cmd = new ArrayList<String>();
    String interpretor = conf.get(Submitter.INTERPRETOR);
    if (interpretor != null) {
        cmd.add(interpretor);
    }
    String executable = DistributedCache.getLocalCacheFiles(conf)[0].toString();
    if (!(new File(executable).canExecute())) {
        // LinuxTaskController sets +x permissions on all distcache files already.
        // In case of DefaultTaskController, set permissions here.
        FileUtil.chmod(executable, "u+x");
    }
    cmd.add(executable);
    // wrap the command in a stdout/stderr capture
    // we are starting map/reduce task of the pipes job. this is not a cleanup
    // attempt. 
    File stdout = TaskLog.getTaskLogFile(taskid, false, TaskLog.LogName.STDOUT);
    File stderr = TaskLog.getTaskLogFile(taskid, false, TaskLog.LogName.STDERR);
    long logLength = TaskLog.getTaskLogLength(conf);
    cmd = TaskLog.captureOutAndError(null, cmd, stdout, stderr, logLength, false);

    process = runClient(cmd, env);
    clientSocket = serverSocket.accept();

    String challenge = getSecurityChallenge();
    String digestToSend = createDigest(password, challenge);
    String digestExpected = createDigest(password, digestToSend);

    handler = new OutputHandler<K2, V2>(output, reporter, recordReader, digestExpected);
    K2 outputKey = (K2) ReflectionUtils.newInstance(outputKeyClass, conf);
    V2 outputValue = (V2) ReflectionUtils.newInstance(outputValueClass, conf);
    downlink = new BinaryProtocol<K1, V1, K2, V2>(clientSocket, handler, outputKey, outputValue, conf);

    downlink.authenticate(digestToSend, challenge);
    waitForAuthentication();
    LOG.debug("Authentication succeeded");
    downlink.start();
    downlink.setJobConf(conf);
}

From source file: nl.tudelft.graphalytics.mapreducev2.evo.DirectedForestFireModelMap.java

License: Apache License

@Override
public void configure(JobConf conf) {
    TaskAttemptID attempt = TaskAttemptID.forName(conf.get("mapred.task.id"));
    this.taskID = attempt.getTaskID().getId(); // todo verify
    this.newVerticesPerSlot = conf.getInt(ForestFireModelUtils.NEW_VERTICES_NR, -1);
    this.maxID = conf.getLong(ForestFireModelUtils.MAX_ID, -1);
    this.isFirst = conf.getBoolean(ForestFireModelUtils.IS_INIT, false);
    this.isInit = this.isFirst;

    if (this.isInit)
        this.ambassadors = new HashMap<LongWritable, List<LongWritable>>();
    else
        this.ambassadors = ForestFireModelUtils
                .verticesIdsString2Map(conf.get(ForestFireModelUtils.CURRENT_AMBASSADORS));
}

From source file: nl.tudelft.graphalytics.mapreducev2.evo.UndirectedForestFireModelMap.java

License: Apache License

@Override
public void configure(JobConf conf) {
    TaskAttemptID attempt = TaskAttemptID.forName(conf.get("mapred.task.id"));
    this.taskID = attempt.getTaskID().getId();
    this.newVerticesPerSlot = conf.getInt(ForestFireModelUtils.NEW_VERTICES_NR, -1);
    this.maxID = conf.getLong(ForestFireModelUtils.MAX_ID, -1);
    this.isFirst = conf.getBoolean(ForestFireModelUtils.IS_INIT, false);
    this.isInit = this.isFirst;

    if (this.isInit)
        this.ambassadors = new HashMap<LongWritable, List<LongWritable>>();
    else
        this.ambassadors = ForestFireModelUtils
                .verticesIdsString2Map(conf.get(ForestFireModelUtils.CURRENT_AMBASSADORS));
}

From source file: org.apache.pig.backend.hadoop.executionengine.tez.runtime.PigProcessor.java

License: Apache License

@SuppressWarnings("unchecked")
@Override
public void initialize() throws Exception {
    // Reset any static variables to avoid conflict in container-reuse.
    sampleVertex = null;
    sampleMap = null;

    // Reset static variables cleared for avoiding OOM.
    new JVMReuseImpl().cleanupStaticData();

    // Set an empty reporter for now. Once we go to Tez 0.8
    // which adds support for mapreduce like progress (TEZ-808),
    // we need to call progress on Tez API
    PhysicalOperator.setReporter(new ProgressableReporter());

    UserPayload payload = getContext().getUserPayload();
    conf = TezUtils.createConfFromUserPayload(payload);
    SpillableMemoryManager.getInstance().configure(conf);
    PigContext.setPackageImportList(
            (ArrayList<String>) ObjectSerializer.deserialize(conf.get("udf.import.list")));
    Properties log4jProperties = (Properties) ObjectSerializer
            .deserialize(conf.get(PigImplConstants.PIG_LOG4J_PROPERTIES));
    if (log4jProperties != null) {
        PropertyConfigurator.configure(log4jProperties);
    }

    // To determine front-end in UDFContext
    conf.set(MRConfiguration.JOB_APPLICATION_ATTEMPT_ID, getContext().getUniqueIdentifier());

    // For compatibility with mapreduce. Some users use these configs in their UDF
    // Copied logic from the tez class - org.apache.tez.mapreduce.output.MROutput
    // Currently isMapperOutput is always false. Setting it to true produces empty output with MROutput
    boolean isMapperOutput = conf.getBoolean(MRConfig.IS_MAP_PROCESSOR, false);
    TaskAttemptID taskAttemptId = org.apache.tez.mapreduce.hadoop.mapreduce.TaskAttemptContextImpl
            .createMockTaskAttemptID(getContext().getApplicationId().getClusterTimestamp(),
                    getContext().getTaskVertexIndex(), getContext().getApplicationId().getId(),
                    getContext().getTaskIndex(), getContext().getTaskAttemptNumber(), isMapperOutput);
    conf.set(JobContext.TASK_ATTEMPT_ID, taskAttemptId.toString());
    conf.set(JobContext.TASK_ID, taskAttemptId.getTaskID().toString());
    conf.setBoolean(JobContext.TASK_ISMAP, isMapperOutput);
    conf.setInt(JobContext.TASK_PARTITION, taskAttemptId.getTaskID().getId());
    conf.set(JobContext.ID, taskAttemptId.getJobID().toString());
    if (conf.get(PigInputFormat.PIG_INPUT_LIMITS) != null) {
        // Has Load and is a root vertex
        conf.setInt(JobContext.NUM_MAPS, getContext().getVertexParallelism());
    } else {
        conf.setInt(JobContext.NUM_REDUCES, getContext().getVertexParallelism());
    }

    conf.set(PigConstants.TASK_INDEX, Integer.toString(getContext().getTaskIndex()));
    UDFContext.getUDFContext().addJobConf(conf);
    UDFContext.getUDFContext().deserialize();

    String execPlanString = conf.get(PLAN);
    execPlan = (PhysicalPlan) ObjectSerializer.deserialize(execPlanString);
    SchemaTupleBackend.initialize(conf);
    PigMapReduce.sJobContext = HadoopShims.createJobContext(conf, new org.apache.hadoop.mapreduce.JobID());

    // Set the job conf as a thread-local member of PigMapReduce
    // for backwards compatibility with the existing code base.
    PigMapReduce.sJobConfInternal.set(conf);

    Utils.setDefaultTimeZone(conf);

    boolean aggregateWarning = "true".equalsIgnoreCase(conf.get("aggregate.warning"));
    PigStatusReporter pigStatusReporter = PigStatusReporter.getInstance();
    pigStatusReporter.setContext(new TezTaskContext(getContext()));
    pigHadoopLogger = PigHadoopLogger.getInstance();
    pigHadoopLogger.setReporter(pigStatusReporter);
    pigHadoopLogger.setAggregate(aggregateWarning);
    PhysicalOperator.setPigLogger(pigHadoopLogger);

    LinkedList<TezTaskConfigurable> tezTCs = PlanHelper.getPhysicalOperators(execPlan,
            TezTaskConfigurable.class);
    for (TezTaskConfigurable tezTC : tezTCs) {
        tezTC.initialize(getContext());
    }
}

From source file: org.apache.tez.mapreduce.hadoop.IDConverter.java

License: Apache License

public static TezTaskAttemptID fromMRTaskAttemptId(org.apache.hadoop.mapreduce.TaskAttemptID taskAttemptId) {
    return TezTaskAttemptID.getInstance(fromMRTaskId(taskAttemptId.getTaskID()), taskAttemptId.getId());
}

From source file: org.apache.tez.mapreduce.output.MROutput.java

License: Apache License

@Override
public List<Event> initialize() throws IOException, InterruptedException {
    LOG.info("Initializing Simple Output");
    getContext().requestInitialMemory(0l, null); //mandatory call
    taskNumberFormat.setMinimumIntegerDigits(5);
    taskNumberFormat.setGroupingUsed(false);
    nonTaskNumberFormat.setMinimumIntegerDigits(3);
    nonTaskNumberFormat.setGroupingUsed(false);
    Configuration conf = TezUtils.createConfFromUserPayload(getContext().getUserPayload());
    this.jobConf = new JobConf(conf);
    // Add tokens to the jobConf - in case they are accessed within the RW / OF
    jobConf.getCredentials().mergeAll(UserGroupInformation.getCurrentUser().getCredentials());
    this.useNewApi = this.jobConf.getUseNewMapper();
    this.isMapperOutput = jobConf.getBoolean(MRConfig.IS_MAP_PROCESSOR, false);
    jobConf.setInt(MRJobConfig.APPLICATION_ATTEMPT_ID, getContext().getDAGAttemptNumber());
    TaskAttemptID taskAttemptId = org.apache.tez.mapreduce.hadoop.mapreduce.TaskAttemptContextImpl
            .createMockTaskAttemptID(getContext().getApplicationId().getClusterTimestamp(),
                    getContext().getTaskVertexIndex(), getContext().getApplicationId().getId(),
                    getContext().getTaskIndex(), getContext().getTaskAttemptNumber(), isMapperOutput);
    jobConf.set(JobContext.TASK_ATTEMPT_ID, taskAttemptId.toString());
    jobConf.set(JobContext.TASK_ID, taskAttemptId.getTaskID().toString());
    jobConf.setBoolean(JobContext.TASK_ISMAP, isMapperOutput);
    jobConf.setInt(JobContext.TASK_PARTITION, taskAttemptId.getTaskID().getId());
    jobConf.set(JobContext.ID, taskAttemptId.getJobID().toString());

    if (useNewApi) {
        // set the output part name to have a unique prefix
        if (jobConf.get("mapreduce.output.basename") == null) {
            jobConf.set("mapreduce.output.basename", getOutputFileNamePrefix());
        }
    }

    outputRecordCounter = getContext().getCounters().findCounter(TaskCounter.OUTPUT_RECORDS);

    if (useNewApi) {
        newApiTaskAttemptContext = createTaskAttemptContext(taskAttemptId);
        try {
            newOutputFormat = org.apache.hadoop.util.ReflectionUtils
                    .newInstance(newApiTaskAttemptContext.getOutputFormatClass(), jobConf);
        } catch (ClassNotFoundException cnfe) {
            throw new IOException(cnfe);
        }

        try {
            newRecordWriter = newOutputFormat.getRecordWriter(newApiTaskAttemptContext);
        } catch (InterruptedException e) {
            throw new IOException("Interrupted while creating record writer", e);
        }
    } else {
        oldApiTaskAttemptContext = new org.apache.tez.mapreduce.hadoop.mapred.TaskAttemptContextImpl(jobConf,
                taskAttemptId, new MRTaskReporter(getContext()));
        oldOutputFormat = jobConf.getOutputFormat();

        FileSystem fs = FileSystem.get(jobConf);
        String finalName = getOutputName();

        oldRecordWriter = oldOutputFormat.getRecordWriter(fs, jobConf, finalName,
                new MRReporter(getContext().getCounters()));
    }
    initCommitter(jobConf, useNewApi);

    LOG.info("Initialized Simple Output" + ", using_new_api: " + useNewApi);
    return null;
}

From source file: org.sf.xrime.algorithms.pagerank.PageRankMapper.java

License: Apache License

/** 
 * Keep the JobConf so we can read parameters and communicate with PageRankStep.
 * @see org.sf.xrime.algorithms.utils.GraphAlgorithmMapReduceBase#configure(org.apache.hadoop.mapred.JobConf)
 */
public void configure(JobConf job) {
    super.configure(job);
    jobConf = job;

    TaskAttemptID taskId = TaskAttemptID.forName(job.get("mapred.task.id"));
    taskID = taskId.getTaskID().toString();
}