Example usage for org.apache.hadoop.mapred TaskAttemptID getTaskID

Introduction

This page collects example usages of org.apache.hadoop.mapred.TaskAttemptID.getTaskID() from open source projects.

Prototype

public TaskID getTaskID() 
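
The method returns the TaskID of the task that this attempt belongs to, i.e. the attempt id with the trailing attempt number stripped. A minimal sketch of calling it directly (the attempt-id string and class name below are illustrative):

import org.apache.hadoop.mapred.TaskAttemptID;
import org.apache.hadoop.mapred.TaskID;

public class GetTaskIDExample {
    public static void main(String[] args) {
        // Parse an attempt id from its string form (illustrative value).
        TaskAttemptID attempt = TaskAttemptID.forName("attempt_200707121733_0003_m_000005_0");

        // The enclosing task id: the attempt id without the attempt number.
        TaskID task = attempt.getTaskID();

        System.out.println(task);         // task_200707121733_0003_m_000005
        System.out.println(task.getId()); // 5, commonly used as the partition index
    }
}

As the snippets below show, the common patterns are calling toString() on the result to populate task-id configuration keys, and calling getId() to obtain the task's index within the job.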

Usage

From source file: cascading.flow.tez.util.TezUtil.java

License: Open Source License

public static void setMRProperties(ProcessorContext context, Configuration config, boolean isMapperOutput) {
    TaskAttemptID taskAttemptId = org.apache.tez.mapreduce.hadoop.mapreduce.TaskAttemptContextImpl
            .createMockTaskAttemptID(context.getApplicationId().getClusterTimestamp(),
                    context.getTaskVertexIndex(), context.getApplicationId().getId(), context.getTaskIndex(),
                    context.getTaskAttemptNumber(), isMapperOutput);

    config.set(JobContext.TASK_ATTEMPT_ID, taskAttemptId.toString());
    config.set(JobContext.TASK_ID, taskAttemptId.getTaskID().toString());
    config.setBoolean(JobContext.TASK_ISMAP, isMapperOutput);
    config.setInt(JobContext.TASK_PARTITION, taskAttemptId.getTaskID().getId());
}

From source file: com.scaleoutsoftware.soss.hserver.hadoop.ReducerWrapperMapred.java

License: Apache License

static void updateJobConf(JobConf jobConf, TaskAttemptID taskAttemptID, int partition) {

    //---------------------------------------------------------------------------------
    //Based on the localizeConfiguration(...) method from Task.java, part of Apache Hadoop 1.2.0,
    //licensed under Apache License, Version 2.0
    //----------------------------------------------------------------------------------

    jobConf.set("mapred.tip.id", taskAttemptID.getTaskID().toString());
    jobConf.set("mapred.task.id", taskAttemptID.toString());
    jobConf.setBoolean("mapred.task.is.map", false);
    jobConf.setInt("mapred.task.partition", partition);
    jobConf.set("mapred.job.id", taskAttemptID.getJobID().toString());

    //---------------------------------------------------------------------------------
    //Based on the localizeConfiguration(...) method from Task.java, part of Apache Hadoop 2.2.0,
    //licensed under Apache License, Version 2.0
    //----------------------------------------------------------------------------------
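    // TASK_ID, TASK_ATTEMPT_ID, TASK_ISMAP, TASK_PARTITION and ID here are the
    // mapreduce-API key constants, presumably statically imported from
    // org.apache.hadoop.mapreduce.MRJobConfig in the original source.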
    jobConf.set(TASK_ID, taskAttemptID.getTaskID().toString());
    jobConf.set(TASK_ATTEMPT_ID, taskAttemptID.toString());
    jobConf.setBoolean(TASK_ISMAP, false);
    jobConf.setInt(TASK_PARTITION, partition);
    jobConf.set(ID, taskAttemptID.getJobID().toString());
    //----------------------------------------------------------------------------------
}

From source file: it.crs4.pydoop.pipes.Application.java

License: Apache License

/**
 * Start the child process to handle the task for us.
 * @param conf the task's configuration
 * @param recordReader the fake record reader to update progress with
 * @param output the collector to send output to
 * @param reporter the reporter for the task
 * @param outputKeyClass the class of the output keys
 * @param outputValueClass the class of the output values
 * @throws IOException
 * @throws InterruptedException
 */
Application(JobConf conf, RecordReader<FloatWritable, NullWritable> recordReader,
        OutputCollector<K2, V2> output, Reporter reporter, Class<? extends K2> outputKeyClass,
        Class<? extends V2> outputValueClass) throws IOException, InterruptedException {
    serverSocket = new ServerSocket(0);
    Map<String, String> env = new HashMap<String, String>();
    // add TMPDIR environment variable with the value of java.io.tmpdir
    env.put("TMPDIR", System.getProperty("java.io.tmpdir"));
    env.put(Submitter.PORT, Integer.toString(serverSocket.getLocalPort()));

    TaskAttemptID taskid = TaskAttemptID.forName(conf.get(MRJobConfig.TASK_ATTEMPT_ID));

    // get the task's working directory
    String workDir = LocalJobRunner.getLocalTaskDir(conf.getUser(), taskid.getJobID().toString(),
            taskid.getTaskID().toString(), false);

    //Add token to the environment if security is enabled
    Token<JobTokenIdentifier> jobToken = TokenCache.getJobToken(conf.getCredentials());
    // This password is used as shared secret key between this application and
    // child pipes process
    byte[] password = jobToken.getPassword();

    String localPasswordFile = new File(workDir, "jobTokenPassword").getAbsolutePath();
    writePasswordToLocalFile(localPasswordFile, password, conf);
    env.put("hadoop.pipes.shared.secret.location", localPasswordFile);

    List<String> cmd = new ArrayList<String>();
    String interpretor = conf.get(Submitter.INTERPRETOR);
    if (interpretor != null) {
        cmd.add(interpretor);
    }
    String executable = DistributedCache.getLocalCacheFiles(conf)[0].toString();
    if (!(new File(executable).canExecute())) {
        // LinuxTaskController sets +x permissions on all distcache files already.
        // In case of DefaultTaskController, set permissions here.
        FileUtil.chmod(executable, "u+x");
    }
    cmd.add(executable);
    // wrap the command in a stdout/stderr capture
    // we are starting map/reduce task of the pipes job. this is not a cleanup
    // attempt. 
    File stdout = TaskLog.getTaskLogFile(taskid, false, TaskLog.LogName.STDOUT);
    File stderr = TaskLog.getTaskLogFile(taskid, false, TaskLog.LogName.STDERR);
    long logLength = TaskLog.getTaskLogLength(conf);
    cmd = TaskLog.captureOutAndError(null, cmd, stdout, stderr, logLength, false);

    process = runClient(cmd, env);
    clientSocket = serverSocket.accept();

    String challenge = getSecurityChallenge();
    String digestToSend = createDigest(password, challenge);
    String digestExpected = createDigest(password, digestToSend);

    handler = new OutputHandler<K2, V2>(output, reporter, recordReader, digestExpected);
    K2 outputKey = (K2) ReflectionUtils.newInstance(outputKeyClass, conf);
    V2 outputValue = (V2) ReflectionUtils.newInstance(outputValueClass, conf);
    downlink = new BinaryProtocol<K1, V1, K2, V2>(clientSocket, handler, outputKey, outputValue, conf);

    downlink.authenticate(digestToSend, challenge);
    waitForAuthentication();
    LOG.debug("Authentication succeeded");
    downlink.start();
    downlink.setJobConf(conf);
}

From source file: nl.tudelft.graphalytics.mapreducev2.evo.DirectedForestFireModelMap.java

License: Apache License

@Override
public void configure(JobConf conf) {
    TaskAttemptID attempt = TaskAttemptID.forName(conf.get("mapred.task.id"));
    this.taskID = attempt.getTaskID().getId(); // todo verify
    this.newVerticesPerSlot = conf.getInt(ForestFireModelUtils.NEW_VERTICES_NR, -1);
    this.maxID = conf.getLong(ForestFireModelUtils.MAX_ID, -1);
    this.isFirst = conf.getBoolean(ForestFireModelUtils.IS_INIT, false);
    this.isInit = this.isFirst;

    if (this.isInit)
        this.ambassadors = new HashMap<LongWritable, List<LongWritable>>();
    else
        this.ambassadors = ForestFireModelUtils
                .verticesIdsString2Map(conf.get(ForestFireModelUtils.CURRENT_AMBASSADORS));
}

From source file: nl.tudelft.graphalytics.mapreducev2.evo.UndirectedForestFireModelMap.java

License: Apache License

@Override
public void configure(JobConf conf) {
    TaskAttemptID attempt = TaskAttemptID.forName(conf.get("mapred.task.id"));
    this.taskID = attempt.getTaskID().getId();
    this.newVerticesPerSlot = conf.getInt(ForestFireModelUtils.NEW_VERTICES_NR, -1);
    this.maxID = conf.getLong(ForestFireModelUtils.MAX_ID, -1);
    this.isFirst = conf.getBoolean(ForestFireModelUtils.IS_INIT, false);
    this.isInit = this.isFirst;

    if (this.isInit)
        this.ambassadors = new HashMap<LongWritable, List<LongWritable>>();
    else
        this.ambassadors = ForestFireModelUtils
                .verticesIdsString2Map(conf.get(ForestFireModelUtils.CURRENT_AMBASSADORS));
}

From source file: org.apache.pig.backend.hadoop.executionengine.tez.runtime.PigProcessor.java

License: Apache License

@SuppressWarnings("unchecked")
@Override
public void initialize() throws Exception {
    // Reset any static variables to avoid conflict in container-reuse.
    sampleVertex = null;
    sampleMap = null;

    // Reset static variables cleared for avoiding OOM.
    new JVMReuseImpl().cleanupStaticData();

    // Set an empty reporter for now. Once we go to Tez 0.8
    // which adds support for mapreduce like progress (TEZ-808),
    // we need to call progress on Tez API
    PhysicalOperator.setReporter(new ProgressableReporter());

    UserPayload payload = getContext().getUserPayload();
    conf = TezUtils.createConfFromUserPayload(payload);
    SpillableMemoryManager.getInstance().configure(conf);
    PigContext.setPackageImportList(
            (ArrayList<String>) ObjectSerializer.deserialize(conf.get("udf.import.list")));
    Properties log4jProperties = (Properties) ObjectSerializer
            .deserialize(conf.get(PigImplConstants.PIG_LOG4J_PROPERTIES));
    if (log4jProperties != null) {
        PropertyConfigurator.configure(log4jProperties);
    }

    // To determine front-end in UDFContext
    conf.set(MRConfiguration.JOB_APPLICATION_ATTEMPT_ID, getContext().getUniqueIdentifier());

    // For compatibility with mapreduce. Some users use these configs in their UDF
    // Copied logic from the tez class - org.apache.tez.mapreduce.output.MROutput
    // Currently isMapperOutput is always false. Setting it to true produces empty output with MROutput
    boolean isMapperOutput = conf.getBoolean(MRConfig.IS_MAP_PROCESSOR, false);
    TaskAttemptID taskAttemptId = org.apache.tez.mapreduce.hadoop.mapreduce.TaskAttemptContextImpl
            .createMockTaskAttemptID(getContext().getApplicationId().getClusterTimestamp(),
                    getContext().getTaskVertexIndex(), getContext().getApplicationId().getId(),
                    getContext().getTaskIndex(), getContext().getTaskAttemptNumber(), isMapperOutput);
    conf.set(JobContext.TASK_ATTEMPT_ID, taskAttemptId.toString());
    conf.set(JobContext.TASK_ID, taskAttemptId.getTaskID().toString());
    conf.setBoolean(JobContext.TASK_ISMAP, isMapperOutput);
    conf.setInt(JobContext.TASK_PARTITION, taskAttemptId.getTaskID().getId());
    conf.set(JobContext.ID, taskAttemptId.getJobID().toString());
    if (conf.get(PigInputFormat.PIG_INPUT_LIMITS) != null) {
        // Has Load and is a root vertex
        conf.setInt(JobContext.NUM_MAPS, getContext().getVertexParallelism());
    } else {
        conf.setInt(JobContext.NUM_REDUCES, getContext().getVertexParallelism());
    }

    conf.set(PigConstants.TASK_INDEX, Integer.toString(getContext().getTaskIndex()));
    UDFContext.getUDFContext().addJobConf(conf);
    UDFContext.getUDFContext().deserialize();

    String execPlanString = conf.get(PLAN);
    execPlan = (PhysicalPlan) ObjectSerializer.deserialize(execPlanString);
    SchemaTupleBackend.initialize(conf);
    PigMapReduce.sJobContext = HadoopShims.createJobContext(conf, new org.apache.hadoop.mapreduce.JobID());

    // Set the job conf as a thread-local member of PigMapReduce
    // for backwards compatibility with the existing code base.
    PigMapReduce.sJobConfInternal.set(conf);

    Utils.setDefaultTimeZone(conf);

    boolean aggregateWarning = "true".equalsIgnoreCase(conf.get("aggregate.warning"));
    PigStatusReporter pigStatusReporter = PigStatusReporter.getInstance();
    pigStatusReporter.setContext(new TezTaskContext(getContext()));
    pigHadoopLogger = PigHadoopLogger.getInstance();
    pigHadoopLogger.setReporter(pigStatusReporter);
    pigHadoopLogger.setAggregate(aggregateWarning);
    PhysicalOperator.setPigLogger(pigHadoopLogger);

    LinkedList<TezTaskConfigurable> tezTCs = PlanHelper.getPhysicalOperators(execPlan,
            TezTaskConfigurable.class);
    for (TezTaskConfigurable tezTC : tezTCs) {
        tezTC.initialize(getContext());
    }
}

From source file: org.apache.tez.mapreduce.hadoop.IDConverter.java

License: Apache License

public static TezTaskAttemptID fromMRTaskAttemptId(org.apache.hadoop.mapreduce.TaskAttemptID taskAttemptId) {
    return TezTaskAttemptID.getInstance(fromMRTaskId(taskAttemptId.getTaskID()), taskAttemptId.getId());
}

From source file: org.apache.tez.mapreduce.output.MROutput.java

License: Apache License

@Override
public List<Event> initialize() throws IOException, InterruptedException {
    LOG.info("Initializing Simple Output");
    getContext().requestInitialMemory(0l, null); //mandatory call
    taskNumberFormat.setMinimumIntegerDigits(5);
    taskNumberFormat.setGroupingUsed(false);
    nonTaskNumberFormat.setMinimumIntegerDigits(3);
    nonTaskNumberFormat.setGroupingUsed(false);
    Configuration conf = TezUtils.createConfFromUserPayload(getContext().getUserPayload());
    this.jobConf = new JobConf(conf);
    // Add tokens to the jobConf - in case they are accessed within the RW / OF
    jobConf.getCredentials().mergeAll(UserGroupInformation.getCurrentUser().getCredentials());
    this.useNewApi = this.jobConf.getUseNewMapper();
    this.isMapperOutput = jobConf.getBoolean(MRConfig.IS_MAP_PROCESSOR, false);
    jobConf.setInt(MRJobConfig.APPLICATION_ATTEMPT_ID, getContext().getDAGAttemptNumber());
    TaskAttemptID taskAttemptId = org.apache.tez.mapreduce.hadoop.mapreduce.TaskAttemptContextImpl
            .createMockTaskAttemptID(getContext().getApplicationId().getClusterTimestamp(),
                    getContext().getTaskVertexIndex(), getContext().getApplicationId().getId(),
                    getContext().getTaskIndex(), getContext().getTaskAttemptNumber(), isMapperOutput);
    jobConf.set(JobContext.TASK_ATTEMPT_ID, taskAttemptId.toString());
    jobConf.set(JobContext.TASK_ID, taskAttemptId.getTaskID().toString());
    jobConf.setBoolean(JobContext.TASK_ISMAP, isMapperOutput);
    jobConf.setInt(JobContext.TASK_PARTITION, taskAttemptId.getTaskID().getId());
    jobConf.set(JobContext.ID, taskAttemptId.getJobID().toString());

    if (useNewApi) {
        // set the output part name to have a unique prefix
        if (jobConf.get("mapreduce.output.basename") == null) {
            jobConf.set("mapreduce.output.basename", getOutputFileNamePrefix());
        }
    }

    outputRecordCounter = getContext().getCounters().findCounter(TaskCounter.OUTPUT_RECORDS);

    if (useNewApi) {
        newApiTaskAttemptContext = createTaskAttemptContext(taskAttemptId);
        try {
            newOutputFormat = org.apache.hadoop.util.ReflectionUtils
                    .newInstance(newApiTaskAttemptContext.getOutputFormatClass(), jobConf);
        } catch (ClassNotFoundException cnfe) {
            throw new IOException(cnfe);
        }

        try {
            newRecordWriter = newOutputFormat.getRecordWriter(newApiTaskAttemptContext);
        } catch (InterruptedException e) {
            throw new IOException("Interrupted while creating record writer", e);
        }
    } else {
        oldApiTaskAttemptContext = new org.apache.tez.mapreduce.hadoop.mapred.TaskAttemptContextImpl(jobConf,
                taskAttemptId, new MRTaskReporter(getContext()));
        oldOutputFormat = jobConf.getOutputFormat();

        FileSystem fs = FileSystem.get(jobConf);
        String finalName = getOutputName();

        oldRecordWriter = oldOutputFormat.getRecordWriter(fs, jobConf, finalName,
                new MRReporter(getContext().getCounters()));
    }
    initCommitter(jobConf, useNewApi);

    LOG.info("Initialized Simple Output" + ", using_new_api: " + useNewApi);
    return null;
}

From source file: org.sf.xrime.algorithms.pagerank.PageRankMapper.java

License: Apache License

/** 
 * Keep the JobConf so we can read parameters and communicate with PageRankStep.
 * @see org.sf.xrime.algorithms.utils.GraphAlgorithmMapReduceBase#configure(org.apache.hadoop.mapred.JobConf)
 */
public void configure(JobConf job) {
    super.configure(job);
    jobConf = job;

    TaskAttemptID taskId = TaskAttemptID.forName(job.get("mapred.task.id"));
    taskID = taskId.getTaskID().toString();
}