Example usage for org.apache.hadoop.mapred TaskAttemptID getJobID

List of usage examples for org.apache.hadoop.mapred TaskAttemptID getJobID

Introduction

On this page you can find example usage for org.apache.hadoop.mapred.TaskAttemptID.getJobID().

Prototype

public JobID getJobID() 
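
As a quick orientation before the collected examples, here is a minimal sketch of the method in isolation: getJobID() returns the JobID component of a TaskAttemptID, which is typically used to build a JobContext or to key per-job data structures. The attempt ID string below is an arbitrary illustrative value, not one taken from the examples that follow.

import org.apache.hadoop.mapred.JobID;
import org.apache.hadoop.mapred.TaskAttemptID;

public class GetJobIdSketch {
    public static void main(String[] args) {
        // Parse an attempt ID of the form
        // attempt_<jtStartTime>_<jobId>_<m|r>_<taskId>_<attemptId>.
        // The value here is illustrative only.
        TaskAttemptID taskAttemptID = TaskAttemptID.forName("attempt_200707121733_0003_m_000005_0");

        // getJobID() returns the JobID component of the attempt,
        // e.g. job_200707121733_0003 for the ID above.
        JobID jobID = taskAttemptID.getJobID();

        System.out.println("attempt: " + taskAttemptID);
        System.out.println("job:     " + jobID);
    }
}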

Usage

From source file:com.facebook.hiveio.output.HiveOutput.java

License:Apache License

/**
 * Write records to a Hive table
 *
 * @param outputDesc description of Hive table
 * @param records the records to write
 * @throws TException
 * @throws IOException
 * @throws InterruptedException
 */
public static void writeTable(HiveOutputDescription outputDesc, Iterable<HiveWritableRecord> records)
        throws TException, IOException, InterruptedException {
    long uniqueId = System.nanoTime();
    String taskAttemptIdStr = "attempt_200707121733_" + (int) uniqueId + "_m_000005_0";

    String profile = Long.toString(uniqueId);

    HiveConf conf = new HiveConf(HiveOutput.class);
    conf.setInt("mapred.task.partition", 1);
    conf.set("mapred.task.id", taskAttemptIdStr);

    HiveApiOutputFormat.initProfile(conf, outputDesc, profile);

    HiveApiOutputFormat outputFormat = new HiveApiOutputFormat();
    outputFormat.setMyProfileId(profile);

    TaskAttemptID taskAttemptID = TaskAttemptID.forName(taskAttemptIdStr);
    TaskAttemptContext taskContext = new HackTaskAttemptContext(new JobConf(conf), taskAttemptID);
    JobContext jobContext = new HackJobContext(new JobConf(conf), taskAttemptID.getJobID());

    RecordWriterImpl recordWriter = outputFormat.getRecordWriter(taskContext);

    HiveApiOutputCommitter committer = outputFormat.getOutputCommitter(taskContext);
    committer.setupJob(jobContext);

    committer.setupTask(taskContext);
    for (HiveWritableRecord record : records) {
        recordWriter.write(NullWritable.get(), record);
    }
    recordWriter.close(taskContext);
    committer.commitTask(taskContext);

    committer.commitJob(jobContext);
}

From source file:com.mellanox.hadoop.mapred.UdaPlugin.java

License:Apache License

public UdaPluginRT(UdaShuffleConsumerPluginShared udaShuffleConsumer, ReduceTask reduceTask, JobConf jobConf,
        Reporter reporter, int numMaps) throws IOException {
    super(jobConf);
    this.udaShuffleConsumer = udaShuffleConsumer;
    this.reduceTask = reduceTask;

    String totalRdmaSizeStr = jobConf.get("mapred.rdma.shuffle.total.size", "0"); // default 0 means ignore this parameter and instead use -Xmx and mapred.job.shuffle.input.buffer.percent
    long totalRdmaSize = StringUtils.TraditionalBinaryPrefix.string2long(totalRdmaSizeStr);
    long maxRdmaBufferSize = jobConf.getLong("mapred.rdma.buf.size", 1024);
    long minRdmaBufferSize = jobConf.getLong("mapred.rdma.buf.size.min", 16);
    long shuffleMemorySize = totalRdmaSize;
    StringBuilder meminfoSb = new StringBuilder();
    meminfoSb.append("UDA: numMaps=").append(numMaps);
    meminfoSb.append(", maxRdmaBufferSize=").append(maxRdmaBufferSize);
    meminfoSb.append("KB, minRdmaBufferSize=").append(minRdmaBufferSize).append("KB");
    meminfoSb.append("KB, rdmaShuffleTotalSize=").append(totalRdmaSize);

    if (totalRdmaSize < 0) {
        LOG.warn("Illegal paramter value: mapred.rdma.shuffle.total.size=" + totalRdmaSize);
    }

    if (totalRdmaSize <= 0) {
        long maxHeapSize = Runtime.getRuntime().maxMemory();
        double shuffleInputBufferPercent = jobConf.getFloat("mapred.job.shuffle.input.buffer.percent",
                DEFAULT_SHUFFLE_INPUT_PERCENT);
        if ((shuffleInputBufferPercent < 0) || (shuffleInputBufferPercent > 1)) {
            LOG.warn("UDA: mapred.job.shuffle.input.buffer.percent is out of range - set to default: "
                    + DEFAULT_SHUFFLE_INPUT_PERCENT);
            shuffleInputBufferPercent = DEFAULT_SHUFFLE_INPUT_PERCENT;
        }
        shuffleMemorySize = (long) (maxHeapSize * shuffleInputBufferPercent);

        LOG.info("Using JAVA Xmx with mapred.job.shuffle.input.buffer.percent to limit UDA shuffle memory");

        meminfoSb.append(", maxHeapSize=").append(maxHeapSize).append("B");
        meminfoSb.append(", shuffleInputBufferPercent=").append(shuffleInputBufferPercent);
        meminfoSb.append("==> shuffleMemorySize=").append(shuffleMemorySize).append("B");

        LOG.info("RDMA shuffle memory is limited to " + shuffleMemorySize / 1024 / 1024 + "MB");
    } else {
        LOG.info("Using mapred.rdma.shuffle.total.size to limit UDA shuffle memory");
        LOG.info("RDMA shuffle memory is limited to " + totalRdmaSize / 1024 / 1024 + "MB");
    }

    LOG.debug(meminfoSb.toString());
    LOG.info("UDA: user prefer rdma.buf.size=" + maxRdmaBufferSize + "KB");
    LOG.info("UDA: minimum rdma.buf.size=" + minRdmaBufferSize + "KB");

    if (jobConf.getSpeculativeExecution()) { // (getMapSpeculativeExecution() || getReduceSpeculativeExecution())
        LOG.info("UDA has limited support for map task speculative execution");
    }

    LOG.info("UDA: number of segments to fetch: " + numMaps);

    /* init variables */
    init_kv_bufs();

    launchCppSide(true, this); // true: this is RT => we should execute NetMerger

    this.j2c_queue = new J2CQueue<K, V>();
    this.mTaskReporter = reporter;
    this.mMapsNeed = numMaps;

    /* send init message */
    TaskAttemptID reduceId = reduceTask.getTaskID();

    mParams.clear();
    mParams.add(Integer.toString(numMaps));
    mParams.add(reduceId.getJobID().toString());
    mParams.add(reduceId.toString());
    mParams.add(jobConf.get("mapred.netmerger.hybrid.lpq.size", "0"));
    mParams.add(Long.toString(maxRdmaBufferSize * 1024)); // in Bytes - pass the raw value we got from xml file (with only conversion to bytes)
    mParams.add(Long.toString(minRdmaBufferSize * 1024)); // in bytes - passed for checking whether rdmaBuffer is still larger than minRdmaBuffer after alignment
    mParams.add(jobConf.getOutputKeyClass().getName());

    boolean compression = jobConf.getCompressMapOutput(); //"true" or "false"
    String alg = null;
    if (compression) {
        alg = jobConf.get("mapred.map.output.compression.codec", null);
    }
    mParams.add(alg);

    String bufferSize = Integer.toString(256 * 1024);
    if (alg != null) {
        if (alg.contains("lzo.LzoCodec")) {
            bufferSize = jobConf.get("io.compression.codec.lzo.buffersize", bufferSize);
        } else if (alg.contains("SnappyCodec")) {
            bufferSize = jobConf.get("io.compression.codec.snappy.buffersize", bufferSize);
        }
    }
    mParams.add(bufferSize);
    mParams.add(Long.toString(shuffleMemorySize));

    String[] dirs = jobConf.getLocalDirs();
    ArrayList<String> dirsCanBeCreated = new ArrayList<String>();
    //checking if the directories can be created
    for (int i = 0; i < dirs.length; i++) {
        try {
            DiskChecker.checkDir(new File(dirs[i].trim()));
            //saving only the directories that can be created
            dirsCanBeCreated.add(dirs[i].trim());
        } catch (DiskErrorException e) {
            // skip directories that fail the disk check
        }
    }
    //sending the directories
    int numDirs = dirsCanBeCreated.size();
    mParams.add(Integer.toString(numDirs));
    for (int i = 0; i < numDirs; i++) {
        mParams.add(dirsCanBeCreated.get(i));
    }

    LOG.info("mParams array is " + mParams);
    LOG.info("UDA: sending INIT_COMMAND");
    String msg = UdaCmd.formCmd(UdaCmd.INIT_COMMAND, mParams);
    UdaBridge.doCommand(msg);
    this.mProgress = new Progress();
    this.mProgress.set(0.5f);
}

From source file:com.scaleoutsoftware.soss.hserver.hadoop.ReducerWrapperMapred.java

License:Apache License

static void updateJobConf(JobConf jobConf, TaskAttemptID taskAttemptID, int partition) {

    //---------------------------------------------------------------------------------
    //Based on the localizeConfiguration(...) method from Task.java, part of Apache Hadoop 1.2.0,
    //licensed under Apache License, Version 2.0
    //----------------------------------------------------------------------------------

    jobConf.set("mapred.tip.id", taskAttemptID.getTaskID().toString());
    jobConf.set("mapred.task.id", taskAttemptID.toString());
    jobConf.setBoolean("mapred.task.is.map", false);
    jobConf.setInt("mapred.task.partition", partition);
    jobConf.set("mapred.job.id", taskAttemptID.getJobID().toString());

    //---------------------------------------------------------------------------------
    //Based on the localizeConfiguration(...) method from Task.java, part of Apache Hadoop 2.2.0,
    //licensed under Apache License, Version 2.0
    //----------------------------------------------------------------------------------
    jobConf.set(TASK_ID, taskAttemptID.getTaskID().toString());
    jobConf.set(TASK_ATTEMPT_ID, taskAttemptID.toString());
    jobConf.setBoolean(TASK_ISMAP, false);
    jobConf.setInt(TASK_PARTITION, partition);
    jobConf.set(ID, taskAttemptID.getJobID().toString());
    //----------------------------------------------------------------------------------
}

From source file:it.crs4.pydoop.pipes.Application.java

License:Apache License

/**
 * Start the child process to handle the task for us.
 * @param conf the task's configuration
 * @param recordReader the fake record reader to update progress with
 * @param output the collector to send output to
 * @param reporter the reporter for the task
 * @param outputKeyClass the class of the output keys
 * @param outputValueClass the class of the output values
 * @throws IOException
 * @throws InterruptedException
 */
Application(JobConf conf, RecordReader<FloatWritable, NullWritable> recordReader,
        OutputCollector<K2, V2> output, Reporter reporter, Class<? extends K2> outputKeyClass,
        Class<? extends V2> outputValueClass) throws IOException, InterruptedException {
    serverSocket = new ServerSocket(0);
    Map<String, String> env = new HashMap<String, String>();
    // add TMPDIR environment variable with the value of java.io.tmpdir
    env.put("TMPDIR", System.getProperty("java.io.tmpdir"));
    env.put(Submitter.PORT, Integer.toString(serverSocket.getLocalPort()));

    TaskAttemptID taskid = TaskAttemptID.forName(conf.get(MRJobConfig.TASK_ATTEMPT_ID));

    // get the task's working directory
    String workDir = LocalJobRunner.getLocalTaskDir(conf.getUser(), taskid.getJobID().toString(),
            taskid.getTaskID().toString(), false);

    //Add token to the environment if security is enabled
    Token<JobTokenIdentifier> jobToken = TokenCache.getJobToken(conf.getCredentials());
    // This password is used as shared secret key between this application and
    // child pipes process
    byte[] password = jobToken.getPassword();

    String localPasswordFile = new File(workDir, "jobTokenPassword").getAbsolutePath();
    writePasswordToLocalFile(localPasswordFile, password, conf);
    env.put("hadoop.pipes.shared.secret.location", localPasswordFile);

    List<String> cmd = new ArrayList<String>();
    String interpretor = conf.get(Submitter.INTERPRETOR);
    if (interpretor != null) {
        cmd.add(interpretor);
    }
    String executable = DistributedCache.getLocalCacheFiles(conf)[0].toString();
    if (!(new File(executable).canExecute())) {
        // LinuxTaskController sets +x permissions on all distcache files already.
        // In case of DefaultTaskController, set permissions here.
        FileUtil.chmod(executable, "u+x");
    }
    cmd.add(executable);
    // wrap the command in a stdout/stderr capture
    // we are starting map/reduce task of the pipes job. this is not a cleanup
    // attempt. 
    File stdout = TaskLog.getTaskLogFile(taskid, false, TaskLog.LogName.STDOUT);
    File stderr = TaskLog.getTaskLogFile(taskid, false, TaskLog.LogName.STDERR);
    long logLength = TaskLog.getTaskLogLength(conf);
    cmd = TaskLog.captureOutAndError(null, cmd, stdout, stderr, logLength, false);

    process = runClient(cmd, env);
    clientSocket = serverSocket.accept();

    String challenge = getSecurityChallenge();
    String digestToSend = createDigest(password, challenge);
    String digestExpected = createDigest(password, digestToSend);

    handler = new OutputHandler<K2, V2>(output, reporter, recordReader, digestExpected);
    K2 outputKey = (K2) ReflectionUtils.newInstance(outputKeyClass, conf);
    V2 outputValue = (V2) ReflectionUtils.newInstance(outputValueClass, conf);
    downlink = new BinaryProtocol<K1, V1, K2, V2>(clientSocket, handler, outputKey, outputValue, conf);

    downlink.authenticate(digestToSend, challenge);
    waitForAuthentication();
    LOG.debug("Authentication succeeded");
    downlink.start();
    downlink.setJobConf(conf);
}

From source file:org.apache.pig.backend.hadoop.executionengine.tez.runtime.PigProcessor.java

License:Apache License

@SuppressWarnings("unchecked")
@Override
public void initialize() throws Exception {
    // Reset any static variables to avoid conflict in container-reuse.
    sampleVertex = null;
    sampleMap = null;

    // Reset static variables cleared for avoiding OOM.
    new JVMReuseImpl().cleanupStaticData();

    // Set an empty reporter for now. Once we go to Tez 0.8
    // which adds support for mapreduce like progress (TEZ-808),
    // we need to call progress on Tez API
    PhysicalOperator.setReporter(new ProgressableReporter());

    UserPayload payload = getContext().getUserPayload();
    conf = TezUtils.createConfFromUserPayload(payload);
    SpillableMemoryManager.getInstance().configure(conf);
    PigContext.setPackageImportList(
            (ArrayList<String>) ObjectSerializer.deserialize(conf.get("udf.import.list")));
    Properties log4jProperties = (Properties) ObjectSerializer
            .deserialize(conf.get(PigImplConstants.PIG_LOG4J_PROPERTIES));
    if (log4jProperties != null) {
        PropertyConfigurator.configure(log4jProperties);
    }

    // To determine front-end in UDFContext
    conf.set(MRConfiguration.JOB_APPLICATION_ATTEMPT_ID, getContext().getUniqueIdentifier());

    // For compatibility with mapreduce. Some users use these configs in their UDF
    // Copied logic from the tez class - org.apache.tez.mapreduce.output.MROutput
    // Currently isMapperOutput is always false. Setting it to true produces empty output with MROutput
    boolean isMapperOutput = conf.getBoolean(MRConfig.IS_MAP_PROCESSOR, false);
    TaskAttemptID taskAttemptId = org.apache.tez.mapreduce.hadoop.mapreduce.TaskAttemptContextImpl
            .createMockTaskAttemptID(getContext().getApplicationId().getClusterTimestamp(),
                    getContext().getTaskVertexIndex(), getContext().getApplicationId().getId(),
                    getContext().getTaskIndex(), getContext().getTaskAttemptNumber(), isMapperOutput);
    conf.set(JobContext.TASK_ATTEMPT_ID, taskAttemptId.toString());
    conf.set(JobContext.TASK_ID, taskAttemptId.getTaskID().toString());
    conf.setBoolean(JobContext.TASK_ISMAP, isMapperOutput);
    conf.setInt(JobContext.TASK_PARTITION, taskAttemptId.getTaskID().getId());
    conf.set(JobContext.ID, taskAttemptId.getJobID().toString());
    if (conf.get(PigInputFormat.PIG_INPUT_LIMITS) != null) {
        // Has Load and is a root vertex
        conf.setInt(JobContext.NUM_MAPS, getContext().getVertexParallelism());
    } else {
        conf.setInt(JobContext.NUM_REDUCES, getContext().getVertexParallelism());
    }

    conf.set(PigConstants.TASK_INDEX, Integer.toString(getContext().getTaskIndex()));
    UDFContext.getUDFContext().addJobConf(conf);
    UDFContext.getUDFContext().deserialize();

    String execPlanString = conf.get(PLAN);
    execPlan = (PhysicalPlan) ObjectSerializer.deserialize(execPlanString);
    SchemaTupleBackend.initialize(conf);
    PigMapReduce.sJobContext = HadoopShims.createJobContext(conf, new org.apache.hadoop.mapreduce.JobID());

    // Set the job conf as a thread-local member of PigMapReduce
    // for backwards compatibility with the existing code base.
    PigMapReduce.sJobConfInternal.set(conf);

    Utils.setDefaultTimeZone(conf);

    boolean aggregateWarning = "true".equalsIgnoreCase(conf.get("aggregate.warning"));
    PigStatusReporter pigStatusReporter = PigStatusReporter.getInstance();
    pigStatusReporter.setContext(new TezTaskContext(getContext()));
    pigHadoopLogger = PigHadoopLogger.getInstance();
    pigHadoopLogger.setReporter(pigStatusReporter);
    pigHadoopLogger.setAggregate(aggregateWarning);
    PhysicalOperator.setPigLogger(pigHadoopLogger);

    LinkedList<TezTaskConfigurable> tezTCs = PlanHelper.getPhysicalOperators(execPlan,
            TezTaskConfigurable.class);
    for (TezTaskConfigurable tezTC : tezTCs) {
        tezTC.initialize(getContext());
    }
}

From source file:org.apache.tez.mapreduce.output.MROutput.java

License:Apache License

@Override
public List<Event> initialize() throws IOException, InterruptedException {
    LOG.info("Initializing Simple Output");
    getContext().requestInitialMemory(0l, null); //mandatory call
    taskNumberFormat.setMinimumIntegerDigits(5);
    taskNumberFormat.setGroupingUsed(false);
    nonTaskNumberFormat.setMinimumIntegerDigits(3);
    nonTaskNumberFormat.setGroupingUsed(false);
    Configuration conf = TezUtils.createConfFromUserPayload(getContext().getUserPayload());
    this.jobConf = new JobConf(conf);
    // Add tokens to the jobConf - in case they are accessed within the RW / OF
    jobConf.getCredentials().mergeAll(UserGroupInformation.getCurrentUser().getCredentials());
    this.useNewApi = this.jobConf.getUseNewMapper();
    this.isMapperOutput = jobConf.getBoolean(MRConfig.IS_MAP_PROCESSOR, false);
    jobConf.setInt(MRJobConfig.APPLICATION_ATTEMPT_ID, getContext().getDAGAttemptNumber());
    TaskAttemptID taskAttemptId = org.apache.tez.mapreduce.hadoop.mapreduce.TaskAttemptContextImpl
            .createMockTaskAttemptID(getContext().getApplicationId().getClusterTimestamp(),
                    getContext().getTaskVertexIndex(), getContext().getApplicationId().getId(),
                    getContext().getTaskIndex(), getContext().getTaskAttemptNumber(), isMapperOutput);
    jobConf.set(JobContext.TASK_ATTEMPT_ID, taskAttemptId.toString());
    jobConf.set(JobContext.TASK_ID, taskAttemptId.getTaskID().toString());
    jobConf.setBoolean(JobContext.TASK_ISMAP, isMapperOutput);
    jobConf.setInt(JobContext.TASK_PARTITION, taskAttemptId.getTaskID().getId());
    jobConf.set(JobContext.ID, taskAttemptId.getJobID().toString());

    if (useNewApi) {
        // set the output part name to have a unique prefix
        if (jobConf.get("mapreduce.output.basename") == null) {
            jobConf.set("mapreduce.output.basename", getOutputFileNamePrefix());
        }
    }

    outputRecordCounter = getContext().getCounters().findCounter(TaskCounter.OUTPUT_RECORDS);

    if (useNewApi) {
        newApiTaskAttemptContext = createTaskAttemptContext(taskAttemptId);
        try {
            newOutputFormat = org.apache.hadoop.util.ReflectionUtils
                    .newInstance(newApiTaskAttemptContext.getOutputFormatClass(), jobConf);
        } catch (ClassNotFoundException cnfe) {
            throw new IOException(cnfe);
        }

        try {
            newRecordWriter = newOutputFormat.getRecordWriter(newApiTaskAttemptContext);
        } catch (InterruptedException e) {
            throw new IOException("Interrupted while creating record writer", e);
        }
    } else {
        oldApiTaskAttemptContext = new org.apache.tez.mapreduce.hadoop.mapred.TaskAttemptContextImpl(jobConf,
                taskAttemptId, new MRTaskReporter(getContext()));
        oldOutputFormat = jobConf.getOutputFormat();

        FileSystem fs = FileSystem.get(jobConf);
        String finalName = getOutputName();

        oldRecordWriter = oldOutputFormat.getRecordWriter(fs, jobConf, finalName,
                new MRReporter(getContext().getCounters()));
    }
    initCommitter(jobConf, useNewApi);

    LOG.info("Initialized Simple Output" + ", using_new_api: " + useNewApi);
    return null;
}

From source file:skewtune.mapreduce.STJobTracker.java

License:Apache License

/**
 * The caller (heartbeat()) should hold the lock on the tracker object.
 * @param taskid
 * @param taskTracker
 * @param tip
 */
void createTaskEntry(TaskAttemptID taskid, String taskTracker, TaskInProgress tip) {
    this.taskidToTIP.put(taskid, tip);
    JobID jobid = taskid.getJobID();
    synchronized (plannedJobs) {
        PlannedJob job = this.plannedJobs.get(jobid);
        if (job != null && job.remove(taskid)) {
            this.plannedJobs.remove(jobid);
        }
    }
}