List of usage examples for org.apache.hadoop.mapred.TaskAttemptID.getJobID()
public JobID getJobID()
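A minimal, self-contained sketch of what this accessor does (not drawn from the projects below; the attempt ID string is invented for illustration): parse a task attempt ID and recover the enclosing JobID.

import org.apache.hadoop.mapred.JobID;
import org.apache.hadoop.mapred.TaskAttemptID;

public class GetJobIDExample {
    public static void main(String[] args) {
        // Attempt IDs have the form attempt_<jtIdentifier>_<jobNum>_<m|r>_<taskNum>_<attemptNum>
        TaskAttemptID attempt = TaskAttemptID.forName("attempt_200707121733_0003_m_000005_0");
        JobID jobId = attempt.getJobID(); // the job this attempt belongs to
        System.out.println(jobId);        // prints job_200707121733_0003
    }
}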
From source file:com.facebook.hiveio.output.HiveOutput.java
License:Apache License
/**
 * Write records to a Hive table
 *
 * @param outputDesc description of Hive table
 * @param records the records to write
 * @throws TException
 * @throws IOException
 * @throws InterruptedException
 */
public static void writeTable(HiveOutputDescription outputDesc, Iterable<HiveWritableRecord> records)
        throws TException, IOException, InterruptedException {
    long uniqueId = System.nanoTime();
    String taskAttemptIdStr = "attempt_200707121733_" + (int) uniqueId + "_m_000005_0";

    String profile = Long.toString(uniqueId);

    HiveConf conf = new HiveConf(HiveOutput.class);
    conf.setInt("mapred.task.partition", 1);
    conf.set("mapred.task.id", taskAttemptIdStr);

    HiveApiOutputFormat.initProfile(conf, outputDesc, profile);

    HiveApiOutputFormat outputFormat = new HiveApiOutputFormat();
    outputFormat.setMyProfileId(profile);

    TaskAttemptID taskAttemptID = TaskAttemptID.forName(taskAttemptIdStr);
    TaskAttemptContext taskContext = new HackTaskAttemptContext(new JobConf(conf), taskAttemptID);
    JobContext jobContext = new HackJobContext(new JobConf(conf), taskAttemptID.getJobID());

    RecordWriterImpl recordWriter = outputFormat.getRecordWriter(taskContext);

    HiveApiOutputCommitter committer = outputFormat.getOutputCommitter(taskContext);
    committer.setupJob(jobContext);
    committer.setupTask(taskContext);

    for (HiveWritableRecord record : records) {
        recordWriter.write(NullWritable.get(), record);
    }

    recordWriter.close(taskContext);

    committer.commitTask(taskContext);
    committer.commitJob(jobContext);
}
From source file:com.mellanox.hadoop.mapred.UdaPlugin.java
License:Apache License
public UdaPluginRT(UdaShuffleConsumerPluginShared udaShuffleConsumer, ReduceTask reduceTask, JobConf jobConf,
        Reporter reporter, int numMaps) throws IOException {
    super(jobConf);
    this.udaShuffleConsumer = udaShuffleConsumer;
    this.reduceTask = reduceTask;

    // default 0 means ignore this parameter and use -Xmx and mapred.job.shuffle.input.buffer.percent instead
    String totalRdmaSizeStr = jobConf.get("mapred.rdma.shuffle.total.size", "0");
    long totalRdmaSize = StringUtils.TraditionalBinaryPrefix.string2long(totalRdmaSizeStr);
    long maxRdmaBufferSize = jobConf.getLong("mapred.rdma.buf.size", 1024);
    long minRdmaBufferSize = jobConf.getLong("mapred.rdma.buf.size.min", 16);
    long shuffleMemorySize = totalRdmaSize;
    StringBuilder meminfoSb = new StringBuilder();
    meminfoSb.append("UDA: numMaps=").append(numMaps);
    meminfoSb.append(", maxRdmaBufferSize=").append(maxRdmaBufferSize);
    meminfoSb.append("KB, minRdmaBufferSize=").append(minRdmaBufferSize).append("KB");
    meminfoSb.append(", rdmaShuffleTotalSize=").append(totalRdmaSize);

    if (totalRdmaSize < 0) {
        LOG.warn("Illegal parameter value: mapred.rdma.shuffle.total.size=" + totalRdmaSize);
    }

    if (totalRdmaSize <= 0) {
        long maxHeapSize = Runtime.getRuntime().maxMemory();
        double shuffleInputBufferPercent = jobConf.getFloat("mapred.job.shuffle.input.buffer.percent",
                DEFAULT_SHUFFLE_INPUT_PERCENT);
        if ((shuffleInputBufferPercent < 0) || (shuffleInputBufferPercent > 1)) {
            LOG.warn("UDA: mapred.job.shuffle.input.buffer.percent is out of range - set to default: "
                    + DEFAULT_SHUFFLE_INPUT_PERCENT);
            shuffleInputBufferPercent = DEFAULT_SHUFFLE_INPUT_PERCENT;
        }
        shuffleMemorySize = (long) (maxHeapSize * shuffleInputBufferPercent);

        LOG.info("Using JAVA Xmx with mapred.job.shuffle.input.buffer.percent to limit UDA shuffle memory");

        meminfoSb.append(", maxHeapSize=").append(maxHeapSize).append("B");
        meminfoSb.append(", shuffleInputBufferPercent=").append(shuffleInputBufferPercent);
        meminfoSb.append(" ==> shuffleMemorySize=").append(shuffleMemorySize).append("B");

        LOG.info("RDMA shuffle memory is limited to " + shuffleMemorySize / 1024 / 1024 + "MB");
    } else {
        LOG.info("Using mapred.rdma.shuffle.total.size to limit UDA shuffle memory");
        LOG.info("RDMA shuffle memory is limited to " + totalRdmaSize / 1024 / 1024 + "MB");
    }

    LOG.debug(meminfoSb.toString());
    LOG.info("UDA: user preferred rdma.buf.size=" + maxRdmaBufferSize + "KB");
    LOG.info("UDA: minimum rdma.buf.size=" + minRdmaBufferSize + "KB");

    if (jobConf.getSpeculativeExecution()) { // (getMapSpeculativeExecution() || getReduceSpeculativeExecution())
        LOG.info("UDA has limited support for map task speculative execution");
    }

    LOG.info("UDA: number of segments to fetch: " + numMaps);

    /* init variables */
    init_kv_bufs();

    launchCppSide(true, this); // true: this is RT => we should execute NetMerger

    this.j2c_queue = new J2CQueue<K, V>();
    this.mTaskReporter = reporter;
    this.mMapsNeed = numMaps;

    /* send init message */
    TaskAttemptID reduceId = reduceTask.getTaskID();

    mParams.clear();
    mParams.add(Integer.toString(numMaps));
    mParams.add(reduceId.getJobID().toString());
    mParams.add(reduceId.toString());
    mParams.add(jobConf.get("mapred.netmerger.hybrid.lpq.size", "0"));
    // in bytes - pass the raw value we got from the xml file (with only conversion to bytes)
    mParams.add(Long.toString(maxRdmaBufferSize * 1024));
    // in bytes - passed for checking if rdmaBuffer is still larger than minRdmaBuffer after alignment
    mParams.add(Long.toString(minRdmaBufferSize * 1024));
    mParams.add(jobConf.getOutputKeyClass().getName());

    boolean compression = jobConf.getCompressMapOutput(); // "true" or "false"
    String alg = null;
    if (compression) {
        alg = jobConf.get("mapred.map.output.compression.codec", null);
    }
    mParams.add(alg);

    String bufferSize = Integer.toString(256 * 1024);
    if (alg != null) {
        if (alg.contains("lzo.LzoCodec")) {
            bufferSize = jobConf.get("io.compression.codec.lzo.buffersize", bufferSize);
        } else if (alg.contains("SnappyCodec")) {
            bufferSize = jobConf.get("io.compression.codec.snappy.buffersize", bufferSize);
        }
    }
    mParams.add(bufferSize);
    mParams.add(Long.toString(shuffleMemorySize));

    String[] dirs = jobConf.getLocalDirs();
    ArrayList<String> dirsCanBeCreated = new ArrayList<String>();
    // checking if the directories can be created
    for (int i = 0; i < dirs.length; i++) {
        try {
            DiskChecker.checkDir(new File(dirs[i].trim()));
            // saving only the directories that can be created
            dirsCanBeCreated.add(dirs[i].trim());
        } catch (DiskErrorException e) {
        }
    }
    // sending the directories
    int numDirs = dirsCanBeCreated.size();
    mParams.add(Integer.toString(numDirs));
    for (int i = 0; i < numDirs; i++) {
        mParams.add(dirsCanBeCreated.get(i));
    }

    LOG.info("mParams array is " + mParams);
    LOG.info("UDA: sending INIT_COMMAND");
    String msg = UdaCmd.formCmd(UdaCmd.INIT_COMMAND, mParams);
    UdaBridge.doCommand(msg);
    this.mProgress = new Progress();
    this.mProgress.set(0.5f);
}
From source file:com.scaleoutsoftware.soss.hserver.hadoop.ReducerWrapperMapred.java
License:Apache License
static void updateJobConf(JobConf jobConf, TaskAttemptID taskAttemptID, int partition) {
    //---------------------------------------------------------------------------------
    // Based on the localizeConfiguration(...) method from Task.java, part of
    // Apache Hadoop 1.2.0, licensed under Apache License, Version 2.0
    //---------------------------------------------------------------------------------
    jobConf.set("mapred.tip.id", taskAttemptID.getTaskID().toString());
    jobConf.set("mapred.task.id", taskAttemptID.toString());
    jobConf.setBoolean("mapred.task.is.map", false);
    jobConf.setInt("mapred.task.partition", partition);
    jobConf.set("mapred.job.id", taskAttemptID.getJobID().toString());

    //---------------------------------------------------------------------------------
    // Based on the localizeConfiguration(...) method from Task.java, part of
    // Apache Hadoop 2.2.0, licensed under Apache License, Version 2.0
    //---------------------------------------------------------------------------------
    jobConf.set(TASK_ID, taskAttemptID.getTaskID().toString());
    jobConf.set(TASK_ATTEMPT_ID, taskAttemptID.toString());
    jobConf.setBoolean(TASK_ISMAP, false);
    jobConf.setInt(TASK_PARTITION, partition);
    jobConf.set(ID, taskAttemptID.getJobID().toString());
    //---------------------------------------------------------------------------------
}
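The unqualified constants above (TASK_ID, TASK_ATTEMPT_ID, TASK_ISMAP, TASK_PARTITION, ID) are presumably imports of the corresponding org.apache.hadoop.mapreduce.MRJobConfig keys. A minimal sketch of the same pattern with the keys spelled out, under that assumption:

import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.TaskAttemptID;
import org.apache.hadoop.mapreduce.MRJobConfig;

public class JobIdConfSketch {
    public static void main(String[] args) {
        JobConf jobConf = new JobConf();
        TaskAttemptID attempt = TaskAttemptID.forName("attempt_200707121733_0003_r_000001_0");
        // Old-API key (Hadoop 1.x) and new-API key both carry the job ID from getJobID().
        jobConf.set("mapred.job.id", attempt.getJobID().toString());
        jobConf.set(MRJobConfig.ID, attempt.getJobID().toString()); // "mapreduce.job.id"
        System.out.println(jobConf.get(MRJobConfig.ID)); // job_200707121733_0003
    }
}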
From source file:it.crs4.pydoop.pipes.Application.java
License:Apache License
/**
 * Start the child process to handle the task for us.
 * @param conf the task's configuration
 * @param recordReader the fake record reader to update progress with
 * @param output the collector to send output to
 * @param reporter the reporter for the task
 * @param outputKeyClass the class of the output keys
 * @param outputValueClass the class of the output values
 * @throws IOException
 * @throws InterruptedException
 */
Application(JobConf conf, RecordReader<FloatWritable, NullWritable> recordReader,
        OutputCollector<K2, V2> output, Reporter reporter, Class<? extends K2> outputKeyClass,
        Class<? extends V2> outputValueClass) throws IOException, InterruptedException {
    serverSocket = new ServerSocket(0);
    Map<String, String> env = new HashMap<String, String>();
    // add TMPDIR environment variable with the value of java.io.tmpdir
    env.put("TMPDIR", System.getProperty("java.io.tmpdir"));
    env.put(Submitter.PORT, Integer.toString(serverSocket.getLocalPort()));

    TaskAttemptID taskid = TaskAttemptID.forName(conf.get(MRJobConfig.TASK_ATTEMPT_ID));

    // get the task's working directory
    String workDir = LocalJobRunner.getLocalTaskDir(conf.getUser(), taskid.getJobID().toString(),
            taskid.getTaskID().toString(), false);

    // Add token to the environment if security is enabled
    Token<JobTokenIdentifier> jobToken = TokenCache.getJobToken(conf.getCredentials());
    // This password is used as shared secret key between this application and
    // child pipes process
    byte[] password = jobToken.getPassword();

    String localPasswordFile = new File(workDir, "jobTokenPassword").getAbsolutePath();
    writePasswordToLocalFile(localPasswordFile, password, conf);
    env.put("hadoop.pipes.shared.secret.location", localPasswordFile);

    List<String> cmd = new ArrayList<String>();
    String interpretor = conf.get(Submitter.INTERPRETOR);
    if (interpretor != null) {
        cmd.add(interpretor);
    }
    String executable = DistributedCache.getLocalCacheFiles(conf)[0].toString();
    if (!(new File(executable).canExecute())) {
        // LinuxTaskController sets +x permissions on all distcache files already.
        // In case of DefaultTaskController, set permissions here.
        FileUtil.chmod(executable, "u+x");
    }
    cmd.add(executable);

    // wrap the command in a stdout/stderr capture
    // we are starting map/reduce task of the pipes job. this is not a cleanup
    // attempt.
    File stdout = TaskLog.getTaskLogFile(taskid, false, TaskLog.LogName.STDOUT);
    File stderr = TaskLog.getTaskLogFile(taskid, false, TaskLog.LogName.STDERR);
    long logLength = TaskLog.getTaskLogLength(conf);
    cmd = TaskLog.captureOutAndError(null, cmd, stdout, stderr, logLength, false);

    process = runClient(cmd, env);
    clientSocket = serverSocket.accept();

    String challenge = getSecurityChallenge();
    String digestToSend = createDigest(password, challenge);
    String digestExpected = createDigest(password, digestToSend);

    handler = new OutputHandler<K2, V2>(output, reporter, recordReader, digestExpected);
    K2 outputKey = (K2) ReflectionUtils.newInstance(outputKeyClass, conf);
    V2 outputValue = (V2) ReflectionUtils.newInstance(outputValueClass, conf);
    downlink = new BinaryProtocol<K1, V1, K2, V2>(clientSocket, handler, outputKey, outputValue, conf);

    downlink.authenticate(digestToSend, challenge);
    waitForAuthentication();
    LOG.debug("Authentication succeeded");
    downlink.start();
    downlink.setJobConf(conf);
}
From source file:org.apache.pig.backend.hadoop.executionengine.tez.runtime.PigProcessor.java
License:Apache License
@SuppressWarnings("unchecked") @Override/*from w ww . j av a 2 s .co m*/ public void initialize() throws Exception { // Reset any static variables to avoid conflict in container-reuse. sampleVertex = null; sampleMap = null; // Reset static variables cleared for avoiding OOM. new JVMReuseImpl().cleanupStaticData(); // Set an empty reporter for now. Once we go to Tez 0.8 // which adds support for mapreduce like progress (TEZ-808), // we need to call progress on Tez API PhysicalOperator.setReporter(new ProgressableReporter()); UserPayload payload = getContext().getUserPayload(); conf = TezUtils.createConfFromUserPayload(payload); SpillableMemoryManager.getInstance().configure(conf); PigContext.setPackageImportList( (ArrayList<String>) ObjectSerializer.deserialize(conf.get("udf.import.list"))); Properties log4jProperties = (Properties) ObjectSerializer .deserialize(conf.get(PigImplConstants.PIG_LOG4J_PROPERTIES)); if (log4jProperties != null) { PropertyConfigurator.configure(log4jProperties); } // To determine front-end in UDFContext conf.set(MRConfiguration.JOB_APPLICATION_ATTEMPT_ID, getContext().getUniqueIdentifier()); // For compatibility with mapreduce. Some users use these configs in their UDF // Copied logic from the tez class - org.apache.tez.mapreduce.output.MROutput // Currently isMapperOutput is always false. Setting it to true produces empty output with MROutput boolean isMapperOutput = conf.getBoolean(MRConfig.IS_MAP_PROCESSOR, false); TaskAttemptID taskAttemptId = org.apache.tez.mapreduce.hadoop.mapreduce.TaskAttemptContextImpl .createMockTaskAttemptID(getContext().getApplicationId().getClusterTimestamp(), getContext().getTaskVertexIndex(), getContext().getApplicationId().getId(), getContext().getTaskIndex(), getContext().getTaskAttemptNumber(), isMapperOutput); conf.set(JobContext.TASK_ATTEMPT_ID, taskAttemptId.toString()); conf.set(JobContext.TASK_ID, taskAttemptId.getTaskID().toString()); conf.setBoolean(JobContext.TASK_ISMAP, isMapperOutput); conf.setInt(JobContext.TASK_PARTITION, taskAttemptId.getTaskID().getId()); conf.set(JobContext.ID, taskAttemptId.getJobID().toString()); if (conf.get(PigInputFormat.PIG_INPUT_LIMITS) != null) { // Has Load and is a root vertex conf.setInt(JobContext.NUM_MAPS, getContext().getVertexParallelism()); } else { conf.setInt(JobContext.NUM_REDUCES, getContext().getVertexParallelism()); } conf.set(PigConstants.TASK_INDEX, Integer.toString(getContext().getTaskIndex())); UDFContext.getUDFContext().addJobConf(conf); UDFContext.getUDFContext().deserialize(); String execPlanString = conf.get(PLAN); execPlan = (PhysicalPlan) ObjectSerializer.deserialize(execPlanString); SchemaTupleBackend.initialize(conf); PigMapReduce.sJobContext = HadoopShims.createJobContext(conf, new org.apache.hadoop.mapreduce.JobID()); // Set the job conf as a thread-local member of PigMapReduce // for backwards compatibility with the existing code base. 
PigMapReduce.sJobConfInternal.set(conf); Utils.setDefaultTimeZone(conf); boolean aggregateWarning = "true".equalsIgnoreCase(conf.get("aggregate.warning")); PigStatusReporter pigStatusReporter = PigStatusReporter.getInstance(); pigStatusReporter.setContext(new TezTaskContext(getContext())); pigHadoopLogger = PigHadoopLogger.getInstance(); pigHadoopLogger.setReporter(pigStatusReporter); pigHadoopLogger.setAggregate(aggregateWarning); PhysicalOperator.setPigLogger(pigHadoopLogger); LinkedList<TezTaskConfigurable> tezTCs = PlanHelper.getPhysicalOperators(execPlan, TezTaskConfigurable.class); for (TezTaskConfigurable tezTC : tezTCs) { tezTC.initialize(getContext()); } }
From source file:org.apache.tez.mapreduce.output.MROutput.java
License:Apache License
@Override
public List<Event> initialize() throws IOException, InterruptedException {
    LOG.info("Initializing Simple Output");
    getContext().requestInitialMemory(0L, null); // mandatory call
    taskNumberFormat.setMinimumIntegerDigits(5);
    taskNumberFormat.setGroupingUsed(false);
    nonTaskNumberFormat.setMinimumIntegerDigits(3);
    nonTaskNumberFormat.setGroupingUsed(false);
    Configuration conf = TezUtils.createConfFromUserPayload(getContext().getUserPayload());
    this.jobConf = new JobConf(conf);
    // Add tokens to the jobConf - in case they are accessed within the RW / OF
    jobConf.getCredentials().mergeAll(UserGroupInformation.getCurrentUser().getCredentials());
    this.useNewApi = this.jobConf.getUseNewMapper();
    this.isMapperOutput = jobConf.getBoolean(MRConfig.IS_MAP_PROCESSOR, false);
    jobConf.setInt(MRJobConfig.APPLICATION_ATTEMPT_ID, getContext().getDAGAttemptNumber());
    TaskAttemptID taskAttemptId = org.apache.tez.mapreduce.hadoop.mapreduce.TaskAttemptContextImpl
            .createMockTaskAttemptID(getContext().getApplicationId().getClusterTimestamp(),
                    getContext().getTaskVertexIndex(), getContext().getApplicationId().getId(),
                    getContext().getTaskIndex(), getContext().getTaskAttemptNumber(), isMapperOutput);
    jobConf.set(JobContext.TASK_ATTEMPT_ID, taskAttemptId.toString());
    jobConf.set(JobContext.TASK_ID, taskAttemptId.getTaskID().toString());
    jobConf.setBoolean(JobContext.TASK_ISMAP, isMapperOutput);
    jobConf.setInt(JobContext.TASK_PARTITION, taskAttemptId.getTaskID().getId());
    jobConf.set(JobContext.ID, taskAttemptId.getJobID().toString());

    if (useNewApi) {
        // set the output part name to have a unique prefix
        if (jobConf.get("mapreduce.output.basename") == null) {
            jobConf.set("mapreduce.output.basename", getOutputFileNamePrefix());
        }
    }

    outputRecordCounter = getContext().getCounters().findCounter(TaskCounter.OUTPUT_RECORDS);

    if (useNewApi) {
        newApiTaskAttemptContext = createTaskAttemptContext(taskAttemptId);
        try {
            newOutputFormat = org.apache.hadoop.util.ReflectionUtils
                    .newInstance(newApiTaskAttemptContext.getOutputFormatClass(), jobConf);
        } catch (ClassNotFoundException cnfe) {
            throw new IOException(cnfe);
        }

        try {
            newRecordWriter = newOutputFormat.getRecordWriter(newApiTaskAttemptContext);
        } catch (InterruptedException e) {
            throw new IOException("Interrupted while creating record writer", e);
        }
    } else {
        oldApiTaskAttemptContext = new org.apache.tez.mapreduce.hadoop.mapred.TaskAttemptContextImpl(jobConf,
                taskAttemptId, new MRTaskReporter(getContext()));
        oldOutputFormat = jobConf.getOutputFormat();

        FileSystem fs = FileSystem.get(jobConf);
        String finalName = getOutputName();

        oldRecordWriter = oldOutputFormat.getRecordWriter(fs, jobConf, finalName,
                new MRReporter(getContext().getCounters()));
    }
    initCommitter(jobConf, useNewApi);

    LOG.info("Initialized Simple Output" + ", using_new_api: " + useNewApi);
    return null;
}
From source file:skewtune.mapreduce.STJobTracker.java
License:Apache License
/**
 * Should hold the lock on the tracker object via heartbeat().
 * @param taskid
 * @param taskTracker
 * @param tip
 */
void createTaskEntry(TaskAttemptID taskid, String taskTracker, TaskInProgress tip) {
    this.taskidToTIP.put(taskid, tip);
    JobID jobid = taskid.getJobID();
    synchronized (plannedJobs) {
        PlannedJob job = this.plannedJobs.get(jobid);
        if (job != null && job.remove(taskid)) {
            this.plannedJobs.remove(jobid);
        }
    }
}