Usage examples for org.apache.hadoop.mapred.TaskAttemptID.forName
public static TaskAttemptID forName(String str) throws IllegalArgumentException
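Before the harvested examples below, a minimal sketch of the round trip: forName parses the canonical six-part attempt string, attempt_<jtIdentifier>_<jobId>_<m|r>_<taskId>_<attemptId>, and throws IllegalArgumentException for anything malformed. The attempt string used here is the illustrative value from the Hadoop javadoc, not one taken from the sources below.

import org.apache.hadoop.mapred.TaskAttemptID;

public class ForNameDemo {
    public static void main(String[] args) {
        // A well-formed six-part attempt ID parses cleanly.
        TaskAttemptID id = TaskAttemptID.forName("attempt_200707121733_0003_m_000005_0");
        System.out.println(id.getJobID());          // job_200707121733_0003
        System.out.println(id.getTaskID().getId()); // 5

        // Anything that does not match the format is rejected.
        try {
            TaskAttemptID.forName("not_an_attempt_id");
        } catch (IllegalArgumentException e) {
            System.out.println("rejected: " + e.getMessage());
        }
    }
}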
From source file:nl.tudelft.graphalytics.mapreducev2.evo.DirectedForestFireModelMap.java
License:Apache License
@Override
public void configure(JobConf conf) {
    TaskAttemptID attempt = TaskAttemptID.forName(conf.get("mapred.task.id"));
    this.taskID = attempt.getTaskID().getId(); // todo verify
    this.newVerticesPerSlot = conf.getInt(ForestFireModelUtils.NEW_VERTICES_NR, -1);
    this.maxID = conf.getLong(ForestFireModelUtils.MAX_ID, -1);
    this.isFirst = conf.getBoolean(ForestFireModelUtils.IS_INIT, false);
    this.isInit = this.isFirst;
    if (this.isInit)
        this.ambassadors = new HashMap<LongWritable, List<LongWritable>>();
    else
        this.ambassadors = ForestFireModelUtils
                .verticesIdsString2Map(conf.get(ForestFireModelUtils.CURRENT_AMBASSADORS));
}
From source file:nl.tudelft.graphalytics.mapreducev2.evo.UndirectedForestFireModelMap.java
License:Apache License
@Override
public void configure(JobConf conf) {
    TaskAttemptID attempt = TaskAttemptID.forName(conf.get("mapred.task.id"));
    this.taskID = attempt.getTaskID().getId();
    this.newVerticesPerSlot = conf.getInt(ForestFireModelUtils.NEW_VERTICES_NR, -1);
    this.maxID = conf.getLong(ForestFireModelUtils.MAX_ID, -1);
    this.isFirst = conf.getBoolean(ForestFireModelUtils.IS_INIT, false);
    this.isInit = this.isFirst;
    if (this.isInit)
        this.ambassadors = new HashMap<LongWritable, List<LongWritable>>();
    else
        this.ambassadors = ForestFireModelUtils
                .verticesIdsString2Map(conf.get(ForestFireModelUtils.CURRENT_AMBASSADORS));
}
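Both mappers above recover the numeric task ID from mapred.task.id so that each parallel slot can mint new vertex IDs without collisions. A minimal sketch of that pattern in isolation follows; the method name and the offset arithmetic are illustrative assumptions, not Graphalytics API.

import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.TaskAttemptID;

public class SlotIdOffsets {
    // Hypothetical helper: derive a per-slot starting ID so parallel mappers
    // can create new vertex IDs without colliding. maxId and perSlot would
    // come from the job configuration, as in the mappers above.
    static long firstNewId(JobConf conf, long maxId, long perSlot) {
        String attempt = conf.get("mapred.task.id");
        if (attempt == null) {
            throw new IllegalStateException("mapred.task.id not set; not running inside a task?");
        }
        int taskId = TaskAttemptID.forName(attempt).getTaskID().getId();
        // slot 0 starts at maxId + 1, slot 1 at maxId + 1 + perSlot, and so on
        return maxId + 1 + taskId * perSlot;
    }
}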
From source file:org.apache.avro.mapred.tether.TetheredProcess.java
License:Apache License
private Process startSubprocess(JobConf job) throws IOException, InterruptedException {
    // get the executable command
    List<String> command = new ArrayList<String>();
    String executable = "";
    if (job.getBoolean(TetherJob.TETHER_EXEC_CACHED, false)) {
        // we want to use the cached executable
        Path[] localFiles = DistributedCache.getLocalCacheFiles(job);
        if (localFiles == null) { // until MAPREDUCE-476
            URI[] files = DistributedCache.getCacheFiles(job);
            localFiles = new Path[] { new Path(files[0].toString()) };
        }
        executable = localFiles[0].toString();
        FileUtil.chmod(executable.toString(), "a+x");
    } else {
        executable = job.get(TetherJob.TETHER_EXEC);
    }
    command.add(executable);

    // Add the executable arguments. The arguments are separated by newlines, so we
    // split the argument string on newlines and add each token to command. We need
    // to do it this way because TaskLog.captureOutAndError will put quote marks
    // around each argument, so if we passed a single string containing all the
    // arguments it would get quoted incorrectly.
    String args = job.get(TetherJob.TETHER_EXEC_ARGS);
    String[] aparams = args.split("\n");
    for (int i = 0; i < aparams.length; i++) {
        aparams[i] = aparams[i].trim();
        if (aparams[i].length() > 0) {
            command.add(aparams[i]);
        }
    }

    if (System.getProperty("hadoop.log.dir") == null && System.getenv("HADOOP_LOG_DIR") != null)
        System.setProperty("hadoop.log.dir", System.getenv("HADOOP_LOG_DIR"));

    // wrap the command in a stdout/stderr capture
    TaskAttemptID taskid = TaskAttemptID.forName(job.get("mapred.task.id"));
    File stdout = TaskLog.getTaskLogFile(taskid, false, TaskLog.LogName.STDOUT);
    File stderr = TaskLog.getTaskLogFile(taskid, false, TaskLog.LogName.STDERR);
    long logLength = TaskLog.getTaskLogLength(job);
    command = TaskLog.captureOutAndError(command, stdout, stderr, logLength);
    stdout.getParentFile().mkdirs();
    stderr.getParentFile().mkdirs();

    // add the output server's port to the child's environment
    Map<String, String> env = new HashMap<String, String>();
    env.put("AVRO_TETHER_OUTPUT_PORT", Integer.toString(outputServer.getPort()));

    // start the child process
    ProcessBuilder builder = new ProcessBuilder(command);
    System.out.println(command);
    builder.environment().putAll(env);
    return builder.start();
}
From source file:org.apache.flink.api.java.hadoop.mapred.HadoopOutputFormatBase.java
License:Apache License
/**
 * create the temporary output file for hadoop RecordWriter.
 * @param taskNumber The number of the parallel instance.
 * @param numTasks The number of parallel tasks.
 * @throws java.io.IOException
 */
@Override
public void open(int taskNumber, int numTasks) throws IOException {
    // enforce sequential open() calls
    synchronized (OPEN_MUTEX) {
        if (Integer.toString(taskNumber + 1).length() > 6) {
            throw new IOException("Task id too large.");
        }

        TaskAttemptID taskAttemptID = TaskAttemptID.forName("attempt__0000_r_"
                + String.format("%" + (6 - Integer.toString(taskNumber + 1).length()) + "s", " ")
                        .replace(" ", "0")
                + Integer.toString(taskNumber + 1) + "_0");

        this.jobConf.set("mapred.task.id", taskAttemptID.toString());
        this.jobConf.setInt("mapred.task.partition", taskNumber + 1);
        // for hadoop 2.2
        this.jobConf.set("mapreduce.task.attempt.id", taskAttemptID.toString());
        this.jobConf.setInt("mapreduce.task.partition", taskNumber + 1);

        try {
            this.context = HadoopUtils.instantiateTaskAttemptContext(this.jobConf, taskAttemptID);
        } catch (Exception e) {
            throw new RuntimeException(e);
        }

        this.outputCommitter = this.jobConf.getOutputCommitter();

        JobContext jobContext;
        try {
            jobContext = HadoopUtils.instantiateJobContext(this.jobConf, new JobID());
        } catch (Exception e) {
            throw new RuntimeException(e);
        }

        this.outputCommitter.setupJob(jobContext);

        this.recordWriter = this.mapredOutputFormat.getRecordWriter(null, this.jobConf,
                Integer.toString(taskNumber + 1), new HadoopDummyProgressable());
    }
}
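The padding dance above (format a run of spaces, then replace them with zeros) exists to left-pad the task number to the six digits forName expects. A numeric format width does the same thing in one step; this is a simplified sketch, not Flink code, and buildAttemptId is a hypothetical name. The same pattern recurs in the Hive and hadoopcompatibility examples that follow.

import org.apache.hadoop.mapred.TaskAttemptID;

public class SyntheticAttemptId {
    // Hypothetical equivalent of the hand-rolled padding above: %06d left-pads
    // the task number with zeros to six digits, yielding the same synthetic
    // six-part string that forName then parses.
    static TaskAttemptID buildAttemptId(int taskNumber) {
        return TaskAttemptID.forName(String.format("attempt__0000_r_%06d_0", taskNumber + 1));
    }

    public static void main(String[] args) {
        System.out.println(buildAttemptId(41)); // attempt__0000_r_000042_0
    }
}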
From source file:org.apache.flink.batch.connectors.hive.HiveTableOutputFormat.java
License:Apache License
@Override
public void open(int taskNumber, int numTasks) throws IOException {
    try {
        StorageDescriptor sd = hiveTablePartition.getStorageDescriptor();
        serializer = (AbstractSerDe) Class.forName(sd.getSerdeInfo().getSerializationLib()).newInstance();
        ReflectionUtils.setConf(serializer, jobConf);
        // TODO: support partition properties, for now assume they're same as table properties
        SerDeUtils.initializeSerDe(serializer, jobConf, tblProperties, null);
        outputClass = serializer.getSerializedClass();
    } catch (IllegalAccessException | SerDeException | InstantiationException | ClassNotFoundException e) {
        throw new FlinkRuntimeException("Error initializing Hive serializer", e);
    }

    TaskAttemptID taskAttemptID = TaskAttemptID.forName("attempt__0000_r_"
            + String.format("%" + (6 - Integer.toString(taskNumber).length()) + "s", " ").replace(" ", "0")
            + taskNumber + "_0");

    this.jobConf.set("mapred.task.id", taskAttemptID.toString());
    this.jobConf.setInt("mapred.task.partition", taskNumber);
    // for hadoop 2.2
    this.jobConf.set("mapreduce.task.attempt.id", taskAttemptID.toString());
    this.jobConf.setInt("mapreduce.task.partition", taskNumber);
    this.context = new TaskAttemptContextImpl(this.jobConf, taskAttemptID);

    if (!isDynamicPartition) {
        staticWriter = writerForLocation(hiveTablePartition.getStorageDescriptor().getLocation());
    }

    List<ObjectInspector> objectInspectors = new ArrayList<>();
    for (int i = 0; i < rowTypeInfo.getArity() - partitionCols.size(); i++) {
        objectInspectors.add(HiveTableUtil
                .getObjectInspector(LegacyTypeInfoDataTypeConverter.toDataType(rowTypeInfo.getTypeAt(i))));
    }
    if (!isPartitioned) {
        rowObjectInspector = ObjectInspectorFactory
                .getStandardStructObjectInspector(Arrays.asList(rowTypeInfo.getFieldNames()), objectInspectors);
        numNonPartitionCols = rowTypeInfo.getArity();
    } else {
        rowObjectInspector = ObjectInspectorFactory.getStandardStructObjectInspector(
                Arrays.asList(rowTypeInfo.getFieldNames())
                        .subList(0, rowTypeInfo.getArity() - partitionCols.size()),
                objectInspectors);
        numNonPartitionCols = rowTypeInfo.getArity() - partitionCols.size();
    }
}
From source file:org.apache.flink.hadoopcompatibility.mapred.HadoopOutputFormat.java
License:Apache License
/**
 * create the temporary output file for hadoop RecordWriter.
 * @param taskNumber The number of the parallel instance.
 * @param numTasks The number of parallel tasks.
 * @throws IOException
 */
@Override
public void open(int taskNumber, int numTasks) throws IOException {
    if (Integer.toString(taskNumber + 1).length() > 6) {
        throw new IOException("Task id too large.");
    }

    TaskAttemptID taskAttemptID = TaskAttemptID.forName("attempt__0000_r_"
            + String.format("%" + (6 - Integer.toString(taskNumber + 1).length()) + "s", " ")
                    .replace(" ", "0")
            + Integer.toString(taskNumber + 1) + "_0");

    this.jobConf.set("mapred.task.id", taskAttemptID.toString());
    this.jobConf.setInt("mapred.task.partition", taskNumber + 1);
    // for hadoop 2.2
    this.jobConf.set("mapreduce.task.attempt.id", taskAttemptID.toString());
    this.jobConf.setInt("mapreduce.task.partition", taskNumber + 1);

    try {
        this.context = HadoopUtils.instantiateTaskAttemptContext(this.jobConf, taskAttemptID);
    } catch (Exception e) {
        throw new RuntimeException(e);
    }

    this.fileOutputCommitter = new FileOutputCommitter();

    try {
        this.jobContext = HadoopUtils.instantiateJobContext(this.jobConf, new JobID());
    } catch (Exception e) {
        throw new RuntimeException(e);
    }

    this.fileOutputCommitter.setupJob(jobContext);

    this.recordWriter = this.mapredOutputFormat.getRecordWriter(null, this.jobConf,
            Integer.toString(taskNumber + 1), new HadoopDummyProgressable());
}
From source file:org.apache.hive.hcatalog.mapreduce.HCatMapRedUtils.java
License:Apache License
/**
 * Creates a {@code TaskAttemptID} from the provided TaskAttemptContext. This
 * also performs logic to strip the crunch named output from the TaskAttemptID
 * already associated with the TaskAttemptContext. The TaskAttemptID requires
 * there to be six parts, separated by "_". With the named output the JobID
 * has 7 parts. That needs to be stripped away before a new TaskAttemptID can
 * be constructed.
 *
 * @param context
 *          The TaskAttemptContext
 * @return A TaskAttemptID with the crunch named output removed
 */
public static TaskAttemptID getTaskAttemptID(TaskAttemptContext context) {
    String taskAttemptId = context.getTaskAttemptID().toString();
    List<String> taskAttemptIDParts = Lists.newArrayList(taskAttemptId.split("_"));
    if (taskAttemptIDParts.size() < 7)
        return TaskAttemptID.forName(taskAttemptId);

    // index 2 is the 3rd element in the task attempt id, which will be the
    // named output
    taskAttemptIDParts.remove(2);
    String reducedTaskAttemptId = StringUtils.join(taskAttemptIDParts, "_");
    return TaskAttemptID.forName(reducedTaskAttemptId);
}
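As a concrete illustration of the stripping step, run on a raw string (the identifiers and the named output "out1" are made up for the example, and String.join stands in for the StringUtils.join used above):

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import org.apache.hadoop.mapred.TaskAttemptID;

public class StripNamedOutputDemo {
    public static void main(String[] args) {
        // 7 parts: the 3rd element, "out1", is the crunch named output.
        String crunchId = "attempt_201403121733_out1_0003_m_000005_0";
        List<String> parts = new ArrayList<>(Arrays.asList(crunchId.split("_")));
        if (parts.size() >= 7) {
            parts.remove(2); // drop the named output, restoring the canonical six parts
        }
        TaskAttemptID id = TaskAttemptID.forName(String.join("_", parts));
        System.out.println(id); // attempt_201403121733_0003_m_000005_0
    }
}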
From source file:org.commoncrawl.hadoop.io.S3GetMetdataJob.java
License:Open Source License
public void configure(JobConf job) {
    _attemptID = TaskAttemptID.forName(job.get("mapred.task.id"));
    _maxAttemptsPerTask = job.getInt("mapred.max.tracker.failures", 4);
    _splitDetails = job.get(ARCSplitReader.SPLIT_DETAILS, "Spit Details Unknown");
}
From source file:org.commoncrawl.hadoop.template.SampleHadoopJob.java
License:Open Source License
/** overloaded to initialize class variables from job config **/
@Override
public void configure(JobConf job) {
    attemptID = TaskAttemptID.forName(job.get("mapred.task.id"));
    maxAttemptTaskId = job.getInt("mapred.max.tracker.failures", 4) - 1;
    splitDetails = job.get(ARCSplitReader.SPLIT_DETAILS, "Spit Details Unknown");
    pattern = Pattern.compile(job.get("mapred.mapper.regex"));
    group = job.getInt("mapred.mapper.regex.group", 0);
}
From source file:org.elasticsearch.hadoop.mr.HadoopCfgUtils.java
License:Apache License
public static TaskID getTaskID(Configuration cfg) {
    // first try with the attempt since some Hadoop versions mix the two
    String taskAttemptId = HadoopCfgUtils.getTaskAttemptId(cfg);
    if (StringUtils.hasText(taskAttemptId)) {
        try {
            return TaskAttemptID.forName(taskAttemptId).getTaskID();
        } catch (IllegalArgumentException ex) {
            // the task attempt is invalid (Tez in particular uses the wrong string - see #346)
            // try to fallback to task id
            return parseTaskIdFromTaskAttemptId(taskAttemptId);
        }
    }
    String taskIdProp = HadoopCfgUtils.getTaskId(cfg);
    // double-check task id bug in Hadoop 2.5.x
    if (StringUtils.hasText(taskIdProp) && !taskIdProp.contains("attempt")) {
        return TaskID.forName(taskIdProp);
    }
    return null;
}
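The fallback helper parseTaskIdFromTaskAttemptId is not shown in the snippet. A plausible sketch of what such a fallback might look like, offered as an assumption rather than the actual elasticsearch-hadoop implementation: drop the trailing attempt counter, swap the attempt prefix for a task prefix, and let TaskID.forName try again.

import org.apache.hadoop.mapred.TaskID;

public class TaskIdFallbackSketch {
    // Hypothetical fallback, NOT the real elasticsearch-hadoop code: rewrite
    // an attempt-style string into a task-style string and reparse it.
    static TaskID parseTaskIdFromTaskAttemptId(String taskAttemptId) {
        String id = taskAttemptId;
        if (id.startsWith("attempt_")) {
            id = "task_" + id.substring("attempt_".length());
        }
        int lastSep = id.lastIndexOf('_');
        if (lastSep > 0) {
            id = id.substring(0, lastSep); // drop the attempt counter
        }
        try {
            return TaskID.forName(id);
        } catch (IllegalArgumentException ex) {
            return null; // still malformed; give up, as the caller ultimately does
        }
    }
}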