List of usage examples for org.apache.hadoop.mapred.TaskAttemptID.toString()
@Override
public String toString()
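TaskAttemptID.toString() renders a task attempt id in the form attempt_<jtIdentifier>_<jobId>_<m|r>_<taskId>_<attemptId>, for example attempt_200707121733_0003_m_000005_0, and TaskAttemptID.forName(String) parses that form back into a TaskAttemptID. A minimal sketch of the round trip (the job identifier and task numbers below are made-up example values):

import org.apache.hadoop.mapred.JobID;
import org.apache.hadoop.mapred.TaskAttemptID;
import org.apache.hadoop.mapred.TaskID;

public class TaskAttemptIdToStringExample {
    public static void main(String[] args) {
        // Build an attempt id by hand: job "200707121733_0003", map task 5, attempt 0.
        JobID jobId = new JobID("200707121733", 3);
        TaskAttemptID attemptId = new TaskAttemptID(new TaskID(jobId, true, 5), 0);

        // Prints: attempt_200707121733_0003_m_000005_0
        System.out.println(attemptId.toString());

        // forName(...) parses the same string form back into an equal TaskAttemptID.
        TaskAttemptID parsed = TaskAttemptID.forName(attemptId.toString());
        System.out.println(parsed.equals(attemptId)); // true
    }
}

Several of the examples below rely on exactly this round trip: they either call toString() on a real attempt id or build a synthetic attempt string, then store it in the job configuration under mapred.task.id / mapreduce.task.attempt.id so that output formats and committers see a well-formed attempt id.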
From source file:cascading.flow.tez.util.TezUtil.java
License:Open Source License
public static void setMRProperties(ProcessorContext context, Configuration config, boolean isMapperOutput) {
    TaskAttemptID taskAttemptId = org.apache.tez.mapreduce.hadoop.mapreduce.TaskAttemptContextImpl
            .createMockTaskAttemptID(context.getApplicationId().getClusterTimestamp(),
                    context.getTaskVertexIndex(), context.getApplicationId().getId(), context.getTaskIndex(),
                    context.getTaskAttemptNumber(), isMapperOutput);

    config.set(JobContext.TASK_ATTEMPT_ID, taskAttemptId.toString());
    config.set(JobContext.TASK_ID, taskAttemptId.getTaskID().toString());
    config.setBoolean(JobContext.TASK_ISMAP, isMapperOutput);
    config.setInt(JobContext.TASK_PARTITION, taskAttemptId.getTaskID().getId());
}
From source file:com.digitalpebble.behemoth.uima.UIMAMapper.java
License:Apache License
public void configure(JobConf conf) {
    this.config = conf;
    storeshortnames = config.getBoolean("uima.store.short.names", true);

    File pearpath = new File(conf.get("uima.pear.path"));
    String pearname = pearpath.getName();

    URL urlPEAR = null;

    try {
        Path[] localArchives = DistributedCache.getLocalCacheFiles(conf);
        // identify the right archive
        for (Path la : localArchives) {
            String localPath = la.toUri().toString();
            LOG.info("Inspecting local paths " + localPath);
            if (!localPath.endsWith(pearname))
                continue;
            urlPEAR = new URL("file://" + localPath);
            break;
        }
    } catch (IOException e) {
        throw new RuntimeException("Impossible to retrieve gate application from distributed cache", e);
    }

    if (urlPEAR == null)
        throw new RuntimeException("UIMA pear " + pearpath + " not available in distributed cache");

    File pearFile = new File(urlPEAR.getPath());

    // should check whether a different mapper has already unpacked it
    // but for now we just unpack in a different location for every mapper
    TaskAttemptID attempt = TaskAttemptID.forName(conf.get("mapred.task.id"));
    installDir = new File(pearFile.getParentFile(), attempt.toString());
    PackageBrowser instPear = PackageInstaller.installPackage(installDir, pearFile, true);

    // get the resources required for the AnalysisEngine
    org.apache.uima.resource.ResourceManager rsrcMgr = UIMAFramework.newDefaultResourceManager();

    // Create analysis engine from the installed PEAR package using
    // the created PEAR specifier
    XMLInputSource in;
    try {
        in = new XMLInputSource(instPear.getComponentPearDescPath());
        ResourceSpecifier specifier = UIMAFramework.getXMLParser().parseResourceSpecifier(in);
        tae = UIMAFramework.produceAnalysisEngine(specifier, rsrcMgr, null);
        cas = tae.newCAS();
    } catch (Exception e) {
        throw new RuntimeException(e);
    }

    String[] featuresFilters = this.config.get("uima.features.filter", "").split(",");
    // the feature filters have the following form : Type:featureName
    // we group them by annotation type
    for (String ff : featuresFilters) {
        String[] fp = ff.split(":");
        if (fp.length != 2)
            continue;
        Set<Feature> features = featfilts.get(fp[0]);
        if (features == null) {
            features = new HashSet<Feature>();
            featfilts.put(fp[0], features);
        }
        Feature f = cas.getTypeSystem().getFeatureByFullName(ff);
        if (f != null)
            features.add(f);
    }

    String[] annotTypes = this.config.get("uima.annotations.filter", "").split(",");
    uimatypes = new ArrayList<Type>(annotTypes.length);

    for (String type : annotTypes) {
        Type aType = cas.getTypeSystem().getType(type);
        uimatypes.add(aType);
    }
}
From source file:com.ibm.jaql.io.hadoop.DefaultHadoopOutputAdapter.java
License:Apache License
public void open() throws Exception {
    this.conf = new JobConf();
    this.reporter = Reporter.NULL;

    // Some OutputFormats (like FileOutputFormat) require that the job id/task id set.
    // So let's set it for all output formats, just in case they need it too.
    JobID jobid = new JobID("sequential", jobCounter.getAndIncrement());
    TaskAttemptID taskid = new TaskAttemptID(new TaskID(jobid, true, 0), 0);
    conf.set("mapred.task.id", taskid.toString());

    setSequential(conf);

    // Create a task so we can use committers.
    sequentialJob = new ExposeJobContext(conf, jobid);
    sequentialTask = new ExposeTaskAttemptContext(conf, taskid);

    // Give the commiter a chance initialize.
    OutputCommitter committer = conf.getOutputCommitter();
    // FIXME: We skip job setup for now because
    committer.setupJob(sequentialJob);
    committer.setupTask(sequentialTask);

    if (oFormat instanceof JobConfigurable)
        ((JobConfigurable) oFormat).configure(conf);
}
From source file:com.mellanox.hadoop.mapred.UdaPlugin.java
License:Apache License
public UdaPluginRT(UdaShuffleConsumerPluginShared udaShuffleConsumer, ReduceTask reduceTask, JobConf jobConf,
        Reporter reporter, int numMaps) throws IOException {
    super(jobConf);
    this.udaShuffleConsumer = udaShuffleConsumer;
    this.reduceTask = reduceTask;

    String totalRdmaSizeStr = jobConf.get("mapred.rdma.shuffle.total.size", "0"); // default 0 means ignoring this parameter and use instead -Xmx and mapred.job.shuffle.input.buffer.percent
    long totalRdmaSize = StringUtils.TraditionalBinaryPrefix.string2long(totalRdmaSizeStr);
    long maxRdmaBufferSize = jobConf.getLong("mapred.rdma.buf.size", 1024);
    long minRdmaBufferSize = jobConf.getLong("mapred.rdma.buf.size.min", 16);
    long shuffleMemorySize = totalRdmaSize;

    StringBuilder meminfoSb = new StringBuilder();
    meminfoSb.append("UDA: numMaps=").append(numMaps);
    meminfoSb.append(", maxRdmaBufferSize=").append(maxRdmaBufferSize);
    meminfoSb.append("KB, minRdmaBufferSize=").append(minRdmaBufferSize).append("KB");
    meminfoSb.append("KB, rdmaShuffleTotalSize=").append(totalRdmaSize);

    if (totalRdmaSize < 0) {
        LOG.warn("Illegal paramter value: mapred.rdma.shuffle.total.size=" + totalRdmaSize);
    }

    if (totalRdmaSize <= 0) {
        long maxHeapSize = Runtime.getRuntime().maxMemory();
        double shuffleInputBufferPercent = jobConf.getFloat("mapred.job.shuffle.input.buffer.percent",
                DEFAULT_SHUFFLE_INPUT_PERCENT);
        if ((shuffleInputBufferPercent < 0) || (shuffleInputBufferPercent > 1)) {
            LOG.warn("UDA: mapred.job.shuffle.input.buffer.percent is out of range - set to default: "
                    + DEFAULT_SHUFFLE_INPUT_PERCENT);
            shuffleInputBufferPercent = DEFAULT_SHUFFLE_INPUT_PERCENT;
        }

        shuffleMemorySize = (long) (maxHeapSize * shuffleInputBufferPercent);

        LOG.info("Using JAVA Xmx with mapred.job.shuffle.input.buffer.percent to limit UDA shuffle memory");

        meminfoSb.append(", maxHeapSize=").append(maxHeapSize).append("B");
        meminfoSb.append(", shuffleInputBufferPercent=").append(shuffleInputBufferPercent);
        meminfoSb.append("==> shuffleMemorySize=").append(shuffleMemorySize).append("B");

        LOG.info("RDMA shuffle memory is limited to " + shuffleMemorySize / 1024 / 1024 + "MB");
    } else {
        LOG.info("Using mapred.rdma.shuffle.total.size to limit UDA shuffle memory");
        LOG.info("RDMA shuffle memory is limited to " + totalRdmaSize / 1024 / 1024 + "MB");
    }

    LOG.debug(meminfoSb.toString());
    LOG.info("UDA: user prefer rdma.buf.size=" + maxRdmaBufferSize + "KB");
    LOG.info("UDA: minimum rdma.buf.size=" + minRdmaBufferSize + "KB");

    if (jobConf.getSpeculativeExecution()) { // (getMapSpeculativeExecution() || getReduceSpeculativeExecution())
        LOG.info("UDA has limited support for map task speculative execution");
    }

    LOG.info("UDA: number of segments to fetch: " + numMaps);

    /* init variables */
    init_kv_bufs();

    launchCppSide(true, this); // true: this is RT => we should execute NetMerger

    this.j2c_queue = new J2CQueue<K, V>();
    this.mTaskReporter = reporter;
    this.mMapsNeed = numMaps;

    /* send init message */
    TaskAttemptID reduceId = reduceTask.getTaskID();

    mParams.clear();
    mParams.add(Integer.toString(numMaps));
    mParams.add(reduceId.getJobID().toString());
    mParams.add(reduceId.toString());
    mParams.add(jobConf.get("mapred.netmerger.hybrid.lpq.size", "0"));
    mParams.add(Long.toString(maxRdmaBufferSize * 1024)); // in Bytes - pass the raw value we got from xml file (with only conversion to bytes)
    mParams.add(Long.toString(minRdmaBufferSize * 1024)); // in Bytes - passed for checking if rdmaBuffer is still larger than minRdmaBuffer after alignment
    mParams.add(jobConf.getOutputKeyClass().getName());

    boolean compression = jobConf.getCompressMapOutput(); // "true" or "false"
    String alg = null;
    if (compression) {
        alg = jobConf.get("mapred.map.output.compression.codec", null);
    }
    mParams.add(alg);

    String bufferSize = Integer.toString(256 * 1024);
    if (alg != null) {
        if (alg.contains("lzo.LzoCodec")) {
            bufferSize = jobConf.get("io.compression.codec.lzo.buffersize", bufferSize);
        } else if (alg.contains("SnappyCodec")) {
            bufferSize = jobConf.get("io.compression.codec.snappy.buffersize", bufferSize);
        }
    }
    mParams.add(bufferSize);
    mParams.add(Long.toString(shuffleMemorySize));

    String[] dirs = jobConf.getLocalDirs();
    ArrayList<String> dirsCanBeCreated = new ArrayList<String>();
    // checking if the directories can be created
    for (int i = 0; i < dirs.length; i++) {
        try {
            DiskChecker.checkDir(new File(dirs[i].trim()));
            // saving only the directories that can be created
            dirsCanBeCreated.add(dirs[i].trim());
        } catch (DiskErrorException e) {
        }
    }
    // sending the directories
    int numDirs = dirsCanBeCreated.size();
    mParams.add(Integer.toString(numDirs));
    for (int i = 0; i < numDirs; i++) {
        mParams.add(dirsCanBeCreated.get(i));
    }

    LOG.info("mParams array is " + mParams);
    LOG.info("UDA: sending INIT_COMMAND");
    String msg = UdaCmd.formCmd(UdaCmd.INIT_COMMAND, mParams);
    UdaBridge.doCommand(msg);
    this.mProgress = new Progress();
    this.mProgress.set(0.5f);
}
From source file:com.scaleoutsoftware.soss.hserver.hadoop.ReducerWrapperMapred.java
License:Apache License
static void updateJobConf(JobConf jobConf, TaskAttemptID taskAttemptID, int partition) {
    //---------------------------------------------------------------------------------
    // Based on the localizeConfiguration(...) method from Task.java, part of Apache Hadoop 1.2.0,
    // licensed under Apache License, Version 2.0
    //---------------------------------------------------------------------------------

    jobConf.set("mapred.tip.id", taskAttemptID.getTaskID().toString());
    jobConf.set("mapred.task.id", taskAttemptID.toString());
    jobConf.setBoolean("mapred.task.is.map", false);
    jobConf.setInt("mapred.task.partition", partition);
    jobConf.set("mapred.job.id", taskAttemptID.getJobID().toString());

    //---------------------------------------------------------------------------------
    // Based on the localizeConfiguration(...) method from Task.java, part of Apache Hadoop 2.2.0,
    // licensed under Apache License, Version 2.0
    //---------------------------------------------------------------------------------

    jobConf.set(TASK_ID, taskAttemptID.getTaskID().toString());
    jobConf.set(TASK_ATTEMPT_ID, taskAttemptID.toString());
    jobConf.setBoolean(TASK_ISMAP, false);
    jobConf.setInt(TASK_PARTITION, partition);
    jobConf.set(ID, taskAttemptID.getJobID().toString());
    //----------------------------------------------------------------------------------
}
From source file:eu.stratosphere.hadoopcompatibility.FileOutputCommitterWrapper.java
License:Apache License
public Path getTempTaskOutputPath(JobConf conf, TaskAttemptID taskAttemptID) {
    Path outputPath = FileOutputFormat.getOutputPath(conf);
    if (outputPath != null) {
        Path p = new Path(outputPath,
                (FileOutputCommitter.TEMP_DIR_NAME + Path.SEPARATOR + "_" + taskAttemptID.toString()));
        try {
            FileSystem fs = p.getFileSystem(conf);
            return p.makeQualified(fs);
        } catch (IOException ie) {
            LOG.warn(StringUtils.stringifyException(ie));
            return p;
        }
    }
    return null;
}
From source file:eu.stratosphere.hadoopcompatibility.mapred.HadoopOutputFormat.java
License:Apache License
/**
 * create the temporary output file for hadoop RecordWriter.
 * @param taskNumber The number of the parallel instance.
 * @param numTasks The number of parallel tasks.
 * @throws IOException
 */
@Override
public void open(int taskNumber, int numTasks) throws IOException {
    if (Integer.toString(taskNumber + 1).length() > 6) {
        throw new IOException("Task id too large.");
    }

    TaskAttemptID taskAttemptID = TaskAttemptID.forName("attempt__0000_r_"
            + String.format("%" + (6 - Integer.toString(taskNumber + 1).length()) + "s", " ").replace(" ", "0")
            + Integer.toString(taskNumber + 1) + "_0");

    try {
        this.context = HadoopUtils.instantiateTaskAttemptContext(this.jobConf, taskAttemptID);
    } catch (Exception e) {
        throw new RuntimeException(e);
    }

    this.jobConf.set("mapred.task.id", taskAttemptID.toString());
    // for hadoop 2.2
    this.jobConf.set("mapreduce.task.attempt.id", taskAttemptID.toString());

    this.fileOutputCommitter = new FileOutputCommitter();

    try {
        this.jobContext = HadoopUtils.instantiateJobContext(this.jobConf, new JobID());
    } catch (Exception e) {
        throw new RuntimeException(e);
    }

    this.fileOutputCommitter.setupJob(jobContext);

    this.recordWriter = this.mapredOutputFormat.getRecordWriter(null, this.jobConf,
            Integer.toString(taskNumber + 1), new HadoopDummyProgressable());
}
From source file:org.apache.flink.api.java.hadoop.mapred.HadoopOutputFormatBase.java
License:Apache License
/**
 * create the temporary output file for hadoop RecordWriter.
 * @param taskNumber The number of the parallel instance.
 * @param numTasks The number of parallel tasks.
 * @throws java.io.IOException
 */
@Override
public void open(int taskNumber, int numTasks) throws IOException {

    // enforce sequential open() calls
    synchronized (OPEN_MUTEX) {

        if (Integer.toString(taskNumber + 1).length() > 6) {
            throw new IOException("Task id too large.");
        }

        TaskAttemptID taskAttemptID = TaskAttemptID.forName("attempt__0000_r_"
                + String.format("%" + (6 - Integer.toString(taskNumber + 1).length()) + "s", " ").replace(" ", "0")
                + Integer.toString(taskNumber + 1) + "_0");

        this.jobConf.set("mapred.task.id", taskAttemptID.toString());
        this.jobConf.setInt("mapred.task.partition", taskNumber + 1);
        // for hadoop 2.2
        this.jobConf.set("mapreduce.task.attempt.id", taskAttemptID.toString());
        this.jobConf.setInt("mapreduce.task.partition", taskNumber + 1);

        try {
            this.context = HadoopUtils.instantiateTaskAttemptContext(this.jobConf, taskAttemptID);
        } catch (Exception e) {
            throw new RuntimeException(e);
        }

        this.outputCommitter = this.jobConf.getOutputCommitter();

        JobContext jobContext;
        try {
            jobContext = HadoopUtils.instantiateJobContext(this.jobConf, new JobID());
        } catch (Exception e) {
            throw new RuntimeException(e);
        }

        this.outputCommitter.setupJob(jobContext);

        this.recordWriter = this.mapredOutputFormat.getRecordWriter(null, this.jobConf,
                Integer.toString(taskNumber + 1), new HadoopDummyProgressable());
    }
}
From source file:org.apache.flink.batch.connectors.hive.HiveTableOutputFormat.java
License:Apache License
@Override
public void open(int taskNumber, int numTasks) throws IOException {
    try {
        StorageDescriptor sd = hiveTablePartition.getStorageDescriptor();
        serializer = (AbstractSerDe) Class.forName(sd.getSerdeInfo().getSerializationLib()).newInstance();
        ReflectionUtils.setConf(serializer, jobConf);
        // TODO: support partition properties, for now assume they're same as table properties
        SerDeUtils.initializeSerDe(serializer, jobConf, tblProperties, null);
        outputClass = serializer.getSerializedClass();
    } catch (IllegalAccessException | SerDeException | InstantiationException | ClassNotFoundException e) {
        throw new FlinkRuntimeException("Error initializing Hive serializer", e);
    }

    TaskAttemptID taskAttemptID = TaskAttemptID.forName("attempt__0000_r_"
            + String.format("%" + (6 - Integer.toString(taskNumber).length()) + "s", " ").replace(" ", "0")
            + taskNumber + "_0");

    this.jobConf.set("mapred.task.id", taskAttemptID.toString());
    this.jobConf.setInt("mapred.task.partition", taskNumber);
    // for hadoop 2.2
    this.jobConf.set("mapreduce.task.attempt.id", taskAttemptID.toString());
    this.jobConf.setInt("mapreduce.task.partition", taskNumber);

    this.context = new TaskAttemptContextImpl(this.jobConf, taskAttemptID);

    if (!isDynamicPartition) {
        staticWriter = writerForLocation(hiveTablePartition.getStorageDescriptor().getLocation());
    }

    List<ObjectInspector> objectInspectors = new ArrayList<>();
    for (int i = 0; i < rowTypeInfo.getArity() - partitionCols.size(); i++) {
        objectInspectors.add(HiveTableUtil
                .getObjectInspector(LegacyTypeInfoDataTypeConverter.toDataType(rowTypeInfo.getTypeAt(i))));
    }

    if (!isPartitioned) {
        rowObjectInspector = ObjectInspectorFactory
                .getStandardStructObjectInspector(Arrays.asList(rowTypeInfo.getFieldNames()), objectInspectors);
        numNonPartitionCols = rowTypeInfo.getArity();
    } else {
        rowObjectInspector = ObjectInspectorFactory.getStandardStructObjectInspector(
                Arrays.asList(rowTypeInfo.getFieldNames()).subList(0, rowTypeInfo.getArity() - partitionCols.size()),
                objectInspectors);
        numNonPartitionCols = rowTypeInfo.getArity() - partitionCols.size();
    }
}
From source file:org.apache.flink.hadoopcompatibility.mapred.HadoopOutputFormat.java
License:Apache License
/**
 * create the temporary output file for hadoop RecordWriter.
 * @param taskNumber The number of the parallel instance.
 * @param numTasks The number of parallel tasks.
 * @throws IOException
 */
@Override
public void open(int taskNumber, int numTasks) throws IOException {
    if (Integer.toString(taskNumber + 1).length() > 6) {
        throw new IOException("Task id too large.");
    }

    TaskAttemptID taskAttemptID = TaskAttemptID.forName("attempt__0000_r_"
            + String.format("%" + (6 - Integer.toString(taskNumber + 1).length()) + "s", " ").replace(" ", "0")
            + Integer.toString(taskNumber + 1) + "_0");

    this.jobConf.set("mapred.task.id", taskAttemptID.toString());
    this.jobConf.setInt("mapred.task.partition", taskNumber + 1);
    // for hadoop 2.2
    this.jobConf.set("mapreduce.task.attempt.id", taskAttemptID.toString());
    this.jobConf.setInt("mapreduce.task.partition", taskNumber + 1);

    try {
        this.context = HadoopUtils.instantiateTaskAttemptContext(this.jobConf, taskAttemptID);
    } catch (Exception e) {
        throw new RuntimeException(e);
    }

    this.fileOutputCommitter = new FileOutputCommitter();

    try {
        this.jobContext = HadoopUtils.instantiateJobContext(this.jobConf, new JobID());
    } catch (Exception e) {
        throw new RuntimeException(e);
    }

    this.fileOutputCommitter.setupJob(jobContext);

    this.recordWriter = this.mapredOutputFormat.getRecordWriter(null, this.jobConf,
            Integer.toString(taskNumber + 1), new HadoopDummyProgressable());
}