List of usage examples for org.apache.hadoop.mapred TaskAttemptID forName
public static TaskAttemptID forName(String str) throws IllegalArgumentException
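Before the examples from real projects below, a minimal self-contained sketch of how forName is typically called (the attempt-ID string and the class name ForNameExample are illustrative, not taken from any of the sources listed): parse the string form of a task attempt ID, then navigate to its task and job IDs.

import org.apache.hadoop.mapred.JobID;
import org.apache.hadoop.mapred.TaskAttemptID;

public class ForNameExample {
    public static void main(String[] args) {
        // Attempt IDs have the form attempt_<jtIdentifier>_<jobId>_<m|r>_<taskId>_<attemptId>;
        // this particular value is made up for illustration.
        String str = "attempt_200707121733_0003_m_000005_0";

        // forName parses the string back into a TaskAttemptID,
        // throwing IllegalArgumentException if the string is malformed.
        TaskAttemptID attempt = TaskAttemptID.forName(str);

        // The parsed object gives access to the enclosing task and job IDs,
        // and toString() reproduces the original string form.
        JobID jobId = attempt.getJobID();
        System.out.println(attempt.getTaskID() + " belongs to job " + jobId);
        System.out.println(attempt.toString().equals(str)); // true
    }
}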
From source file:com.digitalpebble.behemoth.uima.UIMAMapper.java
License:Apache License
public void configure(JobConf conf) {
    this.config = conf;
    storeshortnames = config.getBoolean("uima.store.short.names", true);
    File pearpath = new File(conf.get("uima.pear.path"));
    String pearname = pearpath.getName();
    URL urlPEAR = null;
    try {
        Path[] localArchives = DistributedCache.getLocalCacheFiles(conf);
        // identify the right archive
        for (Path la : localArchives) {
            String localPath = la.toUri().toString();
            LOG.info("Inspecting local paths " + localPath);
            if (!localPath.endsWith(pearname))
                continue;
            urlPEAR = new URL("file://" + localPath);
            break;
        }
    } catch (IOException e) {
        throw new RuntimeException("Impossible to retrieve gate application from distributed cache", e);
    }
    if (urlPEAR == null)
        throw new RuntimeException("UIMA pear " + pearpath + " not available in distributed cache");
    File pearFile = new File(urlPEAR.getPath());
    // should check whether a different mapper has already unpacked it
    // but for now we just unpack in a different location for every mapper
    TaskAttemptID attempt = TaskAttemptID.forName(conf.get("mapred.task.id"));
    installDir = new File(pearFile.getParentFile(), attempt.toString());
    PackageBrowser instPear = PackageInstaller.installPackage(installDir, pearFile, true);
    // get the resources required for the AnalysisEngine
    org.apache.uima.resource.ResourceManager rsrcMgr = UIMAFramework.newDefaultResourceManager();
    // Create analysis engine from the installed PEAR package using
    // the created PEAR specifier
    XMLInputSource in;
    try {
        in = new XMLInputSource(instPear.getComponentPearDescPath());
        ResourceSpecifier specifier = UIMAFramework.getXMLParser().parseResourceSpecifier(in);
        tae = UIMAFramework.produceAnalysisEngine(specifier, rsrcMgr, null);
        cas = tae.newCAS();
    } catch (Exception e) {
        throw new RuntimeException(e);
    }
    String[] featuresFilters = this.config.get("uima.features.filter", "").split(",");
    // the feature filters have the following form: Type:featureName
    // we group them by annotation type
    for (String ff : featuresFilters) {
        String[] fp = ff.split(":");
        if (fp.length != 2)
            continue;
        Set<Feature> features = featfilts.get(fp[0]);
        if (features == null) {
            features = new HashSet<Feature>();
            featfilts.put(fp[0], features);
        }
        Feature f = cas.getTypeSystem().getFeatureByFullName(ff);
        if (f != null)
            features.add(f);
    }
    String[] annotTypes = this.config.get("uima.annotations.filter", "").split(",");
    uimatypes = new ArrayList<Type>(annotTypes.length);
    for (String type : annotTypes) {
        Type aType = cas.getTypeSystem().getType(type);
        uimatypes.add(aType);
    }
}
From source file:com.facebook.hiveio.output.HiveOutput.java
License:Apache License
/**
 * Write records to a Hive table
 *
 * @param outputDesc description of Hive table
 * @param records the records to write
 * @throws TException
 * @throws IOException
 * @throws InterruptedException
 */
public static void writeTable(HiveOutputDescription outputDesc, Iterable<HiveWritableRecord> records)
        throws TException, IOException, InterruptedException {
    long uniqueId = System.nanoTime();
    String taskAttemptIdStr = "attempt_200707121733_" + (int) uniqueId + "_m_000005_0";
    String profile = Long.toString(uniqueId);

    HiveConf conf = new HiveConf(HiveOutput.class);
    conf.setInt("mapred.task.partition", 1);
    conf.set("mapred.task.id", taskAttemptIdStr);

    HiveApiOutputFormat.initProfile(conf, outputDesc, profile);

    HiveApiOutputFormat outputFormat = new HiveApiOutputFormat();
    outputFormat.setMyProfileId(profile);

    TaskAttemptID taskAttemptID = TaskAttemptID.forName(taskAttemptIdStr);
    TaskAttemptContext taskContext = new HackTaskAttemptContext(new JobConf(conf), taskAttemptID);
    JobContext jobContext = new HackJobContext(new JobConf(conf), taskAttemptID.getJobID());

    RecordWriterImpl recordWriter = outputFormat.getRecordWriter(taskContext);

    HiveApiOutputCommitter committer = outputFormat.getOutputCommitter(taskContext);
    committer.setupJob(jobContext);

    committer.setupTask(taskContext);
    for (HiveWritableRecord record : records) {
        recordWriter.write(NullWritable.get(), record);
    }
    recordWriter.close(taskContext);
    committer.commitTask(taskContext);

    committer.commitJob(jobContext);
}
From source file:com.zjy.mongo.mapred.output.MongoRecordWriter.java
License:Apache License
/**
 * Create a new MongoRecordWriter.
 * @param conf the job configuration
 */
public MongoRecordWriter(final JobConf conf) {
    super(Collections.<DBCollection>emptyList(),
            new TaskAttemptContextImpl(conf, TaskAttemptID.forName(conf.get("mapred.task.id"))));
    configuration = conf;
}
From source file:eu.stratosphere.hadoopcompatibility.HadoopOutputFormatWrapper.java
License:Apache License
/**
 * create the temporary output file for hadoop RecordWriter.
 * @param taskNumber The number of the parallel instance.
 * @param numTasks The number of parallel tasks.
 * @throws IOException
 */
@Override
public void open(int taskNumber, int numTasks) throws IOException {
    this.fileOutputCommitterWrapper.setupJob(this.jobConf);
    if (Integer.toString(taskNumber + 1).length() <= 6) {
        this.jobConf.set("mapred.task.id",
                "attempt__0000_r_"
                        + String.format("%" + (6 - Integer.toString(taskNumber + 1).length()) + "s", " ")
                                .replace(" ", "0")
                        + Integer.toString(taskNumber + 1) + "_0");
        // compatible for hadoop 2.2.0, the temporary output directory is different from hadoop 1.2.1
        this.jobConf.set("mapreduce.task.output.dir",
                this.fileOutputCommitterWrapper
                        .getTempTaskOutputPath(this.jobConf,
                                TaskAttemptID.forName(this.jobConf.get("mapred.task.id")))
                        .toString());
    } else {
        throw new IOException("task id too large");
    }
    this.recordWriter = this.hadoopOutputFormat.getRecordWriter(null, this.jobConf,
            Integer.toString(taskNumber + 1), new DummyHadoopProgressable());
}
From source file:eu.stratosphere.hadoopcompatibility.HadoopOutputFormatWrapper.java
License:Apache License
/**
 * commit the task by moving the output file out from the temporary directory.
 * @throws IOException
 */
@Override
public void close() throws IOException {
    this.recordWriter.close(new DummyHadoopReporter());
    if (this.fileOutputCommitterWrapper.needsTaskCommit(this.jobConf,
            TaskAttemptID.forName(this.jobConf.get("mapred.task.id")))) {
        this.fileOutputCommitterWrapper.commitTask(this.jobConf,
                TaskAttemptID.forName(this.jobConf.get("mapred.task.id")));
    }
    // TODO: commit job when all the tasks are finished
}
From source file:eu.stratosphere.hadoopcompatibility.mapred.HadoopOutputFormat.java
License:Apache License
/**
 * create the temporary output file for hadoop RecordWriter.
 * @param taskNumber The number of the parallel instance.
 * @param numTasks The number of parallel tasks.
 * @throws IOException
 */
@Override
public void open(int taskNumber, int numTasks) throws IOException {
    if (Integer.toString(taskNumber + 1).length() > 6) {
        throw new IOException("Task id too large.");
    }

    TaskAttemptID taskAttemptID = TaskAttemptID.forName("attempt__0000_r_"
            + String.format("%" + (6 - Integer.toString(taskNumber + 1).length()) + "s", " ").replace(" ", "0")
            + Integer.toString(taskNumber + 1) + "_0");

    try {
        this.context = HadoopUtils.instantiateTaskAttemptContext(this.jobConf, taskAttemptID);
    } catch (Exception e) {
        throw new RuntimeException(e);
    }

    this.jobConf.set("mapred.task.id", taskAttemptID.toString());
    // for hadoop 2.2
    this.jobConf.set("mapreduce.task.attempt.id", taskAttemptID.toString());

    this.fileOutputCommitter = new FileOutputCommitter();

    try {
        this.jobContext = HadoopUtils.instantiateJobContext(this.jobConf, new JobID());
    } catch (Exception e) {
        throw new RuntimeException(e);
    }

    this.fileOutputCommitter.setupJob(jobContext);

    this.recordWriter = this.mapredOutputFormat.getRecordWriter(null, this.jobConf,
            Integer.toString(taskNumber + 1), new HadoopDummyProgressable());
}
From source file:eu.stratosphere.hadoopcompatibility.mapred.record.HadoopRecordOutputFormat.java
License:Apache License
/**
 * create the temporary output file for hadoop RecordWriter.
 * @param taskNumber The number of the parallel instance.
 * @param numTasks The number of parallel tasks.
 * @throws IOException
 */
@Override
public void open(int taskNumber, int numTasks) throws IOException {
    this.fileOutputCommitterWrapper.setupJob(this.jobConf);
    if (Integer.toString(taskNumber + 1).length() <= 6) {
        this.jobConf.set("mapred.task.id",
                "attempt__0000_r_"
                        + String.format("%" + (6 - Integer.toString(taskNumber + 1).length()) + "s", " ")
                                .replace(" ", "0")
                        + Integer.toString(taskNumber + 1) + "_0");
        // compatible for hadoop 2.2.0, the temporary output directory is different from hadoop 1.2.1
        this.jobConf.set("mapreduce.task.output.dir",
                this.fileOutputCommitterWrapper
                        .getTempTaskOutputPath(this.jobConf,
                                TaskAttemptID.forName(this.jobConf.get("mapred.task.id")))
                        .toString());
    } else {
        throw new IOException("task id too large");
    }
    this.recordWriter = this.hadoopOutputFormat.getRecordWriter(null, this.jobConf,
            Integer.toString(taskNumber + 1), new HadoopDummyProgressable());
}
From source file:eu.stratosphere.hadoopcompatibility.mapred.record.HadoopRecordOutputFormat.java
License:Apache License
/**
 * commit the task by moving the output file out from the temporary directory.
 * @throws IOException
 */
@Override
public void close() throws IOException {
    this.recordWriter.close(new HadoopDummyReporter());
    if (this.fileOutputCommitterWrapper.needsTaskCommit(this.jobConf,
            TaskAttemptID.forName(this.jobConf.get("mapred.task.id")))) {
        this.fileOutputCommitterWrapper.commitTask(this.jobConf,
                TaskAttemptID.forName(this.jobConf.get("mapred.task.id")));
    }
    // TODO: commit job when all the tasks are finished
}
From source file:it.crs4.pydoop.pipes.Application.java
License:Apache License
/**
 * Start the child process to handle the task for us.
 * @param conf the task's configuration
 * @param recordReader the fake record reader to update progress with
 * @param output the collector to send output to
 * @param reporter the reporter for the task
 * @param outputKeyClass the class of the output keys
 * @param outputValueClass the class of the output values
 * @throws IOException
 * @throws InterruptedException
 */
Application(JobConf conf, RecordReader<FloatWritable, NullWritable> recordReader,
        OutputCollector<K2, V2> output, Reporter reporter, Class<? extends K2> outputKeyClass,
        Class<? extends V2> outputValueClass) throws IOException, InterruptedException {
    serverSocket = new ServerSocket(0);
    Map<String, String> env = new HashMap<String, String>();
    // add TMPDIR environment variable with the value of java.io.tmpdir
    env.put("TMPDIR", System.getProperty("java.io.tmpdir"));
    env.put(Submitter.PORT, Integer.toString(serverSocket.getLocalPort()));

    TaskAttemptID taskid = TaskAttemptID.forName(conf.get(MRJobConfig.TASK_ATTEMPT_ID));

    // get the task's working directory
    String workDir = LocalJobRunner.getLocalTaskDir(conf.getUser(), taskid.getJobID().toString(),
            taskid.getTaskID().toString(), false);

    // Add token to the environment if security is enabled
    Token<JobTokenIdentifier> jobToken = TokenCache.getJobToken(conf.getCredentials());
    // This password is used as shared secret key between this application and
    // child pipes process
    byte[] password = jobToken.getPassword();
    String localPasswordFile = new File(workDir, "jobTokenPassword").getAbsolutePath();
    writePasswordToLocalFile(localPasswordFile, password, conf);
    env.put("hadoop.pipes.shared.secret.location", localPasswordFile);

    List<String> cmd = new ArrayList<String>();
    String interpretor = conf.get(Submitter.INTERPRETOR);
    if (interpretor != null) {
        cmd.add(interpretor);
    }
    String executable = DistributedCache.getLocalCacheFiles(conf)[0].toString();
    if (!(new File(executable).canExecute())) {
        // LinuxTaskController sets +x permissions on all distcache files already.
        // In case of DefaultTaskController, set permissions here.
        FileUtil.chmod(executable, "u+x");
    }
    cmd.add(executable);

    // wrap the command in a stdout/stderr capture
    // we are starting map/reduce task of the pipes job. this is not a cleanup
    // attempt.
    File stdout = TaskLog.getTaskLogFile(taskid, false, TaskLog.LogName.STDOUT);
    File stderr = TaskLog.getTaskLogFile(taskid, false, TaskLog.LogName.STDERR);
    long logLength = TaskLog.getTaskLogLength(conf);
    cmd = TaskLog.captureOutAndError(null, cmd, stdout, stderr, logLength, false);

    process = runClient(cmd, env);
    clientSocket = serverSocket.accept();

    String challenge = getSecurityChallenge();
    String digestToSend = createDigest(password, challenge);
    String digestExpected = createDigest(password, digestToSend);

    handler = new OutputHandler<K2, V2>(output, reporter, recordReader, digestExpected);
    K2 outputKey = (K2) ReflectionUtils.newInstance(outputKeyClass, conf);
    V2 outputValue = (V2) ReflectionUtils.newInstance(outputValueClass, conf);
    downlink = new BinaryProtocol<K1, V1, K2, V2>(clientSocket, handler, outputKey, outputValue, conf);

    downlink.authenticate(digestToSend, challenge);
    waitForAuthentication();
    LOG.debug("Authentication succeeded");
    downlink.start();
    downlink.setJobConf(conf);
}
From source file:ml.shifu.guagua.mapreduce.GuaguaMapper.java
License:Apache License
/**
 * In our cluster with hadoop-0.20.2-cdh3u4a, a runtime exception is thrown to Child but the mapper status
 * doesn't change to failed. We fail this task explicitly to make sure our fail-over can make the job successful.
 */
private void failTask(Throwable t, Configuration conf) {
    LOG.error("failtask: Killing task: {} ", conf.get(GuaguaMapReduceConstants.MAPRED_TASK_ID));
    try {
        org.apache.hadoop.mapred.JobClient jobClient = new org.apache.hadoop.mapred.JobClient(
                (org.apache.hadoop.mapred.JobConf) conf);
        JobID jobId = JobID.forName(conf.get(GuaguaMapReduceConstants.MAPRED_JOB_ID));
        RunningJob job = jobClient.getJob(jobId);
        job.killTask(TaskAttemptID.forName(conf.get(GuaguaMapReduceConstants.MAPRED_TASK_ID)), true);
    } catch (IOException ioe) {
        throw new GuaguaRuntimeException(ioe);
    }
}