List of usage examples for org.apache.hadoop.mapred OutputCommitter commitTask
@Override public final void commitTask(org.apache.hadoop.mapreduce.TaskAttemptContext taskContext) throws IOException
From source file:com.ibm.jaql.io.hadoop.DefaultHadoopOutputAdapter.java
License:Apache License
public void close() throws Exception { if (writer != null) { writer.close((Reporter) reporter); writer = null;//from ww w . j av a2 s. c o m OutputCommitter committer = conf.getOutputCommitter(); committer.commitTask(sequentialTask); // FIXME: We skip the job cleanup because the FileOutputCommitter deletes the _temporary, which // is shared by all sequential jobs. committer.cleanupJob(sequentialJob); sequentialTask = null; // // if (committer instanceof FileOutputCommitter) { // // // for this case, only one file is expected // String fileName = new Path(location).getName(); // Path pTgt = null; // Path src = null; // try { // pTgt = FileOutputFormat.getOutputPath(conf); // src = FileOutputFormat.getTaskOutputPath(conf, fileName); // } catch(Exception e) { // // TODO: this can happen if the OutputFormat is not a FileOutputFormat, // // i.e., for HBase // LOG.warn("task output files not found"); // } // if(pTgt != null && src != null) { // Path tgt = new Path(FileOutputFormat.getOutputPath(conf), fileName); // // FileSystem fs = src.getFileSystem(conf); // if(fs.exists(tgt)) { // fs.delete(tgt, true); // } // // // // rename src to tgt // fs.rename(src, tgt); // // // clean-up the temp // Path tmp = new Path(FileOutputFormat.getOutputPath(conf), FileOutputCommitter.TEMP_DIR_NAME); // if(fs.exists(tmp)) // fs.delete(tmp, true); // } // } } }
From source file:com.scaleoutsoftware.soss.hserver.hadoop.MapperWrapperMapred.java
License:Apache License
/** * Runs mapper for the single split./*from w w w . j av a 2s . c o m*/ * * @param mapOutputAccumulator mapOutputAccumulator to use * @param split split ot run on */ @Override @SuppressWarnings("unchecked") public void runSplit(final MapOutputAccumulator<OUTKEY, OUTVALUE> mapOutputAccumulator, Object split, int splitIndex) throws IOException, ClassNotFoundException, InterruptedException { JobConf jobConf = new JobConf(this.jobConf); //Clone JobConf to prevent unexpected task interaction TaskAttemptID taskAttemptID = TaskAttemptID .downgrade(hadoopVersionSpecificCode.createTaskAttemptId(jobId, true, splitIndex)); ReducerWrapperMapred.updateJobConf(jobConf, taskAttemptID, splitIndex); updateJobWithSplit(jobConf, split); InputFormat inputFormat = jobConf.getInputFormat(); Reporter reporter = Reporter.NULL; //Create RecordReader org.apache.hadoop.mapred.RecordReader<INKEY, INVALUE> recordReader = inputFormat .getRecordReader((InputSplit) split, jobConf, reporter); //Make a mapper org.apache.hadoop.mapred.Mapper<INKEY, INVALUE, OUTKEY, OUTVALUE> mapper; try { mapper = (org.apache.hadoop.mapred.Mapper<INKEY, INVALUE, OUTKEY, OUTVALUE>) mapperConstructor .newInstance(); mapper.configure(jobConf); } catch (Exception e) { throw new RuntimeException("Cannot instantiate mapper " + mapperConstructor.getDeclaringClass(), e); } //These are to support map only jobs which write output directly to HDFS. final RecordWriter outputRecordWriter; OutputCommitter outputCommitter = null; TaskAttemptContext taskAttemptContext = null; if (mapOnlyJob) { taskAttemptContext = hadoopVersionSpecificCode.createTaskAttemptContextMapred(jobConf, taskAttemptID); OutputFormat outputFormat = jobConf.getOutputFormat(); FileSystem fs = FileSystem.get(jobConf); outputRecordWriter = (org.apache.hadoop.mapred.RecordWriter<OUTKEY, OUTVALUE>) outputFormat .getRecordWriter(fs, jobConf, ReducerWrapperMapred.getOutputName(splitIndex), Reporter.NULL); outputCommitter = jobConf.getOutputCommitter(); //Create task object so it can handle file format initialization //The MapTask is private in the Hadoop 1.x so we have to go through reflection. try { Class reduceTask = Class.forName("org.apache.hadoop.mapred.MapTask"); Constructor reduceTaskConstructor = reduceTask.getDeclaredConstructor(String.class, TaskAttemptID.class, int.class, JobSplit.TaskSplitIndex.class, int.class); reduceTaskConstructor.setAccessible(true); Task task = (Task) reduceTaskConstructor.newInstance(null, taskAttemptID, splitIndex, new JobSplit.TaskSplitIndex(), 0); task.setConf(jobConf); task.initialize(jobConf, jobId, Reporter.NULL, false); } catch (Exception e) { throw new IOException("Cannot initialize MapTask", e); } outputCommitter.setupTask(taskAttemptContext); } else { outputRecordWriter = null; } OutputCollector<OUTKEY, OUTVALUE> outputCollector; if (!mapOnlyJob) { outputCollector = new OutputCollector<OUTKEY, OUTVALUE>() { @Override public void collect(OUTKEY outkey, OUTVALUE outvalue) throws IOException { try { mapOutputAccumulator.combine(outkey, outvalue); } catch (InterruptedException e) { Thread.currentThread().interrupt(); } } }; } else { outputCollector = new OutputCollector<OUTKEY, OUTVALUE>() { @Override public void collect(OUTKEY outkey, OUTVALUE outvalue) throws IOException { outputRecordWriter.write(outkey, outvalue); } }; } INKEY key = recordReader.createKey(); INVALUE value = recordReader.createValue(); while (recordReader.next(key, value)) { mapper.map(key, value, outputCollector, reporter); } mapper.close(); recordReader.close(); if (mapOnlyJob) { outputRecordWriter.close(Reporter.NULL); outputCommitter.commitTask(taskAttemptContext); } }
From source file:org.apache.ignite.internal.processors.hadoop.impl.v1.HadoopV1OutputCollector.java
License:Apache License
/** * Commit task./*from w w w.j a va 2 s.c o m*/ * * @throws IOException In failed. */ public void commit() throws IOException { if (writer != null) { OutputCommitter outputCommitter = jobConf.getOutputCommitter(); TaskAttemptContext taskCtx = new TaskAttemptContextImpl(jobConf, attempt); if (outputCommitter.needsTaskCommit(taskCtx)) outputCommitter.commitTask(taskCtx); } }