Example usage for org.apache.hadoop.mapred OutputCommitter commitTask

List of usage examples for org.apache.hadoop.mapred OutputCommitter commitTask

Introduction

On this page you can find example usages of org.apache.hadoop.mapred OutputCommitter commitTask.

Prototype

@Override
public final void commitTask(org.apache.hadoop.mapreduce.TaskAttemptContext taskContext) throws IOException 

Document

This method implements the new interface by calling the old method.
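The old-API org.apache.hadoop.mapred.OutputCommitter also has to satisfy the new org.apache.hadoop.mapreduce.OutputCommitter contract, so this final override bridges the two: it casts the new-style context down to the old org.apache.hadoop.mapred.TaskAttemptContext and delegates to the abstract old-API method. A rough sketch of that bridge (assuming the incoming context is in practice an old-API TaskAttemptContext; not the verbatim Hadoop source) looks like this:

@Override
public final void commitTask(org.apache.hadoop.mapreduce.TaskAttemptContext taskContext) throws IOException {
    // Bridge from the new API to the old one: the context handed in by the
    // new-API caller is downcast to the old-API TaskAttemptContext and
    // forwarded to the abstract commitTask(TaskAttemptContext) overload.
    commitTask((TaskAttemptContext) taskContext);
}

Concrete committers in the old API (for example org.apache.hadoop.mapred.FileOutputCommitter) only need to implement the old-style overload; callers using either API flavour end up in the same place.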

Usage

From source file:com.ibm.jaql.io.hadoop.DefaultHadoopOutputAdapter.java

License:Apache License

public void close() throws Exception {
    if (writer != null) {
        writer.close((Reporter) reporter);
        writer = null;
        OutputCommitter committer = conf.getOutputCommitter();
        committer.commitTask(sequentialTask);
        // FIXME: We skip the job cleanup because the FileOutputCommitter deletes the _temporary, which
        // is shared by all sequential jobs.
        committer.cleanupJob(sequentialJob);
        sequentialTask = null;
        //      
        //      if (committer instanceof FileOutputCommitter) {
        //       
        //        // for this case, only one file is expected
        //        String fileName = new Path(location).getName();
        //        Path pTgt = null;
        //        Path src = null;
        //        try {
        //          pTgt = FileOutputFormat.getOutputPath(conf);
        //          src = FileOutputFormat.getTaskOutputPath(conf, fileName);
        //        } catch(Exception e) {
        //          // TODO: this can happen if the OutputFormat is not a FileOutputFormat,
        //          // i.e., for HBase
        //          LOG.warn("task output files not found");
        //        }
        //        if(pTgt != null && src != null) {
        //          Path tgt = new Path(FileOutputFormat.getOutputPath(conf), fileName);
        //
        //          FileSystem fs = src.getFileSystem(conf);
        //          if(fs.exists(tgt)) {
        //            fs.delete(tgt, true);
        //          }
        //
        //
        //          // rename src to tgt
        //          fs.rename(src, tgt);
        //
        //          // clean-up the temp
        //          Path tmp = new Path(FileOutputFormat.getOutputPath(conf), FileOutputCommitter.TEMP_DIR_NAME);
        //          if(fs.exists(tmp))
        //            fs.delete(tmp, true);
        //        }
        //      }
    }
}

From source file:com.scaleoutsoftware.soss.hserver.hadoop.MapperWrapperMapred.java

License:Apache License

/**
 * Runs the mapper for a single split.
 *
 * @param mapOutputAccumulator mapOutputAccumulator to use
 * @param split                split to run on
 */

@Override
@SuppressWarnings("unchecked")
public void runSplit(final MapOutputAccumulator<OUTKEY, OUTVALUE> mapOutputAccumulator, Object split,
        int splitIndex) throws IOException, ClassNotFoundException, InterruptedException {
    JobConf jobConf = new JobConf(this.jobConf); //Clone JobConf to prevent unexpected task interaction

    TaskAttemptID taskAttemptID = TaskAttemptID
            .downgrade(hadoopVersionSpecificCode.createTaskAttemptId(jobId, true, splitIndex));

    ReducerWrapperMapred.updateJobConf(jobConf, taskAttemptID, splitIndex);
    updateJobWithSplit(jobConf, split);

    InputFormat inputFormat = jobConf.getInputFormat();

    Reporter reporter = Reporter.NULL;

    //Create RecordReader
    org.apache.hadoop.mapred.RecordReader<INKEY, INVALUE> recordReader = inputFormat
            .getRecordReader((InputSplit) split, jobConf, reporter);

    //Make a mapper
    org.apache.hadoop.mapred.Mapper<INKEY, INVALUE, OUTKEY, OUTVALUE> mapper;
    try {
        mapper = (org.apache.hadoop.mapred.Mapper<INKEY, INVALUE, OUTKEY, OUTVALUE>) mapperConstructor
                .newInstance();
        mapper.configure(jobConf);
    } catch (Exception e) {
        throw new RuntimeException("Cannot instantiate mapper " + mapperConstructor.getDeclaringClass(), e);
    }

    //These are to support map only jobs which write output directly to HDFS.
    final RecordWriter outputRecordWriter;
    OutputCommitter outputCommitter = null;
    TaskAttemptContext taskAttemptContext = null;

    if (mapOnlyJob) {

        taskAttemptContext = hadoopVersionSpecificCode.createTaskAttemptContextMapred(jobConf, taskAttemptID);
        OutputFormat outputFormat = jobConf.getOutputFormat();
        FileSystem fs = FileSystem.get(jobConf);
        outputRecordWriter = (org.apache.hadoop.mapred.RecordWriter<OUTKEY, OUTVALUE>) outputFormat
                .getRecordWriter(fs, jobConf, ReducerWrapperMapred.getOutputName(splitIndex), Reporter.NULL);
        outputCommitter = jobConf.getOutputCommitter();

        //Create task object so it can handle file format initialization
        //The MapTask is private in the Hadoop 1.x so we have to go through reflection.
        try {
            Class reduceTask = Class.forName("org.apache.hadoop.mapred.MapTask");
            Constructor reduceTaskConstructor = reduceTask.getDeclaredConstructor(String.class,
                    TaskAttemptID.class, int.class, JobSplit.TaskSplitIndex.class, int.class);
            reduceTaskConstructor.setAccessible(true);
            Task task = (Task) reduceTaskConstructor.newInstance(null, taskAttemptID, splitIndex,
                    new JobSplit.TaskSplitIndex(), 0);
            task.setConf(jobConf);
            task.initialize(jobConf, jobId, Reporter.NULL, false);
        } catch (Exception e) {
            throw new IOException("Cannot initialize MapTask", e);
        }
        outputCommitter.setupTask(taskAttemptContext);
    } else {
        outputRecordWriter = null;
    }

    OutputCollector<OUTKEY, OUTVALUE> outputCollector;

    if (!mapOnlyJob) {
        outputCollector = new OutputCollector<OUTKEY, OUTVALUE>() {
            @Override
            public void collect(OUTKEY outkey, OUTVALUE outvalue) throws IOException {
                try {
                    mapOutputAccumulator.combine(outkey, outvalue);
                } catch (InterruptedException e) {
                    Thread.currentThread().interrupt();
                }
            }
        };
    } else {
        outputCollector = new OutputCollector<OUTKEY, OUTVALUE>() {
            @Override
            public void collect(OUTKEY outkey, OUTVALUE outvalue) throws IOException {
                outputRecordWriter.write(outkey, outvalue);
            }
        };
    }

    INKEY key = recordReader.createKey();
    INVALUE value = recordReader.createValue();

    while (recordReader.next(key, value)) {
        mapper.map(key, value, outputCollector, reporter);
    }
    mapper.close();

    recordReader.close();

    if (mapOnlyJob) {
        outputRecordWriter.close(Reporter.NULL);
        outputCommitter.commitTask(taskAttemptContext);
    }

}

From source file:org.apache.ignite.internal.processors.hadoop.impl.v1.HadoopV1OutputCollector.java

License:Apache License

/**
 * Commits the task.
 *
 * @throws IOException If the commit failed.
 */
public void commit() throws IOException {
    if (writer != null) {
        OutputCommitter outputCommitter = jobConf.getOutputCommitter();

        TaskAttemptContext taskCtx = new TaskAttemptContextImpl(jobConf, attempt);

        if (outputCommitter.needsTaskCommit(taskCtx))
            outputCommitter.commitTask(taskCtx);
    }
}