List of usage examples for org.apache.hadoop.mapred.Mapper#map
void map(K1 key, V1 value, OutputCollector<K2, V2> output, Reporter reporter) throws IOException;
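Before the real-world wrappers below, a minimal sketch of what an implementation of this old-API map method typically looks like. The class name WordCountMapper and the word-count logic are illustrative assumptions, not taken from any of the source files listed on this page.

// Minimal, illustrative old-API mapper; WordCountMapper is hypothetical.
import java.io.IOException;
import java.util.StringTokenizer;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;

public class WordCountMapper extends MapReduceBase
        implements Mapper<LongWritable, Text, Text, IntWritable> {

    private static final IntWritable ONE = new IntWritable(1);
    private final Text word = new Text();

    @Override
    public void map(LongWritable key, Text value, OutputCollector<Text, IntWritable> output,
            Reporter reporter) throws IOException {
        // Emit (word, 1) for every token in the input line; the framework (or one of
        // the wrappers below) calls this method once per record from the RecordReader.
        StringTokenizer tokens = new StringTokenizer(value.toString());
        while (tokens.hasMoreTokens()) {
            word.set(tokens.nextToken());
            output.collect(word, ONE);
        }
    }
}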
From source file:com.scaleoutsoftware.soss.hserver.hadoop.MapperWrapperMapred.java
License:Apache License
/**
 * Runs the mapper for a single split.
 *
 * @param mapOutputAccumulator mapOutputAccumulator to use
 * @param split split to run on
 */
@Override
@SuppressWarnings("unchecked")
public void runSplit(final MapOutputAccumulator<OUTKEY, OUTVALUE> mapOutputAccumulator, Object split,
        int splitIndex) throws IOException, ClassNotFoundException, InterruptedException {
    JobConf jobConf = new JobConf(this.jobConf); // Clone JobConf to prevent unexpected task interaction

    TaskAttemptID taskAttemptID = TaskAttemptID
            .downgrade(hadoopVersionSpecificCode.createTaskAttemptId(jobId, true, splitIndex));

    ReducerWrapperMapred.updateJobConf(jobConf, taskAttemptID, splitIndex);
    updateJobWithSplit(jobConf, split);

    InputFormat inputFormat = jobConf.getInputFormat();

    Reporter reporter = Reporter.NULL;

    // Create RecordReader
    org.apache.hadoop.mapred.RecordReader<INKEY, INVALUE> recordReader = inputFormat
            .getRecordReader((InputSplit) split, jobConf, reporter);

    // Make a mapper
    org.apache.hadoop.mapred.Mapper<INKEY, INVALUE, OUTKEY, OUTVALUE> mapper;
    try {
        mapper = (org.apache.hadoop.mapred.Mapper<INKEY, INVALUE, OUTKEY, OUTVALUE>) mapperConstructor
                .newInstance();
        mapper.configure(jobConf);
    } catch (Exception e) {
        throw new RuntimeException("Cannot instantiate mapper " + mapperConstructor.getDeclaringClass(), e);
    }

    // These are to support map-only jobs which write output directly to HDFS.
    final RecordWriter outputRecordWriter;
    OutputCommitter outputCommitter = null;
    TaskAttemptContext taskAttemptContext = null;

    if (mapOnlyJob) {
        taskAttemptContext = hadoopVersionSpecificCode.createTaskAttemptContextMapred(jobConf, taskAttemptID);
        OutputFormat outputFormat = jobConf.getOutputFormat();
        FileSystem fs = FileSystem.get(jobConf);
        outputRecordWriter = (org.apache.hadoop.mapred.RecordWriter<OUTKEY, OUTVALUE>) outputFormat
                .getRecordWriter(fs, jobConf, ReducerWrapperMapred.getOutputName(splitIndex), Reporter.NULL);
        outputCommitter = jobConf.getOutputCommitter();

        // Create a task object so it can handle file format initialization.
        // MapTask is private in Hadoop 1.x, so we have to go through reflection.
        try {
            Class reduceTask = Class.forName("org.apache.hadoop.mapred.MapTask");
            Constructor reduceTaskConstructor = reduceTask.getDeclaredConstructor(String.class,
                    TaskAttemptID.class, int.class, JobSplit.TaskSplitIndex.class, int.class);
            reduceTaskConstructor.setAccessible(true);

            Task task = (Task) reduceTaskConstructor.newInstance(null, taskAttemptID, splitIndex,
                    new JobSplit.TaskSplitIndex(), 0);
            task.setConf(jobConf);
            task.initialize(jobConf, jobId, Reporter.NULL, false);
        } catch (Exception e) {
            throw new IOException("Cannot initialize MapTask", e);
        }

        outputCommitter.setupTask(taskAttemptContext);
    } else {
        outputRecordWriter = null;
    }

    OutputCollector<OUTKEY, OUTVALUE> outputCollector;

    if (!mapOnlyJob) {
        outputCollector = new OutputCollector<OUTKEY, OUTVALUE>() {
            @Override
            public void collect(OUTKEY outkey, OUTVALUE outvalue) throws IOException {
                try {
                    mapOutputAccumulator.combine(outkey, outvalue);
                } catch (InterruptedException e) {
                    Thread.currentThread().interrupt();
                }
            }
        };
    } else {
        outputCollector = new OutputCollector<OUTKEY, OUTVALUE>() {
            @Override
            public void collect(OUTKEY outkey, OUTVALUE outvalue) throws IOException {
                outputRecordWriter.write(outkey, outvalue);
            }
        };
    }

    INKEY key = recordReader.createKey();
    INVALUE value = recordReader.createValue();

    while (recordReader.next(key, value)) {
        mapper.map(key, value, outputCollector, reporter);
    }

    mapper.close();
    recordReader.close();

    if (mapOnlyJob) {
        outputRecordWriter.close(Reporter.NULL);
        outputCommitter.commitTask(taskAttemptContext);
    }
}
From source file:org.apache.ignite.internal.processors.hadoop.impl.v1.HadoopV1MapTask.java
License:Apache License
/** {@inheritDoc} */
@SuppressWarnings("unchecked")
@Override
public void run(HadoopTaskContext taskCtx) throws IgniteCheckedException {
    HadoopJobEx job = taskCtx.job();

    HadoopV2TaskContext taskCtx0 = (HadoopV2TaskContext) taskCtx;

    if (taskCtx.taskInfo().hasMapperIndex())
        HadoopMapperUtils.mapperIndex(taskCtx.taskInfo().mapperIndex());
    else
        HadoopMapperUtils.clearMapperIndex();

    try {
        JobConf jobConf = taskCtx0.jobConf();

        InputFormat inFormat = jobConf.getInputFormat();

        HadoopInputSplit split = info().inputSplit();

        InputSplit nativeSplit;

        if (split instanceof HadoopFileBlock) {
            HadoopFileBlock block = (HadoopFileBlock) split;

            nativeSplit = new FileSplit(new Path(block.file().toString()), block.start(), block.length(),
                    EMPTY_HOSTS);
        } else
            nativeSplit = (InputSplit) taskCtx0.getNativeSplit(split);

        assert nativeSplit != null;

        Reporter reporter = new HadoopV1Reporter(taskCtx);

        HadoopV1OutputCollector collector = null;

        try {
            collector = collector(jobConf, taskCtx0, !job.info().hasCombiner() && !job.info().hasReducer(),
                    fileName(), taskCtx0.attemptId());

            RecordReader reader = inFormat.getRecordReader(nativeSplit, jobConf, reporter);

            Mapper mapper = ReflectionUtils.newInstance(jobConf.getMapperClass(), jobConf);

            Object key = reader.createKey();
            Object val = reader.createValue();

            assert mapper != null;

            try {
                try {
                    while (reader.next(key, val)) {
                        if (isCancelled())
                            throw new HadoopTaskCancelledException("Map task cancelled.");

                        mapper.map(key, val, collector, reporter);
                    }

                    taskCtx.onMapperFinished();
                } finally {
                    mapper.close();
                }
            } finally {
                collector.closeWriter();
            }

            collector.commit();
        } catch (Exception e) {
            if (collector != null)
                collector.abort();

            throw new IgniteCheckedException(e);
        }
    } finally {
        HadoopMapperUtils.clearMapperIndex();
    }
}
From source file:org.apache.ignite.internal.processors.hadoop.v1.GridHadoopV1MapTask.java
License:Apache License
/** {@inheritDoc} */
@SuppressWarnings("unchecked")
@Override
public void run(GridHadoopTaskContext taskCtx) throws IgniteCheckedException {
    GridHadoopJob job = taskCtx.job();

    GridHadoopV2TaskContext ctx = (GridHadoopV2TaskContext) taskCtx;

    JobConf jobConf = ctx.jobConf();

    InputFormat inFormat = jobConf.getInputFormat();

    GridHadoopInputSplit split = info().inputSplit();

    InputSplit nativeSplit;

    if (split instanceof GridHadoopFileBlock) {
        GridHadoopFileBlock block = (GridHadoopFileBlock) split;

        nativeSplit = new FileSplit(new Path(block.file().toString()), block.start(), block.length(),
                EMPTY_HOSTS);
    } else
        nativeSplit = (InputSplit) ctx.getNativeSplit(split);

    assert nativeSplit != null;

    Reporter reporter = new GridHadoopV1Reporter(taskCtx);

    GridHadoopV1OutputCollector collector = null;

    try {
        collector = collector(jobConf, ctx, !job.info().hasCombiner() && !job.info().hasReducer(),
                fileName(), ctx.attemptId());

        RecordReader reader = inFormat.getRecordReader(nativeSplit, jobConf, reporter);

        Mapper mapper = ReflectionUtils.newInstance(jobConf.getMapperClass(), jobConf);

        Object key = reader.createKey();
        Object val = reader.createValue();

        assert mapper != null;

        try {
            try {
                while (reader.next(key, val)) {
                    if (isCancelled())
                        throw new GridHadoopTaskCancelledException("Map task cancelled.");

                    mapper.map(key, val, collector, reporter);
                }
            } finally {
                mapper.close();
            }
        } finally {
            collector.closeWriter();
        }

        collector.commit();
    } catch (Exception e) {
        if (collector != null)
            collector.abort();

        throw new IgniteCheckedException(e);
    }
}
From source file:org.apache.ignite.internal.processors.hadoop.v1.HadoopV1MapTask.java
License:Apache License
/** {@inheritDoc} */
@SuppressWarnings("unchecked")
@Override
public void run(HadoopTaskContext taskCtx) throws IgniteCheckedException {
    HadoopJob job = taskCtx.job();

    HadoopV2TaskContext ctx = (HadoopV2TaskContext) taskCtx;

    JobConf jobConf = ctx.jobConf();

    InputFormat inFormat = jobConf.getInputFormat();

    HadoopInputSplit split = info().inputSplit();

    InputSplit nativeSplit;

    if (split instanceof HadoopFileBlock) {
        HadoopFileBlock block = (HadoopFileBlock) split;

        nativeSplit = new FileSplit(new Path(block.file().toString()), block.start(), block.length(),
                EMPTY_HOSTS);
    } else
        nativeSplit = (InputSplit) ctx.getNativeSplit(split);

    assert nativeSplit != null;

    Reporter reporter = new HadoopV1Reporter(taskCtx);

    HadoopV1OutputCollector collector = null;

    try {
        collector = collector(jobConf, ctx, !job.info().hasCombiner() && !job.info().hasReducer(),
                fileName(), ctx.attemptId());

        RecordReader reader = inFormat.getRecordReader(nativeSplit, jobConf, reporter);

        Mapper mapper = ReflectionUtils.newInstance(jobConf.getMapperClass(), jobConf);

        Object key = reader.createKey();
        Object val = reader.createValue();

        assert mapper != null;

        try {
            try {
                while (reader.next(key, val)) {
                    if (isCancelled())
                        throw new HadoopTaskCancelledException("Map task cancelled.");

                    mapper.map(key, val, collector, reporter);
                }
            } finally {
                mapper.close();
            }
        } finally {
            collector.closeWriter();
        }

        collector.commit();
    } catch (Exception e) {
        if (collector != null)
            collector.abort();

        throw new IgniteCheckedException(e);
    }
}
From source file:org.gridgain.grid.kernal.processors.hadoop.v1.GridHadoopV1MapTask.java
License:Open Source License
/** {@inheritDoc} */
@SuppressWarnings("unchecked")
@Override
public void run(GridHadoopTaskContext taskCtx) throws GridException {
    GridHadoopJob job = taskCtx.job();

    GridHadoopV2TaskContext ctx = (GridHadoopV2TaskContext) taskCtx;

    JobConf jobConf = ctx.jobConf();

    InputFormat inFormat = jobConf.getInputFormat();

    GridHadoopInputSplit split = info().inputSplit();

    InputSplit nativeSplit;

    if (split instanceof GridHadoopFileBlock) {
        GridHadoopFileBlock block = (GridHadoopFileBlock) split;

        nativeSplit = new FileSplit(new Path(block.file().toString()), block.start(), block.length(),
                EMPTY_HOSTS);
    } else
        nativeSplit = (InputSplit) ctx.getNativeSplit(split);

    assert nativeSplit != null;

    Reporter reporter = new GridHadoopV1Reporter(taskCtx);

    GridHadoopV1OutputCollector collector = null;

    try {
        collector = collector(jobConf, ctx, !job.info().hasCombiner() && !job.info().hasReducer(),
                fileName(), ctx.attemptId());

        RecordReader reader = inFormat.getRecordReader(nativeSplit, jobConf, reporter);

        Mapper mapper = ReflectionUtils.newInstance(jobConf.getMapperClass(), jobConf);

        Object key = reader.createKey();
        Object val = reader.createValue();

        assert mapper != null;

        try {
            try {
                while (reader.next(key, val)) {
                    if (isCancelled())
                        throw new GridHadoopTaskCancelledException("Map task cancelled.");

                    mapper.map(key, val, collector, reporter);
                }
            } finally {
                mapper.close();
            }
        } finally {
            collector.closeWriter();
        }

        collector.commit();
    } catch (Exception e) {
        if (collector != null)
            collector.abort();

        throw new GridException(e);
    }
}
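All of the examples above drive the mapper by hand: they obtain a RecordReader from the InputFormat and call mapper.map(key, value, collector, reporter) once per record. In an ordinary job the Hadoop framework performs that loop itself after a driver configures the mapper class. A minimal sketch of such a driver follows; the class name WordCountDriver, the reuse of the hypothetical WordCountMapper from the sketch near the top of this page, and the argument-based paths are illustrative assumptions.

// Hypothetical old-API driver; names and paths are illustrative only.
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;

public class WordCountDriver {
    public static void main(String[] args) throws Exception {
        JobConf conf = new JobConf(WordCountDriver.class);
        conf.setJobName("word-count");

        conf.setMapperClass(WordCountMapper.class); // hypothetical mapper from the sketch above
        conf.setOutputKeyClass(Text.class);
        conf.setOutputValueClass(IntWritable.class);

        FileInputFormat.setInputPaths(conf, new Path(args[0]));
        FileOutputFormat.setOutputPath(conf, new Path(args[1]));

        // For each input split the framework creates a RecordReader and calls
        // mapper.map(key, value, output, reporter) once per record, exactly like
        // the hand-rolled loops in the wrapper examples above.
        JobClient.runJob(conf);
    }
}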