List of usage examples for org.apache.hadoop.mapreduce.Mapper#run
public void run(Context context) throws IOException, InterruptedException
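For context, run(Context) is the driver loop of a Mapper: in current Hadoop releases the default implementation roughly calls setup(context), then map() for every key/value pair the context supplies via nextKeyValue(), and finally cleanup(context). Overriding it is the standard hook for changing that loop, which is what the examples below exploit. The following is a minimal sketch, assuming the Hadoop 2.x mapreduce API; the LoggingMapper class, the LOG_INTERVAL constant and the status message are illustrative and not taken from any of the projects listed below.

import java.io.IOException;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

/**
 * Hypothetical mapper that overrides run(Context) but keeps the standard
 * setup / map-loop / cleanup structure, adding only periodic status updates.
 */
public class LoggingMapper extends Mapper<LongWritable, Text, Text, LongWritable> {

    private static final long LOG_INTERVAL = 100_000L; // illustrative constant

    @Override
    public void run(Context context) throws IOException, InterruptedException {
        setup(context);
        long processed = 0;
        try {
            // Same contract as the default implementation: pull records from the
            // context and hand them to map() until the split is exhausted.
            while (context.nextKeyValue()) {
                map(context.getCurrentKey(), context.getCurrentValue(), context);
                if (++processed % LOG_INTERVAL == 0) {
                    context.setStatus("processed " + processed + " records");
                }
            }
        } finally {
            cleanup(context);
        }
    }

    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        context.write(value, new LongWritable(1));
    }
}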
From source file:co.cask.cdap.internal.app.runtime.batch.MapperWrapper.java
License:Apache License
@SuppressWarnings("unchecked") @Override// w w w .j a v a2 s . c o m public void run(Context context) throws IOException, InterruptedException { MapReduceClassLoader classLoader = MapReduceClassLoader.getFromConfiguration(context.getConfiguration()); BasicMapReduceTaskContext basicMapReduceContext = classLoader.getTaskContextProvider().get(context); // this is a hook for periodic flushing of changes buffered by datasets (to avoid OOME) WrappedMapper.Context flushingContext = createAutoFlushingContext(context, basicMapReduceContext); basicMapReduceContext.setHadoopContext(flushingContext); InputSplit inputSplit = context.getInputSplit(); if (inputSplit instanceof TaggedInputSplit) { basicMapReduceContext.setInputName(((TaggedInputSplit) inputSplit).getName()); } ClassLoader programClassLoader = classLoader.getProgramClassLoader(); Mapper delegate = createMapperInstance(programClassLoader, getWrappedMapper(context.getConfiguration()), context); // injecting runtime components, like datasets, etc. try { Reflections.visit(delegate, delegate.getClass(), new PropertyFieldSetter(basicMapReduceContext.getSpecification().getProperties()), new MetricsFieldSetter(basicMapReduceContext.getMetrics()), new DataSetFieldSetter(basicMapReduceContext)); } catch (Throwable t) { LOG.error("Failed to inject fields to {}.", delegate.getClass(), t); throw Throwables.propagate(t); } ClassLoader oldClassLoader; if (delegate instanceof ProgramLifecycle) { oldClassLoader = ClassLoaders.setContextClassLoader(programClassLoader); try { ((ProgramLifecycle) delegate).initialize(new MapReduceLifecycleContext(basicMapReduceContext)); } catch (Exception e) { LOG.error("Failed to initialize mapper with {}", basicMapReduceContext, e); throw Throwables.propagate(e); } finally { ClassLoaders.setContextClassLoader(oldClassLoader); } } oldClassLoader = ClassLoaders.setContextClassLoader(programClassLoader); try { delegate.run(flushingContext); } finally { ClassLoaders.setContextClassLoader(oldClassLoader); } // transaction is not finished, but we want all operations to be dispatched (some could be buffered in // memory by tx agent) try { basicMapReduceContext.flushOperations(); } catch (Exception e) { LOG.error("Failed to flush operations at the end of mapper of {}", basicMapReduceContext, e); throw Throwables.propagate(e); } // Close all writers created by MultipleOutputs basicMapReduceContext.closeMultiOutputs(); if (delegate instanceof ProgramLifecycle) { oldClassLoader = ClassLoaders.setContextClassLoader(programClassLoader); try { ((ProgramLifecycle<? extends RuntimeContext>) delegate).destroy(); } catch (Exception e) { LOG.error("Error during destroy of mapper {}", basicMapReduceContext, e); // Do nothing, try to finish } finally { ClassLoaders.setContextClassLoader(oldClassLoader); } } }
From source file:com.asakusafw.runtime.mapreduce.simple.SimpleJobRunner.java
License:Apache License
@SuppressWarnings({ "rawtypes", "unchecked" }) private void runMap(Job job, KeyValueSorter<?, ?> sorter) throws IOException, InterruptedException, ClassNotFoundException { Configuration conf = job.getConfiguration(); InputFormat<?, ?> input = ReflectionUtils.newInstance(job.getInputFormatClass(), conf); List<InputSplit> splits = input.getSplits(job); int serial = 1; for (InputSplit split : splits) { TaskAttemptID id = newTaskAttemptId(newMapTaskId(job.getJobID(), serial++), 0); Mapper<?, ?, ?, ?> mapper = ReflectionUtils.newInstance(job.getMapperClass(), conf); if (LOG.isDebugEnabled()) { LOG.debug(MessageFormat.format("starting mapper: {0}@{1} ({2}bytes)", //$NON-NLS-1$ mapper.getClass().getName(), id, split.getLength())); }/*from w ww. j ava 2 s. c om*/ TaskAttemptContext context = newTaskAttemptContext(conf, id); // we always obtain a new OutputFormat object / OutputFormat.getOutputCommiter() may be cached OutputFormat<?, ?> output = ReflectionUtils.newInstance(job.getOutputFormatClass(), conf); OutputCommitter committer = output.getOutputCommitter(context); committer.setupTask(context); boolean succeed = false; try (RecordReader<?, ?> reader = input.createRecordReader(split, newTaskAttemptContext(conf, id))) { RecordWriter<?, ?> writer; if (sorter != null) { writer = new ShuffleWriter(sorter); } else { writer = output.getRecordWriter(newTaskAttemptContext(conf, id)); } try { Mapper.Context c = newMapperContext(conf, id, reader, writer, committer, split); reader.initialize(split, c); mapper.run(c); } finally { writer.close(newTaskAttemptContext(conf, id)); } doCommitTask(context, committer); succeed = true; } finally { if (succeed == false) { doAbortTask(context, committer); } } } }
From source file:com.marklogic.contentpump.LocalJobRunner.java
License:Apache License
/**
 * Run the job. Get the input splits, create map tasks and submit them to
 * the thread pool if there is one; otherwise, run the tasks one by one.
 *
 * @param <INKEY>
 * @param <INVALUE>
 * @param <OUTKEY>
 * @param <OUTVALUE>
 * @throws Exception
 */
@SuppressWarnings("unchecked")
public <INKEY, INVALUE, OUTKEY, OUTVALUE, T extends org.apache.hadoop.mapreduce.InputSplit> void run()
        throws Exception {
    Configuration conf = job.getConfiguration();
    InputFormat<INKEY, INVALUE> inputFormat = (InputFormat<INKEY, INVALUE>) ReflectionUtils
            .newInstance(job.getInputFormatClass(), conf);
    List<InputSplit> splits = inputFormat.getSplits(job);
    T[] array = (T[]) splits.toArray(new org.apache.hadoop.mapreduce.InputSplit[splits.size()]);

    // sort the splits into order based on size, so that the biggest goes first
    Arrays.sort(array, new SplitLengthComparator());
    OutputFormat<OUTKEY, OUTVALUE> outputFormat = (OutputFormat<OUTKEY, OUTVALUE>) ReflectionUtils
            .newInstance(job.getOutputFormatClass(), conf);
    Class<? extends Mapper<?, ?, ?, ?>> mapperClass = job.getMapperClass();
    Mapper<INKEY, INVALUE, OUTKEY, OUTVALUE> mapper = (Mapper<INKEY, INVALUE, OUTKEY, OUTVALUE>) ReflectionUtils
            .newInstance(mapperClass, conf);
    try {
        outputFormat.checkOutputSpecs(job);
    } catch (Exception ex) {
        if (LOG.isDebugEnabled()) {
            LOG.debug("Error checking output specification: ", ex);
        } else {
            LOG.error("Error checking output specification: ");
            LOG.error(ex.getMessage());
        }
        return;
    }
    conf = job.getConfiguration();
    progress = new AtomicInteger[splits.size()];
    for (int i = 0; i < splits.size(); i++) {
        progress[i] = new AtomicInteger();
    }
    Monitor monitor = new Monitor();
    monitor.start();
    reporter = new ContentPumpReporter();
    List<Future<Object>> taskList = new ArrayList<Future<Object>>();
    for (int i = 0; i < array.length; i++) {
        InputSplit split = array[i];
        if (pool != null) {
            LocalMapTask<INKEY, INVALUE, OUTKEY, OUTVALUE> task = new LocalMapTask<INKEY, INVALUE, OUTKEY, OUTVALUE>(
                    inputFormat, outputFormat, conf, i, split, reporter, progress[i]);
            availableThreads = assignThreads(i, array.length);
            Class<? extends Mapper<?, ?, ?, ?>> runtimeMapperClass = job.getMapperClass();
            if (availableThreads > 1 && availableThreads != threadsPerSplit) {
                // possible runtime adjustment
                if (runtimeMapperClass != (Class) MultithreadedMapper.class) {
                    runtimeMapperClass = (Class<? extends Mapper<INKEY, INVALUE, OUTKEY, OUTVALUE>>) cmd
                            .getRuntimeMapperClass(job, mapperClass, threadsPerSplit, availableThreads);
                }
                if (runtimeMapperClass != mapperClass) {
                    task.setMapperClass(runtimeMapperClass);
                }
                if (runtimeMapperClass == (Class) MultithreadedMapper.class) {
                    task.setThreadCount(availableThreads);
                    if (LOG.isDebugEnabled()) {
                        LOG.debug("Thread Count for Split#" + i + " : " + availableThreads);
                    }
                }
            }
            if (runtimeMapperClass == (Class) MultithreadedMapper.class) {
                synchronized (pool) {
                    taskList.add(pool.submit(task));
                    pool.wait();
                }
            } else {
                pool.submit(task);
            }
        } else { // single-threaded
            JobID jid = new JobID();
            TaskID taskId = new TaskID(jid.getJtIdentifier(), jid.getId(), TaskType.MAP, i);
            TaskAttemptID taskAttemptId = new TaskAttemptID(taskId, 0);
            TaskAttemptContext context = ReflectionUtil.createTaskAttemptContext(conf, taskAttemptId);
            RecordReader<INKEY, INVALUE> reader = inputFormat.createRecordReader(split, context);
            RecordWriter<OUTKEY, OUTVALUE> writer = outputFormat.getRecordWriter(context);
            OutputCommitter committer = outputFormat.getOutputCommitter(context);
            TrackingRecordReader trackingReader = new TrackingRecordReader(reader, progress[i]);

            Mapper.Context mapperContext = ReflectionUtil.createMapperContext(mapper, conf, taskAttemptId,
                    trackingReader, writer, committer, reporter, split);

            trackingReader.initialize(split, mapperContext);

            // no thread pool (only 1 thread specified)
            Class<? extends Mapper<?, ?, ?, ?>> mapClass = job.getMapperClass();
            mapperContext.getConfiguration().setClass(CONF_MAPREDUCE_JOB_MAP_CLASS, mapClass, Mapper.class);
            mapper = (Mapper<INKEY, INVALUE, OUTKEY, OUTVALUE>) ReflectionUtils.newInstance(mapClass,
                    mapperContext.getConfiguration());
            mapper.run(mapperContext);
            trackingReader.close();
            writer.close(mapperContext);
            committer.commitTask(context);
        }
    }
    // wait till all tasks are done
    if (pool != null) {
        for (Future<Object> f : taskList) {
            f.get();
        }
        pool.shutdown();
        while (!pool.awaitTermination(1, TimeUnit.DAYS))
            ;
        jobComplete.set(true);
    }
    monitor.interrupt();
    monitor.join(1000);

    // report counters
    Iterator<CounterGroup> groupIt = reporter.counters.iterator();
    while (groupIt.hasNext()) {
        CounterGroup group = groupIt.next();
        LOG.info(group.getDisplayName() + ": ");
        Iterator<Counter> counterIt = group.iterator();
        while (counterIt.hasNext()) {
            Counter counter = counterIt.next();
            LOG.info(counter.getDisplayName() + ": " + counter.getValue());
        }
    }
    LOG.info("Total execution time: " + (System.currentTimeMillis() - startTime) / 1000 + " sec");
}
From source file:com.scaleoutsoftware.soss.hserver.hadoop.MapperWrapperMapreduce.java
License:Apache License
/**
 * Runs the mapper for a single split.
 *
 * @param mapOutputAccumulator mapOutputAccumulator to use
 * @param split split to run on
 */
@Override
@SuppressWarnings("unchecked")
public void runSplit(MapOutputAccumulator<OUTKEY, OUTVALUE> mapOutputAccumulator, Object split, int splitIndex)
        throws IOException, ClassNotFoundException, InterruptedException {
    TaskAttemptID taskAttemptId = hadoopVersionSpecificCode.createTaskAttemptId(jobId, true, splitIndex);
    // Setup task ID info
    TaskAttemptContext taskContext = hadoopVersionSpecificCode.createTaskAttemptContext(configuration,
            taskAttemptId);

    InputFormat inputFormat = ReflectionUtils.newInstance(jobContext.getInputFormatClass(), configuration);

    // Create RecordReader
    org.apache.hadoop.mapreduce.RecordReader<INKEY, INVALUE> input = inputFormat
            .createRecordReader((InputSplit) split, taskContext);

    // Make a mapper
    org.apache.hadoop.mapreduce.Mapper<INKEY, INVALUE, OUTKEY, OUTVALUE> mapper;
    try {
        mapper = (org.apache.hadoop.mapreduce.Mapper<INKEY, INVALUE, OUTKEY, OUTVALUE>) mapperConstructor
                .newInstance();
    } catch (Exception e) {
        throw new RuntimeException(e);
    }

    org.apache.hadoop.mapreduce.RecordWriter output;
    OutputCommitter committer = null;
    if (mapOnlyJob) {
        OutputFormat outputFormat = ReflectionUtils.newInstance(jobContext.getOutputFormatClass(),
                configuration);
        output = (org.apache.hadoop.mapreduce.RecordWriter<OUTKEY, OUTVALUE>) outputFormat
                .getRecordWriter(taskContext);
        committer = outputFormat.getOutputCommitter(taskContext);
        committer.setupTask(taskContext);
    } else {
        output = new MapOutputCollector<OUTKEY, OUTVALUE>(mapOutputAccumulator);
    }

    input.initialize((InputSplit) split, taskContext);

    org.apache.hadoop.mapreduce.Mapper<INKEY, INVALUE, OUTKEY, OUTVALUE>.Context mapperContext = hadoopVersionSpecificCode
            .getMapperContext(configuration, taskAttemptId, input, output);
    mapper.run(mapperContext);

    input.close();
    output.close(mapperContext);

    if (mapOnlyJob && committer != null) {
        committer.commitTask(taskContext);
    }
}
From source file:edu.uci.ics.hyracks.dataflow.hadoop.mapreduce.MapperOperatorDescriptor.java
License:Apache License
@Override
public IOperatorNodePushable createPushRuntime(final IHyracksTaskContext ctx,
        IRecordDescriptorProvider recordDescProvider, final int partition, final int nPartitions)
        throws HyracksDataException {
    final HadoopHelper helper = new HadoopHelper(config);
    final Configuration conf = helper.getConfiguration();
    final Mapper<K1, V1, K2, V2> mapper = helper.getMapper();
    final InputFormat<K1, V1> inputFormat = helper.getInputFormat();
    final IInputSplitProvider isp = factory.createInputSplitProvider(partition);
    final TaskAttemptID taId = new TaskAttemptID("foo", jobId, true, partition, 0);
    final TaskAttemptContext taskAttemptContext = helper.createTaskAttemptContext(taId);

    final int framesLimit = helper.getSortFrameLimit(ctx);
    final IBinaryComparatorFactory[] comparatorFactories = helper.getSortComparatorFactories();

    class SortingRecordWriter extends RecordWriter<K2, V2> {
        private final ArrayTupleBuilder tb;
        private final ByteBuffer frame;
        private final FrameTupleAppender fta;
        private ExternalSortRunGenerator runGen;
        private int blockId;

        public SortingRecordWriter() throws HyracksDataException {
            tb = new ArrayTupleBuilder(2);
            frame = ctx.allocateFrame();
            fta = new FrameTupleAppender(ctx.getFrameSize());
            fta.reset(frame, true);
        }

        public void initBlock(int blockId) throws HyracksDataException {
            runGen = new ExternalSortRunGenerator(ctx, new int[] { 0 }, null, comparatorFactories,
                    helper.getMapOutputRecordDescriptorWithoutExtraFields(), Algorithm.MERGE_SORT, framesLimit);
            this.blockId = blockId;
        }

        @Override
        public void close(TaskAttemptContext arg0) throws IOException, InterruptedException {
        }

        @Override
        public void write(K2 key, V2 value) throws IOException, InterruptedException {
            DataOutput dos = tb.getDataOutput();
            tb.reset();
            key.write(dos);
            tb.addFieldEndOffset();
            value.write(dos);
            tb.addFieldEndOffset();
            if (!fta.append(tb.getFieldEndOffsets(), tb.getByteArray(), 0, tb.getSize())) {
                runGen.nextFrame(frame);
                fta.reset(frame, true);
                if (!fta.append(tb.getFieldEndOffsets(), tb.getByteArray(), 0, tb.getSize())) {
                    throw new HyracksDataException("Record size (" + tb.getSize() + ") larger than frame size ("
                            + frame.capacity() + ")");
                }
            }
        }

        public void sortAndFlushBlock(final IFrameWriter writer) throws HyracksDataException {
            if (fta.getTupleCount() > 0) {
                runGen.nextFrame(frame);
                fta.reset(frame, true);
            }
            runGen.close();
            IFrameWriter delegatingWriter = new IFrameWriter() {
                private final FrameTupleAppender appender = new FrameTupleAppender(ctx.getFrameSize());
                private final ByteBuffer outFrame = ctx.allocateFrame();
                private final FrameTupleAccessor fta = new FrameTupleAccessor(ctx.getFrameSize(),
                        helper.getMapOutputRecordDescriptorWithoutExtraFields());
                private final ArrayTupleBuilder tb = new ArrayTupleBuilder(3);

                @Override
                public void open() throws HyracksDataException {
                    appender.reset(outFrame, true);
                }

                @Override
                public void nextFrame(ByteBuffer buffer) throws HyracksDataException {
                    fta.reset(buffer);
                    int n = fta.getTupleCount();
                    for (int i = 0; i < n; ++i) {
                        tb.reset();
                        tb.addField(fta, i, 0);
                        tb.addField(fta, i, 1);
                        try {
                            tb.getDataOutput().writeInt(blockId);
                        } catch (IOException e) {
                            throw new HyracksDataException(e);
                        }
                        tb.addFieldEndOffset();
                        if (!appender.append(tb.getFieldEndOffsets(), tb.getByteArray(), 0, tb.getSize())) {
                            FrameUtils.flushFrame(outFrame, writer);
                            appender.reset(outFrame, true);
                            if (!appender.append(tb.getFieldEndOffsets(), tb.getByteArray(), 0, tb.getSize())) {
                                throw new IllegalStateException();
                            }
                        }
                    }
                }

                @Override
                public void close() throws HyracksDataException {
                    if (appender.getTupleCount() > 0) {
                        FrameUtils.flushFrame(outFrame, writer);
                    }
                }

                @Override
                public void fail() throws HyracksDataException {
                    // TODO Auto-generated method stub
                }
            };
            if (helper.hasCombiner()) {
                Reducer<K2, V2, K2, V2> combiner = helper.getCombiner();
                TaskAttemptID ctaId = new TaskAttemptID("foo", jobId, true, partition, 0);
                TaskAttemptContext ctaskAttemptContext = helper.createTaskAttemptContext(taId);
                final IFrameWriter outputWriter = delegatingWriter;
                RecordWriter<K2, V2> recordWriter = new RecordWriter<K2, V2>() {
                    private final FrameTupleAppender fta = new FrameTupleAppender(ctx.getFrameSize());
                    private final ByteBuffer buffer = ctx.allocateFrame();
                    private final ArrayTupleBuilder tb = new ArrayTupleBuilder(2);

                    {
                        fta.reset(buffer, true);
                        outputWriter.open();
                    }

                    @Override
                    public void write(K2 key, V2 value) throws IOException, InterruptedException {
                        DataOutput dos = tb.getDataOutput();
                        tb.reset();
                        key.write(dos);
                        tb.addFieldEndOffset();
                        value.write(dos);
                        tb.addFieldEndOffset();
                        if (!fta.append(tb.getFieldEndOffsets(), tb.getByteArray(), 0, tb.getSize())) {
                            FrameUtils.flushFrame(buffer, outputWriter);
                            fta.reset(buffer, true);
                            if (!fta.append(tb.getFieldEndOffsets(), tb.getByteArray(), 0, tb.getSize())) {
                                throw new IllegalStateException();
                            }
                        }
                    }

                    @Override
                    public void close(TaskAttemptContext context) throws IOException, InterruptedException {
                        if (fta.getTupleCount() > 0) {
                            FrameUtils.flushFrame(buffer, outputWriter);
                            outputWriter.close();
                        }
                    }
                };
                delegatingWriter = new ReduceWriter<K2, V2, K2, V2>(ctx, helper,
                        new int[] { HadoopHelper.KEY_FIELD_INDEX }, helper.getGroupingComparatorFactories(),
                        helper.getMapOutputRecordDescriptorWithoutExtraFields(), combiner, recordWriter, ctaId,
                        ctaskAttemptContext);
            }
            IBinaryComparator[] comparators = new IBinaryComparator[comparatorFactories.length];
            for (int i = 0; i < comparatorFactories.length; ++i) {
                comparators[i] = comparatorFactories[i].createBinaryComparator();
            }
            ExternalSortRunMerger merger = new ExternalSortRunMerger(ctx, runGen.getFrameSorter(),
                    runGen.getRuns(), new int[] { 0 }, comparators, null,
                    helper.getMapOutputRecordDescriptorWithoutExtraFields(), framesLimit, delegatingWriter);
            merger.process();
        }
    }

    return new AbstractUnaryOutputSourceOperatorNodePushable() {
        @Override
        public void initialize() throws HyracksDataException {
            writer.open();
            try {
                SortingRecordWriter recordWriter = new SortingRecordWriter();
                InputSplit split = null;
                int blockId = 0;
                while ((split = isp.next()) != null) {
                    try {
                        RecordReader<K1, V1> recordReader = inputFormat.createRecordReader(split,
                                taskAttemptContext);
                        ClassLoader ctxCL = Thread.currentThread().getContextClassLoader();
                        try {
                            Thread.currentThread().setContextClassLoader(getClass().getClassLoader());
                            recordReader.initialize(split, taskAttemptContext);
                        } finally {
                            Thread.currentThread().setContextClassLoader(ctxCL);
                        }
                        recordWriter.initBlock(blockId);
                        Mapper<K1, V1, K2, V2>.Context mCtx = new MRContextUtil().createMapContext(conf, taId,
                                recordReader, recordWriter, null, null, split);
                        mapper.run(mCtx);
                        recordReader.close();
                        recordWriter.sortAndFlushBlock(writer);
                        ++blockId;
                    } catch (IOException e) {
                        throw new HyracksDataException(e);
                    } catch (InterruptedException e) {
                        throw new HyracksDataException(e);
                    }
                }
            } finally {
                writer.close();
            }
        }
    };
}
From source file:org.apache.hyracks.dataflow.hadoop.mapreduce.MapperOperatorDescriptor.java
License:Apache License
@SuppressWarnings("deprecation") @Override/*from w w w . j a v a2s . c o m*/ public IOperatorNodePushable createPushRuntime(final IHyracksTaskContext ctx, IRecordDescriptorProvider recordDescProvider, final int partition, final int nPartitions) throws HyracksDataException { final HadoopHelper helper = new HadoopHelper(config); final Configuration conf = helper.getConfiguration(); final Mapper<K1, V1, K2, V2> mapper = helper.getMapper(); final InputFormat<K1, V1> inputFormat = helper.getInputFormat(); final IInputSplitProvider isp = factory.createInputSplitProvider(partition); final TaskAttemptID taId = new TaskAttemptID("foo", jobId, true, partition, 0); final TaskAttemptContext taskAttemptContext = helper.createTaskAttemptContext(taId); final int framesLimit = helper.getSortFrameLimit(ctx); final IBinaryComparatorFactory[] comparatorFactories = helper.getSortComparatorFactories(); class SortingRecordWriter extends RecordWriter<K2, V2> { private final ArrayTupleBuilder tb; private final IFrame frame; private final FrameTupleAppender fta; private ExternalSortRunGenerator runGen; private int blockId; public SortingRecordWriter() throws HyracksDataException { tb = new ArrayTupleBuilder(2); frame = new VSizeFrame(ctx); fta = new FrameTupleAppender(frame); } public void initBlock(int blockId) throws HyracksDataException { runGen = new ExternalSortRunGenerator(ctx, new int[] { 0 }, null, comparatorFactories, helper.getMapOutputRecordDescriptorWithoutExtraFields(), Algorithm.MERGE_SORT, framesLimit); this.blockId = blockId; } @Override public void close(TaskAttemptContext arg0) throws IOException, InterruptedException { } @Override public void write(K2 key, V2 value) throws IOException, InterruptedException { DataOutput dos = tb.getDataOutput(); tb.reset(); key.write(dos); tb.addFieldEndOffset(); value.write(dos); tb.addFieldEndOffset(); if (!fta.append(tb.getFieldEndOffsets(), tb.getByteArray(), 0, tb.getSize())) { runGen.nextFrame(frame.getBuffer()); fta.reset(frame, true); if (!fta.append(tb.getFieldEndOffsets(), tb.getByteArray(), 0, tb.getSize())) { throw new HyracksDataException("Record size (" + tb.getSize() + ") larger than frame size (" + frame.getBuffer().capacity() + ")"); } } } public void sortAndFlushBlock(final IFrameWriter writer) throws HyracksDataException { if (fta.getTupleCount() > 0) { runGen.nextFrame(frame.getBuffer()); fta.reset(frame, true); } runGen.close(); IFrameWriter delegatingWriter = new IFrameWriter() { private final FrameTupleAppender appender = new FrameTupleAppender(new VSizeFrame(ctx)); private final FrameTupleAccessor fta = new FrameTupleAccessor( helper.getMapOutputRecordDescriptorWithoutExtraFields()); private final ArrayTupleBuilder tb = new ArrayTupleBuilder(3); @Override public void open() throws HyracksDataException { } @Override public void nextFrame(ByteBuffer buffer) throws HyracksDataException { fta.reset(buffer); int n = fta.getTupleCount(); for (int i = 0; i < n; ++i) { tb.reset(); tb.addField(fta, i, 0); tb.addField(fta, i, 1); try { tb.getDataOutput().writeInt(blockId); } catch (IOException e) { throw new HyracksDataException(e); } tb.addFieldEndOffset(); if (!appender.append(tb.getFieldEndOffsets(), tb.getByteArray(), 0, tb.getSize())) { appender.flush(writer, true); if (!appender.append(tb.getFieldEndOffsets(), tb.getByteArray(), 0, tb.getSize())) { throw new IllegalStateException(); } } } } @Override public void close() throws HyracksDataException { appender.flush(writer, true); } @Override public void fail() throws HyracksDataException { // 
TODO Auto-generated method stub } }; if (helper.hasCombiner()) { Reducer<K2, V2, K2, V2> combiner = helper.getCombiner(); TaskAttemptID ctaId = new TaskAttemptID("foo", jobId, true, partition, 0); TaskAttemptContext ctaskAttemptContext = helper.createTaskAttemptContext(taId); final IFrameWriter outputWriter = delegatingWriter; RecordWriter<K2, V2> recordWriter = new RecordWriter<K2, V2>() { private final FrameTupleAppender fta = new FrameTupleAppender(new VSizeFrame(ctx)); private final ArrayTupleBuilder tb = new ArrayTupleBuilder(2); { outputWriter.open(); } @Override public void write(K2 key, V2 value) throws IOException, InterruptedException { DataOutput dos = tb.getDataOutput(); tb.reset(); key.write(dos); tb.addFieldEndOffset(); value.write(dos); tb.addFieldEndOffset(); if (!fta.append(tb.getFieldEndOffsets(), tb.getByteArray(), 0, tb.getSize())) { fta.flush(outputWriter, true); if (!fta.append(tb.getFieldEndOffsets(), tb.getByteArray(), 0, tb.getSize())) { throw new IllegalStateException(); } } } @Override public void close(TaskAttemptContext context) throws IOException, InterruptedException { fta.flush(outputWriter, true); } }; delegatingWriter = new ReduceWriter<K2, V2, K2, V2>(ctx, helper, new int[] { HadoopHelper.KEY_FIELD_INDEX }, helper.getGroupingComparatorFactories(), helper.getMapOutputRecordDescriptorWithoutExtraFields(), combiner, recordWriter, ctaId, ctaskAttemptContext); } IBinaryComparator[] comparators = new IBinaryComparator[comparatorFactories.length]; for (int i = 0; i < comparatorFactories.length; ++i) { comparators[i] = comparatorFactories[i].createBinaryComparator(); } ExternalSortRunMerger merger = new ExternalSortRunMerger(ctx, runGen.getSorter(), runGen.getRuns(), new int[] { 0 }, comparators, null, helper.getMapOutputRecordDescriptorWithoutExtraFields(), framesLimit, delegatingWriter); merger.process(); } } return new AbstractUnaryOutputSourceOperatorNodePushable() { @SuppressWarnings("unchecked") @Override public void initialize() throws HyracksDataException { try { writer.open(); SortingRecordWriter recordWriter = new SortingRecordWriter(); InputSplit split = null; int blockId = 0; while ((split = isp.next()) != null) { try { RecordReader<K1, V1> recordReader = inputFormat.createRecordReader(split, taskAttemptContext); ClassLoader ctxCL = Thread.currentThread().getContextClassLoader(); try { Thread.currentThread().setContextClassLoader(getClass().getClassLoader()); recordReader.initialize(split, taskAttemptContext); } finally { Thread.currentThread().setContextClassLoader(ctxCL); } recordWriter.initBlock(blockId); Mapper<K1, V1, K2, V2>.Context mCtx = new MRContextUtil().createMapContext(conf, taId, recordReader, recordWriter, null, null, split); mapper.run(mCtx); recordReader.close(); recordWriter.sortAndFlushBlock(writer); ++blockId; } catch (IOException e) { throw new HyracksDataException(e); } catch (InterruptedException e) { throw new HyracksDataException(e); } } } catch (Throwable th) { writer.fail(); throw th; } finally { writer.close(); } } }; }
From source file:org.apache.ignite.internal.processors.hadoop.impl.v2.HadoopV2MapTask.java
License:Apache License
/** {@inheritDoc} */
@SuppressWarnings({ "ConstantConditions", "unchecked" })
@Override
public void run0(HadoopV2TaskContext taskCtx) throws IgniteCheckedException {
    OutputFormat outputFormat = null;
    Exception err = null;

    JobContextImpl jobCtx = taskCtx.jobContext();

    if (taskCtx.taskInfo().hasMapperIndex())
        HadoopMapperUtils.mapperIndex(taskCtx.taskInfo().mapperIndex());
    else
        HadoopMapperUtils.clearMapperIndex();

    try {
        HadoopV2Context hadoopCtx = hadoopContext();

        InputSplit nativeSplit = hadoopCtx.getInputSplit();

        if (nativeSplit == null)
            throw new IgniteCheckedException("Input split cannot be null.");

        InputFormat inFormat = ReflectionUtils.newInstance(jobCtx.getInputFormatClass(),
                hadoopCtx.getConfiguration());

        RecordReader reader = inFormat.createRecordReader(nativeSplit, hadoopCtx);

        reader.initialize(nativeSplit, hadoopCtx);

        hadoopCtx.reader(reader);

        HadoopJobInfo jobInfo = taskCtx.job().info();

        outputFormat = jobInfo.hasCombiner() || jobInfo.hasReducer() ? null : prepareWriter(jobCtx);

        Mapper mapper = ReflectionUtils.newInstance(jobCtx.getMapperClass(), hadoopCtx.getConfiguration());

        try {
            mapper.run(new WrappedMapper().getMapContext(hadoopCtx));

            taskCtx.onMapperFinished();
        } finally {
            closeWriter();
        }

        commit(outputFormat);
    } catch (InterruptedException e) {
        err = e;

        Thread.currentThread().interrupt();

        throw new IgniteInterruptedCheckedException(e);
    } catch (Exception e) {
        err = e;

        throw new IgniteCheckedException(e);
    } finally {
        HadoopMapperUtils.clearMapperIndex();

        if (err != null)
            abort(outputFormat);
    }
}
From source file:org.apache.ignite.internal.processors.hadoop.v2.GridHadoopV2MapTask.java
License:Apache License
/** {@inheritDoc} */
@SuppressWarnings({ "ConstantConditions", "unchecked" })
@Override
public void run0(GridHadoopV2TaskContext taskCtx) throws IgniteCheckedException {
    GridHadoopInputSplit split = info().inputSplit();

    InputSplit nativeSplit;

    if (split instanceof GridHadoopFileBlock) {
        GridHadoopFileBlock block = (GridHadoopFileBlock) split;

        nativeSplit = new FileSplit(new Path(block.file().toString()), block.start(), block.length(), null);
    } else
        nativeSplit = (InputSplit) taskCtx.getNativeSplit(split);

    assert nativeSplit != null;

    OutputFormat outputFormat = null;
    Exception err = null;

    JobContextImpl jobCtx = taskCtx.jobContext();

    try {
        InputFormat inFormat = ReflectionUtils.newInstance(jobCtx.getInputFormatClass(),
                hadoopContext().getConfiguration());

        RecordReader reader = inFormat.createRecordReader(nativeSplit, hadoopContext());

        reader.initialize(nativeSplit, hadoopContext());

        hadoopContext().reader(reader);

        GridHadoopJobInfo jobInfo = taskCtx.job().info();

        outputFormat = jobInfo.hasCombiner() || jobInfo.hasReducer() ? null : prepareWriter(jobCtx);

        Mapper mapper = ReflectionUtils.newInstance(jobCtx.getMapperClass(), hadoopContext().getConfiguration());

        try {
            mapper.run(new WrappedMapper().getMapContext(hadoopContext()));
        } finally {
            closeWriter();
        }

        commit(outputFormat);
    } catch (InterruptedException e) {
        err = e;

        Thread.currentThread().interrupt();

        throw new IgniteInterruptedCheckedException(e);
    } catch (Exception e) {
        err = e;

        throw new IgniteCheckedException(e);
    } finally {
        if (err != null)
            abort(outputFormat);
    }
}
From source file:org.apache.ignite.internal.processors.hadoop.v2.HadoopV2MapTask.java
License:Apache License
/** {@inheritDoc} */
@SuppressWarnings({ "ConstantConditions", "unchecked" })
@Override
public void run0(HadoopV2TaskContext taskCtx) throws IgniteCheckedException {
    HadoopInputSplit split = info().inputSplit();

    InputSplit nativeSplit;

    if (split instanceof HadoopFileBlock) {
        HadoopFileBlock block = (HadoopFileBlock) split;

        nativeSplit = new FileSplit(new Path(block.file().toString()), block.start(), block.length(), null);
    } else
        nativeSplit = (InputSplit) taskCtx.getNativeSplit(split);

    assert nativeSplit != null;

    OutputFormat outputFormat = null;
    Exception err = null;

    JobContextImpl jobCtx = taskCtx.jobContext();

    try {
        InputFormat inFormat = ReflectionUtils.newInstance(jobCtx.getInputFormatClass(),
                hadoopContext().getConfiguration());

        RecordReader reader = inFormat.createRecordReader(nativeSplit, hadoopContext());

        reader.initialize(nativeSplit, hadoopContext());

        hadoopContext().reader(reader);

        HadoopJobInfo jobInfo = taskCtx.job().info();

        outputFormat = jobInfo.hasCombiner() || jobInfo.hasReducer() ? null : prepareWriter(jobCtx);

        Mapper mapper = ReflectionUtils.newInstance(jobCtx.getMapperClass(), hadoopContext().getConfiguration());

        try {
            mapper.run(new WrappedMapper().getMapContext(hadoopContext()));
        } finally {
            closeWriter();
        }

        commit(outputFormat);
    } catch (InterruptedException e) {
        err = e;

        Thread.currentThread().interrupt();

        throw new IgniteInterruptedCheckedException(e);
    } catch (Exception e) {
        err = e;

        throw new IgniteCheckedException(e);
    } finally {
        if (err != null)
            abort(outputFormat);
    }
}
From source file:org.apache.pig.pen.LocalMapReduceSimulator.java
License:Apache License
@SuppressWarnings("unchecked") public void launchPig(PhysicalPlan php, Map<LOLoad, DataBag> baseData, LineageTracer lineage, IllustratorAttacher attacher, ExampleGenerator eg, PigContext pc) throws PigException, IOException, InterruptedException { phyToMRMap.clear();//from w w w. java 2s. c o m MROperPlan mrp = launcher.compile(php, pc); ConfigurationValidator.validatePigProperties(pc.getProperties()); Configuration conf = ConfigurationUtil.toConfiguration(pc.getProperties()); JobControlCompiler jcc = new JobControlCompiler(pc, conf); JobControl jc; int numMRJobsCompl = 0; DataBag input; List<Pair<PigNullableWritable, Writable>> intermediateData = new ArrayList<Pair<PigNullableWritable, Writable>>(); Map<Job, MapReduceOper> jobToMroMap = jcc.getJobMroMap(); HashMap<String, DataBag> output = new HashMap<String, DataBag>(); Configuration jobConf; // jc is null only when mrp.size == 0 boolean needFileInput; final ArrayList<OperatorKey> emptyInpTargets = new ArrayList<OperatorKey>(); pc.getProperties().setProperty("pig.illustrating", "true"); while (mrp.size() != 0) { jc = jcc.compile(mrp, "Illustrator"); if (jc == null) { throw new ExecException("Native execution is not supported"); } List<Job> jobs = jc.getWaitingJobs(); for (Job job : jobs) { jobConf = job.getJobConf(); FileLocalizer.setInitialized(false); ArrayList<ArrayList<OperatorKey>> inpTargets = (ArrayList<ArrayList<OperatorKey>>) ObjectSerializer .deserialize(jobConf.get("pig.inpTargets")); intermediateData.clear(); MapReduceOper mro = jobToMroMap.get(job); PigSplit split = null; List<POStore> stores = null; PhysicalOperator pack = null; // revisit as there are new physical operators from MR compilation if (!mro.mapPlan.isEmpty()) attacher.revisit(mro.mapPlan); if (!mro.reducePlan.isEmpty()) { attacher.revisit(mro.reducePlan); pack = mro.reducePlan.getRoots().get(0); } List<POLoad> lds = PlanHelper.getPhysicalOperators(mro.mapPlan, POLoad.class); if (!mro.mapPlan.isEmpty()) { stores = PlanHelper.getPhysicalOperators(mro.mapPlan, POStore.class); } if (!mro.reducePlan.isEmpty()) { if (stores == null) stores = PlanHelper.getPhysicalOperators(mro.reducePlan, POStore.class); else stores.addAll(PlanHelper.getPhysicalOperators(mro.reducePlan, POStore.class)); } for (POStore store : stores) { output.put(store.getSFile().getFileName(), attacher.getDataMap().get(store)); } OutputAttacher oa = new OutputAttacher(mro.mapPlan, output); oa.visit(); if (!mro.reducePlan.isEmpty()) { oa = new OutputAttacher(mro.reducePlan, output); oa.visit(); } int index = 0; for (POLoad ld : lds) { input = output.get(ld.getLFile().getFileName()); if (input == null && baseData != null) { for (LogicalRelationalOperator lo : baseData.keySet()) { if (((LOLoad) lo).getSchemaFile().equals(ld.getLFile().getFileName())) { input = baseData.get(lo); break; } } } if (input != null) mro.mapPlan.remove(ld); } for (POLoad ld : lds) { // check newly generated data first input = output.get(ld.getLFile().getFileName()); if (input == null && baseData != null) { if (input == null && baseData != null) { for (LogicalRelationalOperator lo : baseData.keySet()) { if (((LOLoad) lo).getSchemaFile().equals(ld.getLFile().getFileName())) { input = baseData.get(lo); break; } } } } needFileInput = (input == null); split = new PigSplit(null, index, needFileInput ? 
emptyInpTargets : inpTargets.get(index), 0); ++index; Mapper<Text, Tuple, PigNullableWritable, Writable> map; if (mro.reducePlan.isEmpty()) { // map-only map = new PigMapOnly.Map(); Mapper<Text, Tuple, PigNullableWritable, Writable>.Context context = ((PigMapOnly.Map) map) .getIllustratorContext(jobConf, input, intermediateData, split); if (mro.isCounterOperation()) { if (mro.isRowNumber()) { map = new PigMapReduceCounter.PigMapCounter(); } context = ((PigMapReduceCounter.PigMapCounter) map).getIllustratorContext(jobConf, input, intermediateData, split); } ((PigMapBase) map).setMapPlan(mro.mapPlan); map.run(context); } else { if ("true".equals(jobConf.get("pig.usercomparator"))) map = new PigMapReduce.MapWithComparator(); else if (!"".equals(jobConf.get("pig.keyDistFile", ""))) map = new PigMapReduce.MapWithPartitionIndex(); else map = new PigMapReduce.Map(); Mapper<Text, Tuple, PigNullableWritable, Writable>.Context context = ((PigMapBase) map) .getIllustratorContext(jobConf, input, intermediateData, split); ((PigMapBase) map).setMapPlan(mro.mapPlan); map.run(context); } } if (!mro.reducePlan.isEmpty()) { if (pack instanceof POPackage) mro.reducePlan.remove(pack); // reducer run PigMapReduce.Reduce reduce; if ("true".equals(jobConf.get("pig.usercomparator"))) reduce = new PigMapReduce.ReduceWithComparator(); else reduce = new PigMapReduce.Reduce(); Reducer<PigNullableWritable, NullableTuple, PigNullableWritable, Writable>.Context context = reduce .getIllustratorContext(job, intermediateData, (POPackage) pack); if (mro.isCounterOperation()) { reduce = new PigMapReduceCounter.PigReduceCounter(); context = ((PigMapReduceCounter.PigReduceCounter) reduce).getIllustratorContext(job, intermediateData, (POPackage) pack); } ((PigMapReduce.Reduce) reduce).setReducePlan(mro.reducePlan); reduce.run(context); } for (PhysicalOperator key : mro.phyToMRMap.keySet()) for (PhysicalOperator value : mro.phyToMRMap.get(key)) phyToMRMap.put(key, value); } int removedMROp = jcc.updateMROpPlan(new LinkedList<Job>()); numMRJobsCompl += removedMROp; } jcc.reset(); }