Usage examples for org.apache.hadoop.mapred.JobConf#getReducerClass()
public Class<? extends Reducer> getReducerClass()
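getReducerClass() reads the mapred.reducer.class setting and falls back to IdentityReducer when the job never set a reducer; callers then typically instantiate the returned class through ReflectionUtils, which also runs the configure(JobConf) hook on the new instance. A minimal, self-contained sketch (GetReducerClassExample and SumReducer are illustrative names, not taken from the examples below):

import java.io.IOException;
import java.util.Iterator;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.util.ReflectionUtils;

public class GetReducerClassExample {

    /** A trivial summing reducer, defined only so there is something to configure. */
    public static class SumReducer extends MapReduceBase
            implements Reducer<Text, IntWritable, Text, IntWritable> {
        @Override
        public void reduce(Text key, Iterator<IntWritable> values,
                OutputCollector<Text, IntWritable> output, Reporter reporter) throws IOException {
            int sum = 0;
            while (values.hasNext())
                sum += values.next().get();
            output.collect(key, new IntWritable(sum));
        }
    }

    public static void main(String[] args) {
        JobConf conf = new JobConf();

        // Before anything is set, getReducerClass() falls back to IdentityReducer.
        System.out.println("default: " + conf.getReducerClass().getName());

        conf.setReducerClass(SumReducer.class);

        // Now the configured class comes back.
        Class<? extends Reducer> reducerClass = conf.getReducerClass();

        // Instantiate it the way the framework (and the examples below) do:
        // ReflectionUtils.newInstance also invokes configure(JobConf) on the
        // new instance, since the old-API Reducer extends JobConfigurable.
        Reducer reducer = ReflectionUtils.newInstance(reducerClass, conf);
        System.out.println("instantiated: " + reducer.getClass().getName());
    }
}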
From source file:com.scaleoutsoftware.soss.hserver.hadoop.ReducerWrapperMapred.java
License:Apache License
public ReducerWrapperMapred(HServerInvocationParameters invocationParameters, int hadoopPartition, int appId,
        int region, boolean sort) throws IOException, ClassNotFoundException, InterruptedException {
    this.invocationParameters = invocationParameters;

    // Clone the JobConf, so the temporary settings do not pollute other tasks
    JobConf jobConf = new JobConf((Configuration) invocationParameters.getConfiguration());

    LOG.info("Starting reducer:" + HadoopInvocationParameters.dumpConfiguration(jobConf));

    JobID jobID = (JobID) invocationParameters.getJobId();
    this.hadoopPartition = hadoopPartition;
    hadoopVersionSpecificCode = HadoopVersionSpecificCode.getInstance(invocationParameters.getHadoopVersion(),
            jobConf);

    TaskAttemptID taskAttemptID = TaskAttemptID
            .downgrade(hadoopVersionSpecificCode.createTaskAttemptId(jobID, false, hadoopPartition));

    updateJobConf(jobConf, taskAttemptID, region);

    context = hadoopVersionSpecificCode.createTaskAttemptContextMapred(jobConf, taskAttemptID);

    reducer = (org.apache.hadoop.mapred.Reducer<INKEY, INVALUE, OUTKEY, OUTVALUE>) ReflectionUtils
            .newInstance(jobConf.getReducerClass(), jobConf);

    reducer.configure(jobConf);

    OutputFormat outputFormat = jobConf.getOutputFormat();

    FileSystem fs = FileSystem.get(jobConf);
    recordWriter = (org.apache.hadoop.mapred.RecordWriter<OUTKEY, OUTVALUE>) outputFormat.getRecordWriter(fs,
            jobConf, getOutputName(hadoopPartition), Reporter.NULL);

    committer = jobConf.getOutputCommitter();

    // Create a task object so it can handle file format initialization.
    // ReduceTask is not public in Hadoop 1.x, so we have to go through reflection.
    try {
        Class reduceTask = Class.forName("org.apache.hadoop.mapred.ReduceTask");
        Constructor reduceTaskConstructor = reduceTask.getDeclaredConstructor(String.class, TaskAttemptID.class,
                int.class, int.class, int.class);
        reduceTaskConstructor.setAccessible(true);
        Task task = (Task) reduceTaskConstructor.newInstance(null, taskAttemptID, hadoopPartition, 0, 0);
        task.setConf(jobConf);
        task.initialize(jobConf, jobID, Reporter.NULL, false);
    } catch (Exception e) {
        throw new IOException("Cannot initialize ReduceTask", e);
    }

    committer.setupTask(context);

    Class<INKEY> keyClass = (Class<INKEY>) jobConf.getMapOutputKeyClass();
    WritableSerializerDeserializer<INKEY> firstKeySerializer = new WritableSerializerDeserializer<INKEY>(
            keyClass, null);
    WritableSerializerDeserializer<INKEY> secondKeySerializer = new WritableSerializerDeserializer<INKEY>(
            keyClass, null);
    Class<INVALUE> valueClass = (Class<INVALUE>) jobConf.getMapOutputValueClass();
    WritableSerializerDeserializer<INVALUE> valueSerializer = new WritableSerializerDeserializer<INVALUE>(
            valueClass, null);

    DataGridReaderParameters<INKEY, INVALUE> params = new DataGridReaderParameters<INKEY, INVALUE>(region,
            appId, HServerParameters.getSetting(REDUCE_USEMEMORYMAPPEDFILES, jobConf) > 0, firstKeySerializer,
            valueSerializer, invocationParameters.getSerializationMode(), secondKeySerializer, keyClass,
            valueClass, sort, HServerParameters.getSetting(REDUCE_CHUNKSTOREADAHEAD, jobConf),
            1024 * HServerParameters.getSetting(REDUCE_INPUTCHUNKSIZE_KB, jobConf),
            HServerParameters.getSetting(REDUCE_CHUNKREADTIMEOUT, jobConf));
    transport = DataGridChunkedCollectionReader.getGridReader(params);

    outputCollector = new OutputCollector<OUTKEY, OUTVALUE>() {
        @Override
        public void collect(OUTKEY outkey, OUTVALUE outvalue) throws IOException {
            recordWriter.write(outkey, outvalue);
        }
    };
}
From source file:org.apache.avro.mapred.AvroJob.java
License:Apache License
private static void configureAvroOutput(JobConf job) {
    if (job.get("mapred.output.format.class") == null)
        job.setOutputFormat(AvroOutputFormat.class);

    // IdentityReducer is the default return value of getReducerClass(),
    // so this check detects whether the user explicitly set a reducer.
    if (job.getReducerClass() == IdentityReducer.class)
        job.setReducerClass(HadoopReducer.class);

    job.setOutputKeyClass(AvroWrapper.class);
    configureAvroShuffle(job);
}
From source file:org.apache.ignite.internal.processors.hadoop.impl.v1.HadoopV1ReduceTask.java
License:Apache License
/** {@inheritDoc} */
@SuppressWarnings("unchecked")
@Override
public void run(HadoopTaskContext taskCtx) throws IgniteCheckedException {
    HadoopJobEx job = taskCtx.job();

    HadoopV2TaskContext taskCtx0 = (HadoopV2TaskContext) taskCtx;

    if (!reduce && taskCtx.taskInfo().hasMapperIndex())
        HadoopMapperUtils.mapperIndex(taskCtx.taskInfo().mapperIndex());
    else
        HadoopMapperUtils.clearMapperIndex();

    try {
        JobConf jobConf = taskCtx0.jobConf();

        HadoopTaskInput input = taskCtx.input();

        HadoopV1OutputCollector collector = null;

        try {
            collector = collector(jobConf, taskCtx0, reduce || !job.info().hasReducer(), fileName(),
                    taskCtx0.attemptId());

            Reducer reducer;

            if (reduce)
                reducer = ReflectionUtils.newInstance(jobConf.getReducerClass(), jobConf);
            else
                reducer = ReflectionUtils.newInstance(jobConf.getCombinerClass(), jobConf);

            assert reducer != null;

            try {
                try {
                    while (input.next()) {
                        if (isCancelled())
                            throw new HadoopTaskCancelledException("Reduce task cancelled.");

                        reducer.reduce(input.key(), input.values(), collector, Reporter.NULL);
                    }

                    if (!reduce)
                        taskCtx.onMapperFinished();
                } finally {
                    reducer.close();
                }
            } finally {
                collector.closeWriter();
            }

            collector.commit();
        } catch (Exception e) {
            if (collector != null)
                collector.abort();

            throw new IgniteCheckedException(e);
        }
    } finally {
        if (!reduce)
            HadoopMapperUtils.clearMapperIndex();
    }
}
From source file:org.apache.ignite.internal.processors.hadoop.v1.GridHadoopV1ReduceTask.java
License:Apache License
/** {@inheritDoc} */
@SuppressWarnings("unchecked")
@Override
public void run(GridHadoopTaskContext taskCtx) throws IgniteCheckedException {
    GridHadoopJob job = taskCtx.job();

    GridHadoopV2TaskContext ctx = (GridHadoopV2TaskContext) taskCtx;

    JobConf jobConf = ctx.jobConf();

    GridHadoopTaskInput input = taskCtx.input();

    GridHadoopV1OutputCollector collector = null;

    try {
        collector = collector(jobConf, ctx, reduce || !job.info().hasReducer(), fileName(), ctx.attemptId());

        Reducer reducer = ReflectionUtils
                .newInstance(reduce ? jobConf.getReducerClass() : jobConf.getCombinerClass(), jobConf);

        assert reducer != null;

        try {
            try {
                while (input.next()) {
                    if (isCancelled())
                        throw new GridHadoopTaskCancelledException("Reduce task cancelled.");

                    reducer.reduce(input.key(), input.values(), collector, Reporter.NULL);
                }
            } finally {
                reducer.close();
            }
        } finally {
            collector.closeWriter();
        }

        collector.commit();
    } catch (Exception e) {
        if (collector != null)
            collector.abort();

        throw new IgniteCheckedException(e);
    }
}
From source file:org.apache.ignite.internal.processors.hadoop.v1.HadoopV1ReduceTask.java
License:Apache License
/** {@inheritDoc} */
@SuppressWarnings("unchecked")
@Override
public void run(HadoopTaskContext taskCtx) throws IgniteCheckedException {
    HadoopJob job = taskCtx.job();

    HadoopV2TaskContext ctx = (HadoopV2TaskContext) taskCtx;

    JobConf jobConf = ctx.jobConf();

    HadoopTaskInput input = taskCtx.input();

    HadoopV1OutputCollector collector = null;

    try {
        collector = collector(jobConf, ctx, reduce || !job.info().hasReducer(), fileName(), ctx.attemptId());

        Reducer reducer;

        if (reduce)
            reducer = ReflectionUtils.newInstance(jobConf.getReducerClass(), jobConf);
        else
            reducer = ReflectionUtils.newInstance(jobConf.getCombinerClass(), jobConf);

        assert reducer != null;

        try {
            try {
                while (input.next()) {
                    if (isCancelled())
                        throw new HadoopTaskCancelledException("Reduce task cancelled.");

                    reducer.reduce(input.key(), input.values(), collector, Reporter.NULL);
                }
            } finally {
                reducer.close();
            }
        } finally {
            collector.closeWriter();
        }

        collector.commit();
    } catch (Exception e) {
        if (collector != null)
            collector.abort();

        throw new IgniteCheckedException(e);
    }
}
From source file:org.apache.tez.mapreduce.processor.reduce.ReduceProcessor.java
License:Apache License
void runOldReducer(JobConf job, final MRTaskReporter reporter, KeyValuesReader input, RawComparator comparator,
        Class keyClass, Class valueClass, final KeyValueWriter output) throws IOException, InterruptedException {

    Reducer reducer = ReflectionUtils.newInstance(job.getReducerClass(), job);

    // make output collector
    OutputCollector collector = new OutputCollector() {
        public void collect(Object key, Object value) throws IOException {
            output.write(key, value);
        }
    };

    // apply reduce function
    try {
        ReduceValuesIterator values = new ReduceValuesIterator(input, reporter, reduceInputValueCounter);

        values.informReduceProgress();

        while (values.more()) {
            reduceInputKeyCounter.increment(1);
            reducer.reduce(values.getKey(), values, collector, reporter);
            values.informReduceProgress();
        }

        // Set progress to 1.0f if there was no exception,
        reporter.setProgress(1.0f);

        // Clean up: repeated in catch block below
        reducer.close();
        // End of clean up.
    } catch (IOException ioe) {
        try {
            reducer.close();
        } catch (IOException ignored) {
        }

        throw ioe;
    }
}
From source file:org.gridgain.grid.kernal.processors.hadoop.v1.GridHadoopV1ReduceTask.java
License:Open Source License
/** {@inheritDoc} */
@SuppressWarnings("unchecked")
@Override
public void run(GridHadoopTaskContext taskCtx) throws GridException {
    GridHadoopJob job = taskCtx.job();

    GridHadoopV2TaskContext ctx = (GridHadoopV2TaskContext) taskCtx;

    JobConf jobConf = ctx.jobConf();

    GridHadoopTaskInput input = taskCtx.input();

    GridHadoopV1OutputCollector collector = null;

    try {
        collector = collector(jobConf, ctx, reduce || !job.info().hasReducer(), fileName(), ctx.attemptId());

        Reducer reducer = ReflectionUtils
                .newInstance(reduce ? jobConf.getReducerClass() : jobConf.getCombinerClass(), jobConf);

        assert reducer != null;

        try {
            try {
                while (input.next()) {
                    if (isCancelled())
                        throw new GridHadoopTaskCancelledException("Reduce task cancelled.");

                    reducer.reduce(input.key(), input.values(), collector, Reporter.NULL);
                }
            } finally {
                reducer.close();
            }
        } finally {
            collector.closeWriter();
        }

        collector.commit();
    } catch (Exception e) {
        if (collector != null)
            collector.abort();

        throw new GridException(e);
    }
}