Example usage for org.apache.hadoop.mapred JobConf getReducerClass

Introduction

On this page you can find example usage for org.apache.hadoop.mapred JobConf getReducerClass.

Prototype

public Class<? extends Reducer> getReducerClass() 

Document

Get the Reducer class for the job, defaulting to IdentityReducer if no reducer has been set.
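
Before the full examples below, here is a minimal, self-contained sketch of the typical round trip: register a reducer on a JobConf, read it back with getReducerClass(), and instantiate it the way the framework does. The GetReducerClassSketch and SumReducer class names are made up for illustration and are not part of any project quoted below.

import java.io.IOException;
import java.util.Iterator;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.util.ReflectionUtils;

public class GetReducerClassSketch {

    // Hypothetical reducer used only to illustrate the round trip.
    public static class SumReducer extends MapReduceBase
            implements Reducer<Text, IntWritable, Text, IntWritable> {
        public void reduce(Text key, Iterator<IntWritable> values,
                OutputCollector<Text, IntWritable> output, Reporter reporter) throws IOException {
            int sum = 0;
            while (values.hasNext())
                sum += values.next().get();
            output.collect(key, new IntWritable(sum));
        }
    }

    public static void main(String[] args) {
        JobConf jobConf = new JobConf();
        jobConf.setReducerClass(SumReducer.class);

        // getReducerClass() returns the configured class, or IdentityReducer when none was set.
        Class<? extends Reducer> reducerClass = jobConf.getReducerClass();

        // Instantiate and configure it the same way the framework (and the examples below) do.
        Reducer reducer = ReflectionUtils.newInstance(reducerClass, jobConf);
        reducer.configure(jobConf);
    }
}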

Usage

From source file: com.scaleoutsoftware.soss.hserver.hadoop.ReducerWrapperMapred.java

License: Apache License

public ReducerWrapperMapred(HServerInvocationParameters invocationParameters, int hadoopPartition, int appId,
        int region, boolean sort) throws IOException, ClassNotFoundException, InterruptedException {
    this.invocationParameters = invocationParameters;
    JobConf jobConf = new JobConf((Configuration) invocationParameters.getConfiguration()); //Clone JobConf, so the temporary settings do not pollute other tasks

    LOG.info("Starting reducer:" + HadoopInvocationParameters.dumpConfiguration(jobConf));

    JobID jobID = (JobID) invocationParameters.getJobId();
    this.hadoopPartition = hadoopPartition;
    hadoopVersionSpecificCode = HadoopVersionSpecificCode.getInstance(invocationParameters.getHadoopVersion(),
            jobConf);

    TaskAttemptID taskAttemptID = TaskAttemptID
            .downgrade(hadoopVersionSpecificCode.createTaskAttemptId(jobID, false, hadoopPartition));

    updateJobConf(jobConf, taskAttemptID, region);

    context = hadoopVersionSpecificCode.createTaskAttemptContextMapred(jobConf, taskAttemptID);

    reducer = (org.apache.hadoop.mapred.Reducer<INKEY, INVALUE, OUTKEY, OUTVALUE>) ReflectionUtils
            .newInstance(jobConf.getReducerClass(), jobConf);

    reducer.configure(jobConf);

    OutputFormat outputFormat = jobConf.getOutputFormat();

    FileSystem fs = FileSystem.get(jobConf);
    recordWriter = (org.apache.hadoop.mapred.RecordWriter<OUTKEY, OUTVALUE>) outputFormat.getRecordWriter(fs,
            jobConf, getOutputName(hadoopPartition), Reporter.NULL);

    committer = jobConf.getOutputCommitter();

    // Create the task object so it can handle file format initialization.
    // ReduceTask is private in Hadoop 1.x, so we have to go through reflection.
    try {
        Class reduceTask = Class.forName("org.apache.hadoop.mapred.ReduceTask");
        Constructor reduceTaskConstructor = reduceTask.getDeclaredConstructor(String.class, TaskAttemptID.class,
                int.class, int.class, int.class);
        reduceTaskConstructor.setAccessible(true);
        Task task = (Task) reduceTaskConstructor.newInstance(null, taskAttemptID, hadoopPartition, 0, 0);
        task.setConf(jobConf);
        task.initialize(jobConf, jobID, Reporter.NULL, false);
    } catch (Exception e) {
        throw new IOException("Cannot initialize ReduceTask", e);
    }

    committer.setupTask(context);

    Class<INKEY> keyClass = (Class<INKEY>) jobConf.getMapOutputKeyClass();
    WritableSerializerDeserializer<INKEY> firstKeySerializer = new WritableSerializerDeserializer<INKEY>(
            keyClass, null);
    WritableSerializerDeserializer<INKEY> secondKeySerializer = new WritableSerializerDeserializer<INKEY>(
            keyClass, null);
    Class<INVALUE> valueClass = (Class<INVALUE>) jobConf.getMapOutputValueClass();
    WritableSerializerDeserializer<INVALUE> valueSerializer = new WritableSerializerDeserializer<INVALUE>(
            valueClass, null);

    DataGridReaderParameters<INKEY, INVALUE> params = new DataGridReaderParameters<INKEY, INVALUE>(region,
            appId, HServerParameters.getSetting(REDUCE_USEMEMORYMAPPEDFILES, jobConf) > 0, firstKeySerializer,
            valueSerializer, invocationParameters.getSerializationMode(), secondKeySerializer, keyClass,
            valueClass, sort, HServerParameters.getSetting(REDUCE_CHUNKSTOREADAHEAD, jobConf),
            1024 * HServerParameters.getSetting(REDUCE_INPUTCHUNKSIZE_KB, jobConf),
            HServerParameters.getSetting(REDUCE_CHUNKREADTIMEOUT, jobConf));
    transport = DataGridChunkedCollectionReader.getGridReader(params);
    outputCollector = new OutputCollector<OUTKEY, OUTVALUE>() {
        @Override
        public void collect(OUTKEY outkey, OUTVALUE outvalue) throws IOException {
            recordWriter.write(outkey, outvalue);
        }
    };
}

From source file: org.apache.avro.mapred.AvroJob.java

License: Apache License

private static void configureAvroOutput(JobConf job) {
    if (job.get("mapred.output.format.class") == null)
        job.setOutputFormat(AvroOutputFormat.class);

    if (job.getReducerClass() == IdentityReducer.class)
        job.setReducerClass(HadoopReducer.class);

    job.setOutputKeyClass(AvroWrapper.class);
    configureAvroShuffle(job);
}
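
The check above relies on getReducerClass() falling back to IdentityReducer when no reducer has been configured, so comparing the returned class against IdentityReducer.class is a common way to detect that the user did not supply their own reducer. A minimal sketch of that idiom (the DefaultReducerCheck class name is hypothetical):

import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.lib.IdentityReducer;

public class DefaultReducerCheck {
    public static void main(String[] args) {
        JobConf job = new JobConf();

        // No reducer has been set, so getReducerClass() returns IdentityReducer.class.
        boolean userSuppliedReducer = job.getReducerClass() != IdentityReducer.class;
        System.out.println("User supplied a reducer: " + userSuppliedReducer); // prints false
    }
}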

From source file: org.apache.ignite.internal.processors.hadoop.impl.v1.HadoopV1ReduceTask.java

License: Apache License

/** {@inheritDoc} */
@SuppressWarnings("unchecked")
@Override
public void run(HadoopTaskContext taskCtx) throws IgniteCheckedException {
    HadoopJobEx job = taskCtx.job();

    HadoopV2TaskContext taskCtx0 = (HadoopV2TaskContext) taskCtx;

    if (!reduce && taskCtx.taskInfo().hasMapperIndex())
        HadoopMapperUtils.mapperIndex(taskCtx.taskInfo().mapperIndex());
    else
        HadoopMapperUtils.clearMapperIndex();

    try {
        JobConf jobConf = taskCtx0.jobConf();

        HadoopTaskInput input = taskCtx.input();

        HadoopV1OutputCollector collector = null;

        try {
            collector = collector(jobConf, taskCtx0, reduce || !job.info().hasReducer(), fileName(),
                    taskCtx0.attemptId());

            Reducer reducer;
            if (reduce)
                reducer = ReflectionUtils.newInstance(jobConf.getReducerClass(), jobConf);
            else
                reducer = ReflectionUtils.newInstance(jobConf.getCombinerClass(), jobConf);

            assert reducer != null;

            try {
                try {
                    while (input.next()) {
                        if (isCancelled())
                            throw new HadoopTaskCancelledException("Reduce task cancelled.");

                        reducer.reduce(input.key(), input.values(), collector, Reporter.NULL);
                    }

                    if (!reduce)
                        taskCtx.onMapperFinished();
                } finally {
                    reducer.close();
                }
            } finally {
                collector.closeWriter();
            }

            collector.commit();
        } catch (Exception e) {
            if (collector != null)
                collector.abort();

            throw new IgniteCheckedException(e);
        }
    } finally {
        if (!reduce)
            HadoopMapperUtils.clearMapperIndex();
    }
}

From source file: org.apache.ignite.internal.processors.hadoop.v1.GridHadoopV1ReduceTask.java

License: Apache License

/** {@inheritDoc} */
@SuppressWarnings("unchecked")
@Override
public void run(GridHadoopTaskContext taskCtx) throws IgniteCheckedException {
    GridHadoopJob job = taskCtx.job();

    GridHadoopV2TaskContext ctx = (GridHadoopV2TaskContext) taskCtx;

    JobConf jobConf = ctx.jobConf();

    GridHadoopTaskInput input = taskCtx.input();

    GridHadoopV1OutputCollector collector = null;

    try {
        collector = collector(jobConf, ctx, reduce || !job.info().hasReducer(), fileName(), ctx.attemptId());

        Reducer reducer = ReflectionUtils
                .newInstance(reduce ? jobConf.getReducerClass() : jobConf.getCombinerClass(), jobConf);

        assert reducer != null;

        try {
            try {
                while (input.next()) {
                    if (isCancelled())
                        throw new GridHadoopTaskCancelledException("Reduce task cancelled.");

                    reducer.reduce(input.key(), input.values(), collector, Reporter.NULL);
                }
            } finally {
                reducer.close();
            }
        } finally {
            collector.closeWriter();
        }

        collector.commit();
    } catch (Exception e) {
        if (collector != null)
            collector.abort();

        throw new IgniteCheckedException(e);
    }
}

From source file: org.apache.ignite.internal.processors.hadoop.v1.HadoopV1ReduceTask.java

License: Apache License

/** {@inheritDoc} */
@SuppressWarnings("unchecked")
@Override
public void run(HadoopTaskContext taskCtx) throws IgniteCheckedException {
    HadoopJob job = taskCtx.job();

    HadoopV2TaskContext ctx = (HadoopV2TaskContext) taskCtx;

    JobConf jobConf = ctx.jobConf();

    HadoopTaskInput input = taskCtx.input();

    HadoopV1OutputCollector collector = null;

    try {
        collector = collector(jobConf, ctx, reduce || !job.info().hasReducer(), fileName(), ctx.attemptId());

        Reducer reducer;
        if (reduce)
            reducer = ReflectionUtils.newInstance(jobConf.getReducerClass(), jobConf);
        else
            reducer = ReflectionUtils.newInstance(jobConf.getCombinerClass(), jobConf);

        assert reducer != null;

        try {
            try {
                while (input.next()) {
                    if (isCancelled())
                        throw new HadoopTaskCancelledException("Reduce task cancelled.");

                    reducer.reduce(input.key(), input.values(), collector, Reporter.NULL);
                }
            } finally {
                reducer.close();
            }
        } finally {
            collector.closeWriter();
        }

        collector.commit();
    } catch (Exception e) {
        if (collector != null)
            collector.abort();

        throw new IgniteCheckedException(e);
    }
}

From source file: org.apache.tez.mapreduce.processor.reduce.ReduceProcessor.java

License: Apache License

void runOldReducer(JobConf job, final MRTaskReporter reporter, KeyValuesReader input, RawComparator comparator,
        Class keyClass, Class valueClass, final KeyValueWriter output)
        throws IOException, InterruptedException {

    Reducer reducer = ReflectionUtils.newInstance(job.getReducerClass(), job);

    // make output collector

    OutputCollector collector = new OutputCollector() {
        public void collect(Object key, Object value) throws IOException {
            output.write(key, value);
        }
    };

    // apply reduce function
    try {
        ReduceValuesIterator values = new ReduceValuesIterator(input, reporter, reduceInputValueCounter);

        values.informReduceProgress();
        while (values.more()) {
            reduceInputKeyCounter.increment(1);
            reducer.reduce(values.getKey(), values, collector, reporter);
            values.informReduceProgress();
        }

        // Set progress to 1.0f if there was no exception.
        reporter.setProgress(1.0f);

        //Clean up: repeated in catch block below
        reducer.close();
        //End of clean up.
    } catch (IOException ioe) {
        try {
            reducer.close();
        } catch (IOException ignored) {
        }

        throw ioe;
    }
}

From source file: org.gridgain.grid.kernal.processors.hadoop.v1.GridHadoopV1ReduceTask.java

License: Open Source License

/** {@inheritDoc} */
@SuppressWarnings("unchecked")
@Override
public void run(GridHadoopTaskContext taskCtx) throws GridException {
    GridHadoopJob job = taskCtx.job();

    GridHadoopV2TaskContext ctx = (GridHadoopV2TaskContext) taskCtx;

    JobConf jobConf = ctx.jobConf();

    GridHadoopTaskInput input = taskCtx.input();

    GridHadoopV1OutputCollector collector = null;

    try {
        collector = collector(jobConf, ctx, reduce || !job.info().hasReducer(), fileName(), ctx.attemptId());

        Reducer reducer = ReflectionUtils
                .newInstance(reduce ? jobConf.getReducerClass() : jobConf.getCombinerClass(), jobConf);

        assert reducer != null;

        try {
            try {
                while (input.next()) {
                    if (isCancelled())
                        throw new GridHadoopTaskCancelledException("Reduce task cancelled.");

                    reducer.reduce(input.key(), input.values(), collector, Reporter.NULL);
                }
            } finally {
                reducer.close();
            }
        } finally {
            collector.closeWriter();
        }

        collector.commit();
    } catch (Exception e) {
        if (collector != null)
            collector.abort();

        throw new GridException(e);
    }
}