Example usage for org.apache.hadoop.mapred JobConf getReducerClass

Introduction

On this page you can find example usage for org.apache.hadoop.mapred JobConf getReducerClass.

Prototype

public Class<? extends Reducer> getReducerClass() 

Document

Get the Reducer class for the job, defaulting to IdentityReducer if no reducer has been set.
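
Before the full examples below, here is a minimal, self-contained sketch of the typical round trip: register a reducer on a JobConf, read it back with getReducerClass(), and instantiate it the way the framework does. The GetReducerClassSketch and SumReducer class names are made up for illustration and are not part of any project quoted below.

import java.io.IOException;
import java.util.Iterator;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.util.ReflectionUtils;

public class GetReducerClassSketch {

    // Hypothetical reducer used only to illustrate the round trip.
    public static class SumReducer extends MapReduceBase
            implements Reducer<Text, IntWritable, Text, IntWritable> {
        public void reduce(Text key, Iterator<IntWritable> values,
                OutputCollector<Text, IntWritable> output, Reporter reporter) throws IOException {
            int sum = 0;
            while (values.hasNext())
                sum += values.next().get();
            output.collect(key, new IntWritable(sum));
        }
    }

    public static void main(String[] args) {
        JobConf jobConf = new JobConf();
        jobConf.setReducerClass(SumReducer.class);

        // getReducerClass() returns the configured class, or IdentityReducer when none was set.
        Class<? extends Reducer> reducerClass = jobConf.getReducerClass();

        // Instantiate and configure it the same way the framework (and the examples below) do.
        Reducer reducer = ReflectionUtils.newInstance(reducerClass, jobConf);
        reducer.configure(jobConf);
    }
}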

Usage

From source file: com.scaleoutsoftware.soss.hserver.hadoop.ReducerWrapperMapred.java

License: Apache License

public ReducerWrapperMapred(HServerInvocationParameters invocationParameters, int hadoopPartition, int appId,
        int region, boolean sort) throws IOException, ClassNotFoundException, InterruptedException {
    this.invocationParameters = invocationParameters;
    JobConf jobConf = new JobConf((Configuration) invocationParameters.getConfiguration()); //Clone JobConf, so the temporary settings do not pollute other tasks

    LOG.info("Starting reducer:" + HadoopInvocationParameters.dumpConfiguration(jobConf));

    JobID jobID = (JobID) invocationParameters.getJobId();
    this.hadoopPartition = hadoopPartition;
    hadoopVersionSpecificCode = HadoopVersionSpecificCode.getInstance(invocationParameters.getHadoopVersion(),
            jobConf);

    TaskAttemptID taskAttemptID = TaskAttemptID
            .downgrade(hadoopVersionSpecificCode.createTaskAttemptId(jobID, false, hadoopPartition));

    updateJobConf(jobConf, taskAttemptID, region);

    context = hadoopVersionSpecificCode.createTaskAttemptContextMapred(jobConf, taskAttemptID);

    reducer = (org.apache.hadoop.mapred.Reducer<INKEY, INVALUE, OUTKEY, OUTVALUE>) ReflectionUtils
            .newInstance(jobConf.getReducerClass(), jobConf);

    reducer.configure(jobConf);

    OutputFormat outputFormat = jobConf.getOutputFormat();

    FileSystem fs = FileSystem.get(jobConf);
    recordWriter = (org.apache.hadoop.mapred.RecordWriter<OUTKEY, OUTVALUE>) outputFormat.getRecordWriter(fs,
            jobConf, getOutputName(hadoopPartition), Reporter.NULL);

    committer = jobConf.getOutputCommitter();

    // Create the task object so it can handle file format initialization.
    // ReduceTask is private in Hadoop 1.x, so we have to go through reflection.
    try {
        Class reduceTask = Class.forName("org.apache.hadoop.mapred.ReduceTask");
        Constructor reduceTaskConstructor = reduceTask.getDeclaredConstructor(String.class, TaskAttemptID.class,
                int.class, int.class, int.class);
        reduceTaskConstructor.setAccessible(true);
        Task task = (Task) reduceTaskConstructor.newInstance(null, taskAttemptID, hadoopPartition, 0, 0);
        task.setConf(jobConf);
        task.initialize(jobConf, jobID, Reporter.NULL, false);
    } catch (Exception e) {
        throw new IOException("Cannot initialize ReduceTask", e);
    }

    committer.setupTask(context);

    Class<INKEY> keyClass = (Class<INKEY>) jobConf.getMapOutputKeyClass();
    WritableSerializerDeserializer<INKEY> firstKeySerializer = new WritableSerializerDeserializer<INKEY>(
            keyClass, null);
    WritableSerializerDeserializer<INKEY> secondKeySerializer = new WritableSerializerDeserializer<INKEY>(
            keyClass, null);
    Class<INVALUE> valueClass = (Class<INVALUE>) jobConf.getMapOutputValueClass();
    WritableSerializerDeserializer<INVALUE> valueSerializer = new WritableSerializerDeserializer<INVALUE>(
            valueClass, null);

    DataGridReaderParameters<INKEY, INVALUE> params = new DataGridReaderParameters<INKEY, INVALUE>(region,
            appId, HServerParameters.getSetting(REDUCE_USEMEMORYMAPPEDFILES, jobConf) > 0, firstKeySerializer,
            valueSerializer, invocationParameters.getSerializationMode(), secondKeySerializer, keyClass,
            valueClass, sort, HServerParameters.getSetting(REDUCE_CHUNKSTOREADAHEAD, jobConf),
            1024 * HServerParameters.getSetting(REDUCE_INPUTCHUNKSIZE_KB, jobConf),
            HServerParameters.getSetting(REDUCE_CHUNKREADTIMEOUT, jobConf));
    transport = DataGridChunkedCollectionReader.getGridReader(params);
    outputCollector = new OutputCollector<OUTKEY, OUTVALUE>() {
        @Override
        public void collect(OUTKEY outkey, OUTVALUE outvalue) throws IOException {
            recordWriter.write(outkey, outvalue);
        }
    };
}

From source file: org.apache.avro.mapred.AvroJob.java

License: Apache License

private static void configureAvroOutput(JobConf job) {
    if (job.get("mapred.output.format.class") == null)
        job.setOutputFormat(AvroOutputFormat.class);

    if (job.getReducerClass() == IdentityReducer.class)
        job.setReducerClass(HadoopReducer.class);

    job.setOutputKeyClass(AvroWrapper.class);
    configureAvroShuffle(job);
}
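
The check above relies on getReducerClass() falling back to IdentityReducer when no reducer has been configured, so comparing the returned class against IdentityReducer.class is a common way to detect that the user did not supply their own reducer. A minimal sketch of that idiom (the DefaultReducerCheck class name is hypothetical):

import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.lib.IdentityReducer;

public class DefaultReducerCheck {
    public static void main(String[] args) {
        JobConf job = new JobConf();

        // No reducer has been set, so getReducerClass() returns IdentityReducer.class.
        boolean userSuppliedReducer = job.getReducerClass() != IdentityReducer.class;
        System.out.println("User supplied a reducer: " + userSuppliedReducer); // prints false
    }
}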

From source file: org.apache.ignite.internal.processors.hadoop.impl.v1.HadoopV1ReduceTask.java

License: Apache License

/** {@inheritDoc} */
@SuppressWarnings("unchecked")
@Override
public void run(HadoopTaskContext taskCtx) throws IgniteCheckedException {
    HadoopJobEx job = taskCtx.job();

    HadoopV2TaskContext taskCtx0 = (HadoopV2TaskContext) taskCtx;

    if (!reduce && taskCtx.taskInfo().hasMapperIndex())
        HadoopMapperUtils.mapperIndex(taskCtx.taskInfo().mapperIndex());
    else
        HadoopMapperUtils.clearMapperIndex();

    try {
        JobConf jobConf = taskCtx0.jobConf();

        HadoopTaskInput input = taskCtx.input();

        HadoopV1OutputCollector collector = null;

        try {
            collector = collector(jobConf, taskCtx0, reduce || !job.info().hasReducer(), fileName(),
                    taskCtx0.attemptId());

            Reducer reducer;
            if (reduce)
                reducer = ReflectionUtils.newInstance(jobConf.getReducerClass(), jobConf);
            else
                reducer = ReflectionUtils.newInstance(jobConf.getCombinerClass(), jobConf);

            assert reducer != null;

            try {
                try {
                    while (input.next()) {
                        if (isCancelled())
                            throw new HadoopTaskCancelledException("Reduce task cancelled.");

                        reducer.reduce(input.key(), input.values(), collector, Reporter.NULL);
                    }

                    if (!reduce)
                        taskCtx.onMapperFinished();
                } finally {
                    reducer.close();
                }
            } finally {
                collector.closeWriter();
            }

            collector.commit();
        } catch (Exception e) {
            if (collector != null)
                collector.abort();

            throw new IgniteCheckedException(e);
        }
    } finally {
        if (!reduce)
            HadoopMapperUtils.clearMapperIndex();
    }
}

From source file: org.apache.ignite.internal.processors.hadoop.v1.GridHadoopV1ReduceTask.java

License: Apache License

/** {@inheritDoc} */
@SuppressWarnings("unchecked")
@Override
public void run(GridHadoopTaskContext taskCtx) throws IgniteCheckedException {
    GridHadoopJob job = taskCtx.job();

    GridHadoopV2TaskContext ctx = (GridHadoopV2TaskContext) taskCtx;

    JobConf jobConf = ctx.jobConf();

    GridHadoopTaskInput input = taskCtx.input();

    GridHadoopV1OutputCollector collector = null;

    try {
        collector = collector(jobConf, ctx, reduce || !job.info().hasReducer(), fileName(), ctx.attemptId());

        Reducer reducer = ReflectionUtils
                .newInstance(reduce ? jobConf.getReducerClass() : jobConf.getCombinerClass(), jobConf);

        assert reducer != null;

        try {
            try {
                while (input.next()) {
                    if (isCancelled())
                        throw new GridHadoopTaskCancelledException("Reduce task cancelled.");

                    reducer.reduce(input.key(), input.values(), collector, Reporter.NULL);
                }
            } finally {
                reducer.close();
            }
        } finally {
            collector.closeWriter();
        }

        collector.commit();
    } catch (Exception e) {
        if (collector != null)
            collector.abort();

        throw new IgniteCheckedException(e);
    }
}

From source file: org.apache.ignite.internal.processors.hadoop.v1.HadoopV1ReduceTask.java

License: Apache License

/** {@inheritDoc} */
@SuppressWarnings("unchecked")
@Override
public void run(HadoopTaskContext taskCtx) throws IgniteCheckedException {
    HadoopJob job = taskCtx.job();

    HadoopV2TaskContext ctx = (HadoopV2TaskContext) taskCtx;

    JobConf jobConf = ctx.jobConf();

    HadoopTaskInput input = taskCtx.input();

    HadoopV1OutputCollector collector = null;

    try {
        collector = collector(jobConf, ctx, reduce || !job.info().hasReducer(), fileName(), ctx.attemptId());

        Reducer reducer;
        if (reduce)
            reducer = ReflectionUtils.newInstance(jobConf.getReducerClass(), jobConf);
        else
            reducer = ReflectionUtils.newInstance(jobConf.getCombinerClass(), jobConf);

        assert reducer != null;

        try {
            try {
                while (input.next()) {
                    if (isCancelled())
                        throw new HadoopTaskCancelledException("Reduce task cancelled.");

                    reducer.reduce(input.key(), input.values(), collector, Reporter.NULL);
                }
            } finally {
                reducer.close();
            }
        } finally {
            collector.closeWriter();
        }

        collector.commit();
    } catch (Exception e) {
        if (collector != null)
            collector.abort();

        throw new IgniteCheckedException(e);
    }
}

From source file: org.apache.tez.mapreduce.processor.reduce.ReduceProcessor.java

License: Apache License

void runOldReducer(JobConf job, final MRTaskReporter reporter, KeyValuesReader input, RawComparator comparator,
        Class keyClass, Class valueClass, final KeyValueWriter output)
        throws IOException, InterruptedException {

    Reducer reducer = ReflectionUtils.newInstance(job.getReducerClass(), job);

    // make output collector

    OutputCollector collector = new OutputCollector() {
        public void collect(Object key, Object value) throws IOException {
            output.write(key, value);
        }
    };

    // apply reduce function
    try {
        ReduceValuesIterator values = new ReduceValuesIterator(input, reporter, reduceInputValueCounter);

        values.informReduceProgress();
        while (values.more()) {
            reduceInputKeyCounter.increment(1);
            reducer.reduce(values.getKey(), values, collector, reporter);
            values.informReduceProgress();
        }

        // Set progress to 1.0f if there was no exception.
        reporter.setProgress(1.0f);

        //Clean up: repeated in catch block below
        reducer.close();
        //End of clean up.
    } catch (IOException ioe) {
        try {
            reducer.close();
        } catch (IOException ignored) {
        }

        throw ioe;
    }
}

From source file: org.gridgain.grid.kernal.processors.hadoop.v1.GridHadoopV1ReduceTask.java

License: Open Source License

/** {@inheritDoc} */
@SuppressWarnings("unchecked")
@Override
public void run(GridHadoopTaskContext taskCtx) throws GridException {
    GridHadoopJob job = taskCtx.job();

    GridHadoopV2TaskContext ctx = (GridHadoopV2TaskContext) taskCtx;

    JobConf jobConf = ctx.jobConf();

    GridHadoopTaskInput input = taskCtx.input();

    GridHadoopV1OutputCollector collector = null;

    try {
        collector = collector(jobConf, ctx, reduce || !job.info().hasReducer(), fileName(), ctx.attemptId());

        Reducer reducer = ReflectionUtils
                .newInstance(reduce ? jobConf.getReducerClass() : jobConf.getCombinerClass(), jobConf);

        assert reducer != null;

        try {
            try {
                while (input.next()) {
                    if (isCancelled())
                        throw new GridHadoopTaskCancelledException("Reduce task cancelled.");

                    reducer.reduce(input.key(), input.values(), collector, Reporter.NULL);
                }
            } finally {
                reducer.close();
            }
        } finally {
            collector.closeWriter();
        }

        collector.commit();
    } catch (Exception e) {
        if (collector != null)
            collector.abort();

        throw new GridException(e);
    }
}