Example usage for org.apache.hadoop.mapreduce Reducer run

Introduction

This page collects example usages of org.apache.hadoop.mapreduce.Reducer#run from open source projects.

Prototype

public void run(Context context) throws IOException, InterruptedException 

Document

Advanced application writers can override the run(org.apache.hadoop.mapreduce.Reducer.Context) method to control how the reduce task works.
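
For orientation: the default implementation of run drives setup, the per-key reduce loop, and cleanup, and a custom override usually keeps that shape while adding hooks around the loop. Below is a minimal sketch of such an override; the LoggingReducer name and the summing reduce body are illustrative, not taken from any project on this page.

import java.io.IOException;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

public class LoggingReducer extends Reducer<Text, IntWritable, Text, IntWritable> {

    @Override
    public void run(Context context) throws IOException, InterruptedException {
        setup(context);
        try {
            // Same loop as the default implementation; custom per-key logic
            // (progress logging, batching, early exit) can be inserted here.
            while (context.nextKey()) {
                reduce(context.getCurrentKey(), context.getValues(), context);
            }
        } finally {
            cleanup(context);
        }
    }

    @Override
    protected void reduce(Text key, Iterable<IntWritable> values, Context context)
            throws IOException, InterruptedException {
        int sum = 0;
        for (IntWritable value : values) {
            sum += value.get();
        }
        context.write(key, new IntWritable(sum));
    }
}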

Usage

From source file: co.cask.cdap.internal.app.runtime.batch.ReducerWrapper.java

License: Apache License

@SuppressWarnings("unchecked")
@Override
public void run(Context context) throws IOException, InterruptedException {
    MapReduceClassLoader classLoader = MapReduceClassLoader.getFromConfiguration(context.getConfiguration());
    BasicMapReduceTaskContext basicMapReduceContext = classLoader.getTaskContextProvider().get(context);

    // this is a hook for periodic flushing of changes buffered by datasets (to avoid OOME)
    WrappedReducer.Context flushingContext = createAutoFlushingContext(context, basicMapReduceContext);
    basicMapReduceContext.setHadoopContext(flushingContext);

    String userReducer = context.getConfiguration().get(ATTR_REDUCER_CLASS);
    ClassLoader programClassLoader = classLoader.getProgramClassLoader();
    Reducer delegate = createReducerInstance(programClassLoader, userReducer);

    // injecting runtime components, like datasets, etc.
    try {
        Reflections.visit(delegate, delegate.getClass(),
                new PropertyFieldSetter(basicMapReduceContext.getSpecification().getProperties()),
                new MetricsFieldSetter(basicMapReduceContext.getMetrics()),
                new DataSetFieldSetter(basicMapReduceContext));
    } catch (Throwable t) {
        LOG.error("Failed to inject fields to {}.", delegate.getClass(), t);
        throw Throwables.propagate(t);
    }

    ClassLoader oldClassLoader;
    if (delegate instanceof ProgramLifecycle) {
        oldClassLoader = ClassLoaders.setContextClassLoader(programClassLoader);
        try {
            ((ProgramLifecycle) delegate).initialize(new MapReduceLifecycleContext(basicMapReduceContext));
        } catch (Exception e) {
            LOG.error("Failed to initialize mapper with {}", basicMapReduceContext, e);
            throw Throwables.propagate(e);
        } finally {
            ClassLoaders.setContextClassLoader(oldClassLoader);
        }
    }

    oldClassLoader = ClassLoaders.setContextClassLoader(programClassLoader);
    try {
        delegate.run(flushingContext);
    } finally {
        ClassLoaders.setContextClassLoader(oldClassLoader);
    }

    // transaction is not finished, but we want all operations to be dispatched (some could be buffered in
    // memory by tx agent)
    try {
        basicMapReduceContext.flushOperations();
    } catch (Exception e) {
        LOG.error("Failed to flush operations at the end of reducer of " + basicMapReduceContext, e);
        throw Throwables.propagate(e);
    }

    // Close all writers created by MultipleOutputs
    basicMapReduceContext.closeMultiOutputs();

    if (delegate instanceof ProgramLifecycle) {
        oldClassLoader = ClassLoaders.setContextClassLoader(programClassLoader);
        try {
            ((ProgramLifecycle<? extends RuntimeContext>) delegate).destroy();
        } catch (Exception e) {
            LOG.error("Error during destroy of reducer {}", basicMapReduceContext, e);
            // Do nothing, try to finish
        } finally {
            ClassLoaders.setContextClassLoader(oldClassLoader);
        }
    }
}
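
The createAutoFlushingContext helper invoked above is not part of this excerpt. A plausible sketch, assuming it wraps the Hadoop context so that buffered dataset changes are flushed after a fixed number of keys (the flushFrequency threshold of 10000 is an assumption, not CDAP's documented default):

// Sketch only: the real createAutoFlushingContext in CDAP is not shown here.
private WrappedReducer.Context createAutoFlushingContext(final Context context,
        final BasicMapReduceTaskContext basicMapReduceContext) {
    final int flushFrequency = 10000; // assumed threshold; the real value may be configurable
    @SuppressWarnings("unchecked")
    WrappedReducer.Context flushingContext = new WrappedReducer().new Context(context) {
        private int processedRecords = 0;

        @Override
        public boolean nextKey() throws IOException, InterruptedException {
            boolean result = super.nextKey();
            if (++processedRecords > flushFrequency) {
                try {
                    // Push dataset changes buffered in memory by the tx agent.
                    basicMapReduceContext.flushOperations();
                } catch (Exception e) {
                    throw Throwables.propagate(e);
                }
                processedRecords = 0;
            }
            return result;
        }
    };
    return flushingContext;
}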

From source file: com.asakusafw.runtime.mapreduce.simple.SimpleJobRunner.java

License: Apache License

@SuppressWarnings({ "unchecked", "rawtypes" })
private void runReduce(Job job, KeyValueSorter<?, ?> sorter)
        throws ClassNotFoundException, IOException, InterruptedException {
    Configuration conf = job.getConfiguration();
    OutputFormat<?, ?> output = ReflectionUtils.newInstance(job.getOutputFormatClass(), conf);
    TaskAttemptID id = newTaskAttemptId(newReduceTaskId(job.getJobID(), 1), 0);
    Reducer<?, ?, ?, ?> reducer = ReflectionUtils.newInstance(job.getReducerClass(), conf);
    if (LOG.isDebugEnabled()) {
        LOG.debug(MessageFormat.format("starting reducer: {0}@{1} ({2}records, {3}bytes)", //$NON-NLS-1$
                reducer.getClass().getName(), id, sorter.getRecordCount(), sorter.getSizeInBytes()));
    }
    TaskAttemptContext context = newTaskAttemptContext(conf, id);
    OutputCommitter committer = output.getOutputCommitter(context);
    committer.setupTask(context);
    boolean succeed = false;
    try {
        ShuffleReader reader = new ShuffleReader(sorter, new Progress());
        try {
            RecordWriter<?, ?> writer = output.getRecordWriter(newTaskAttemptContext(conf, id));
            try {
                Reducer.Context c = newReducerContext(conf, id, reader, sorter.getKeyClass(),
                        sorter.getValueClass(), writer, committer, (RawComparator) job.getGroupingComparator());
                reducer.run(c);
            } finally {
                writer.close(newTaskAttemptContext(conf, id));
            }
        } finally {
            try {
                reader.close();
            } catch (IOException e) {
                LOG.warn(MessageFormat.format("error occurred while closing reducer input: {0} ({1})", id,
                        job.getJobName()), e);
            }
        }
        doCommitTask(context, committer);
        succeed = true;
    } finally {
        if (!succeed) {
            doAbortTask(context, committer);
        }
    }
}
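
The doCommitTask and doAbortTask helpers called above are likewise outside this excerpt. Assuming they follow the standard OutputCommitter protocol, they might look like this (the log message is illustrative):

// Hypothetical sketches of the commit/abort helpers used above.
private void doCommitTask(TaskAttemptContext context, OutputCommitter committer) throws IOException {
    // Only commit if the committer has task-side output to promote.
    if (committer.needsTaskCommit(context)) {
        committer.commitTask(context);
    }
}

private void doAbortTask(TaskAttemptContext context, OutputCommitter committer) {
    try {
        committer.abortTask(context);
    } catch (IOException e) {
        LOG.warn(MessageFormat.format("error occurred while aborting task: {0}",
                context.getTaskAttemptID()), e);
    }
}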

From source file: org.apache.ignite.internal.processors.hadoop.impl.v2.HadoopV2ReduceTask.java

License: Apache License

/** {@inheritDoc} */
@SuppressWarnings({ "ConstantConditions", "unchecked" })
@Override
public void run0(HadoopV2TaskContext taskCtx) throws IgniteCheckedException {
    OutputFormat outputFormat = null;
    Exception err = null;

    JobContextImpl jobCtx = taskCtx.jobContext();

    // Set mapper index for combiner tasks
    if (!reduce && taskCtx.taskInfo().hasMapperIndex())
        HadoopMapperUtils.mapperIndex(taskCtx.taskInfo().mapperIndex());
    else
        HadoopMapperUtils.clearMapperIndex();

    try {
        outputFormat = reduce || !taskCtx.job().info().hasReducer() ? prepareWriter(jobCtx) : null;

        Reducer reducer;

        if (reduce)
            reducer = ReflectionUtils.newInstance(jobCtx.getReducerClass(), jobCtx.getConfiguration());
        else
            reducer = ReflectionUtils.newInstance(jobCtx.getCombinerClass(), jobCtx.getConfiguration());

        try {
            reducer.run(new WrappedReducer().getReducerContext(hadoopContext()));

            if (!reduce)
                taskCtx.onMapperFinished();
        } finally {
            closeWriter();
        }

        commit(outputFormat);
    } catch (InterruptedException e) {
        err = e;

        Thread.currentThread().interrupt();

        throw new IgniteInterruptedCheckedException(e);
    } catch (Exception e) {
        err = e;

        throw new IgniteCheckedException(e);
    } finally {
        if (!reduce)
            HadoopMapperUtils.clearMapperIndex();

        if (err != null)
            abort(outputFormat);
    }
}
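
This example and the remaining variants below all funnel through org.apache.hadoop.mapreduce.lib.reduce.WrappedReducer, which adapts the engine's ReduceContext into the Reducer.Context that user code expects. Stripped of the Ignite plumbing, the adaptation reduces to a small helper like this (runUserReducer is a hypothetical name; reduceContext stands in for whatever the host engine supplies):

import java.io.IOException;
import org.apache.hadoop.mapreduce.ReduceContext;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.reduce.WrappedReducer;

final class ReducerRunner {

    // Adapts an engine-provided ReduceContext into a Reducer.Context and
    // drives the user reducer with it.
    static <KI, VI, KO, VO> void runUserReducer(Reducer<KI, VI, KO, VO> reducer,
            ReduceContext<KI, VI, KO, VO> reduceContext) throws IOException, InterruptedException {
        Reducer<KI, VI, KO, VO>.Context reducerContext =
                new WrappedReducer<KI, VI, KO, VO>().getReducerContext(reduceContext);
        reducer.run(reducerContext);
    }
}

Keeping the adaptation in one place lets an engine run any user Reducer (or a combiner, as the Ignite code does) without exposing its own context implementation to user code.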

From source file: org.apache.ignite.internal.processors.hadoop.v2.GridHadoopV2ReduceTask.java

License: Apache License

/** {@inheritDoc} */
@SuppressWarnings({ "ConstantConditions", "unchecked" })
@Override
public void run0(GridHadoopV2TaskContext taskCtx) throws IgniteCheckedException {
    OutputFormat outputFormat = null;
    Exception err = null;

    JobContextImpl jobCtx = taskCtx.jobContext();

    try {
        outputFormat = reduce || !taskCtx.job().info().hasReducer() ? prepareWriter(jobCtx) : null;

        Reducer reducer = ReflectionUtils.newInstance(
                reduce ? jobCtx.getReducerClass() : jobCtx.getCombinerClass(), jobCtx.getConfiguration());

        try {
            reducer.run(new WrappedReducer().getReducerContext(hadoopContext()));
        } finally {
            closeWriter();
        }

        commit(outputFormat);
    } catch (InterruptedException e) {
        err = e;

        Thread.currentThread().interrupt();

        throw new IgniteInterruptedCheckedException(e);
    } catch (Exception e) {
        err = e;

        throw new IgniteCheckedException(e);
    } finally {
        if (err != null)
            abort(outputFormat);
    }
}

From source file: org.apache.ignite.internal.processors.hadoop.v2.HadoopV2ReduceTask.java

License: Apache License

/** {@inheritDoc} */
@SuppressWarnings({ "ConstantConditions", "unchecked" })
@Override
public void run0(HadoopV2TaskContext taskCtx) throws IgniteCheckedException {
    OutputFormat outputFormat = null;
    Exception err = null;

    JobContextImpl jobCtx = taskCtx.jobContext();

    try {
        outputFormat = reduce || !taskCtx.job().info().hasReducer() ? prepareWriter(jobCtx) : null;

        Reducer reducer;
        if (reduce)
            reducer = ReflectionUtils.newInstance(jobCtx.getReducerClass(), jobCtx.getConfiguration());
        else
            reducer = ReflectionUtils.newInstance(jobCtx.getCombinerClass(), jobCtx.getConfiguration());

        try {
            reducer.run(new WrappedReducer().getReducerContext(hadoopContext()));
        } finally {
            closeWriter();
        }

        commit(outputFormat);
    } catch (InterruptedException e) {
        err = e;

        Thread.currentThread().interrupt();

        throw new IgniteInterruptedCheckedException(e);
    } catch (Exception e) {
        err = e;

        throw new IgniteCheckedException(e);
    } finally {
        if (err != null)
            abort(outputFormat);
    }
}

From source file: org.gridgain.grid.kernal.processors.hadoop.v2.GridHadoopV2ReduceTask.java

License: Open Source License

/** {@inheritDoc} */
@SuppressWarnings({ "ConstantConditions", "unchecked" })
@Override
public void run0(GridHadoopV2TaskContext taskCtx) throws GridException {
    OutputFormat outputFormat = null;
    Exception err = null;

    JobContextImpl jobCtx = taskCtx.jobContext();

    try {
        outputFormat = reduce || !taskCtx.job().info().hasReducer() ? prepareWriter(jobCtx) : null;

        Reducer reducer = ReflectionUtils.newInstance(
                reduce ? jobCtx.getReducerClass() : jobCtx.getCombinerClass(), jobCtx.getConfiguration());

        try {
            reducer.run(new WrappedReducer().getReducerContext(hadoopContext()));
        } finally {
            closeWriter();
        }

        commit(outputFormat);
    } catch (InterruptedException e) {
        err = e;

        Thread.currentThread().interrupt();

        throw new GridInterruptedException(e);
    } catch (Exception e) {
        err = e;

        throw new GridException(e);
    } finally {
        if (err != null)
            abort(outputFormat);
    }
}