Example usage for org.apache.hadoop.mapred JobConf getCombinerClass

Introduction

On this page you can find example usages of org.apache.hadoop.mapred.JobConf.getCombinerClass.

Prototype

public Class<? extends Reducer> getCombinerClass() 

Document

Get the user-defined combiner class used to combine map-outputs before being sent to the reducers.

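Before the usage listings, here is a minimal sketch of the call itself, assuming only the standard Hadoop client libraries (IdentityReducer stands in for a real combiner; getCombinerClass() returns null until an old-API combiner has been configured):

import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.lib.IdentityReducer;

public class GetCombinerClassDemo {
    public static void main(String[] args) {
        JobConf conf = new JobConf();

        // No combiner configured yet: the getter returns null, which the
        // examples below use as an "is a combiner set at all?" check.
        System.out.println(conf.getCombinerClass()); // null

        // Register a combiner through the old mapred API and read it back.
        conf.setCombinerClass(IdentityReducer.class);
        System.out.println(conf.getCombinerClass().getName());
    }
}
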
Usage

From source file: edu.uci.ics.hyracks.hadoop.compat.util.HadoopAdapter.java

License: Apache License

private IOperatorDescriptor addCombiner(IOperatorDescriptor previousOperator, JobConf jobConf,
        JobSpecification spec) throws Exception {
    boolean useCombiner = (jobConf.getCombinerClass() != null);
    IOperatorDescriptor mapSideOutputOp = previousOperator;
    if (useCombiner) {
        System.out.println("Using Combiner:" + jobConf.getCombinerClass().getName());
        IOperatorDescriptor mapSideCombineSortOp = getExternalSorter(jobConf, spec);
        configurePartitionCountConstraint(spec, mapSideCombineSortOp, getInstanceCount(previousOperator));

        HadoopReducerOperatorDescriptor mapSideCombineReduceOp = getReducer(jobConf, spec, true);
        configurePartitionCountConstraint(spec, mapSideCombineReduceOp, getInstanceCount(previousOperator));
        spec.connect(new OneToOneConnectorDescriptor(spec), previousOperator, 0, mapSideCombineSortOp, 0);
        spec.connect(new OneToOneConnectorDescriptor(spec), mapSideCombineSortOp, 0, mapSideCombineReduceOp, 0);
        mapSideOutputOp = mapSideCombineReduceOp;
    }
    return mapSideOutputOp;
}

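Note the pattern above: getCombinerClass() doubles as a feature flag. Only when it returns non-null does the adapter splice in an external sort followed by a map-side reduce operator, since a combiner, like a reducer, expects its input grouped by key.
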
From source file: org.apache.ignite.internal.processors.hadoop.impl.v1.HadoopV1ReduceTask.java

License: Apache License

/** {@inheritDoc} */
@SuppressWarnings("unchecked")
@Override
public void run(HadoopTaskContext taskCtx) throws IgniteCheckedException {
    HadoopJobEx job = taskCtx.job();

    HadoopV2TaskContext taskCtx0 = (HadoopV2TaskContext) taskCtx;

    if (!reduce && taskCtx.taskInfo().hasMapperIndex())
        HadoopMapperUtils.mapperIndex(taskCtx.taskInfo().mapperIndex());
    else
        HadoopMapperUtils.clearMapperIndex();

    try {
        JobConf jobConf = taskCtx0.jobConf();

        HadoopTaskInput input = taskCtx.input();

        HadoopV1OutputCollector collector = null;

        try {
            collector = collector(jobConf, taskCtx0, reduce || !job.info().hasReducer(), fileName(),
                    taskCtx0.attemptId());

            Reducer reducer;
            if (reduce)
                reducer = ReflectionUtils.newInstance(jobConf.getReducerClass(), jobConf);
            else
                reducer = ReflectionUtils.newInstance(jobConf.getCombinerClass(), jobConf);

            assert reducer != null;

            try {
                try {
                    while (input.next()) {
                        if (isCancelled())
                            throw new HadoopTaskCancelledException("Reduce task cancelled.");

                        reducer.reduce(input.key(), input.values(), collector, Reporter.NULL);
                    }

                    if (!reduce)
                        taskCtx.onMapperFinished();
                } finally {
                    reducer.close();
                }
            } finally {
                collector.closeWriter();
            }

            collector.commit();
        } catch (Exception e) {
            if (collector != null)
                collector.abort();

            throw new IgniteCheckedException(e);
        }
    } finally {
        if (!reduce)
            HadoopMapperUtils.clearMapperIndex();
    }
}

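In the task above, one implementation serves both phases: when reduce is false the task is really a map-side combine, so it instantiates jobConf.getCombinerClass() instead of the reducer class, but drives it through the same Reducer.reduce() loop.
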
From source file: org.apache.ignite.internal.processors.hadoop.impl.v2.HadoopV2TaskContext.java

License: Apache License

/**
 * @param taskInfo Task info.
 * @param job Job.
 * @param jobId Job ID.
 * @param locNodeId Local node ID.
 * @param jobConfDataInput DataInput to read the JobConf from.
 */
public HadoopV2TaskContext(HadoopTaskInfo taskInfo, HadoopJobEx job, HadoopJobId jobId,
        @Nullable UUID locNodeId, DataInput jobConfDataInput) throws IgniteCheckedException {
    super(taskInfo, job);
    this.locNodeId = locNodeId;

    // Before creating the JobConf instance we should set a new context class loader.
    ClassLoader oldLdr = HadoopCommonUtils.setContextClassLoader(getClass().getClassLoader());

    try {
        JobConf jobConf = new JobConf();

        try {
            jobConf.readFields(jobConfDataInput);
        } catch (IOException e) {
            throw new IgniteCheckedException(e);
        }

        // For map-reduce jobs prefer local writes.
        jobConf.setBooleanIfUnset(PARAM_IGFS_PREFER_LOCAL_WRITES, true);

        initializePartiallyRawComparator(jobConf);

        jobCtx = new JobContextImpl(jobConf, new JobID(jobId.globalId().toString(), jobId.localId()));

        useNewMapper = jobConf.getUseNewMapper();
        useNewReducer = jobConf.getUseNewReducer();
        useNewCombiner = jobConf.getCombinerClass() == null;
    } finally {
        HadoopCommonUtils.restoreContextClassLoader(oldLdr);
    }
}

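The last assignment above is the interesting one: getCombinerClass() only reports a combiner registered through the old mapred API (the mapred.combiner.class key), while the new mapreduce API stores its combiner under a separate key, so a null result is read as "if this job has a combiner at all, it is a new-API one". A minimal sketch of that asymmetry, using standard Hadoop classes (the expected output in the comments is my reading of the API, not taken from the source above):

import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.lib.IdentityReducer;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.reduce.IntSumReducer;

public class CombinerApiAsymmetry {
    public static void main(String[] args) throws Exception {
        // Old mapred API: setter and getter share the mapred.combiner.class key.
        JobConf oldApi = new JobConf();
        oldApi.setCombinerClass(IdentityReducer.class);
        System.out.println(oldApi.getCombinerClass()); // IdentityReducer

        // New mapreduce API: the combiner is stored under
        // mapreduce.job.combine.class, which the old getter does not consult,
        // so it still reports null.
        Job job = Job.getInstance(new JobConf());
        job.setCombinerClass(IntSumReducer.class);
        System.out.println(new JobConf(job.getConfiguration()).getCombinerClass()); // null
    }
}
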
From source file: org.apache.ignite.internal.processors.hadoop.v1.GridHadoopV1ReduceTask.java

License: Apache License

/** {@inheritDoc} */
@SuppressWarnings("unchecked")
@Override
public void run(GridHadoopTaskContext taskCtx) throws IgniteCheckedException {
    GridHadoopJob job = taskCtx.job();

    GridHadoopV2TaskContext ctx = (GridHadoopV2TaskContext) taskCtx;

    JobConf jobConf = ctx.jobConf();

    GridHadoopTaskInput input = taskCtx.input();

    GridHadoopV1OutputCollector collector = null;

    try {
        collector = collector(jobConf, ctx, reduce || !job.info().hasReducer(), fileName(), ctx.attemptId());

        Reducer reducer = ReflectionUtils
                .newInstance(reduce ? jobConf.getReducerClass() : jobConf.getCombinerClass(), jobConf);

        assert reducer != null;

        try {
            try {
                while (input.next()) {
                    if (isCancelled())
                        throw new GridHadoopTaskCancelledException("Reduce task cancelled.");

                    reducer.reduce(input.key(), input.values(), collector, Reporter.NULL);
                }
            } finally {
                reducer.close();
            }
        } finally {
            collector.closeWriter();
        }

        collector.commit();
    } catch (Exception e) {
        if (collector != null)
            collector.abort();

        throw new IgniteCheckedException(e);
    }
}

From source file: org.apache.ignite.internal.processors.hadoop.v1.HadoopV1ReduceTask.java

License: Apache License

/** {@inheritDoc} */
@SuppressWarnings("unchecked")
@Override
public void run(HadoopTaskContext taskCtx) throws IgniteCheckedException {
    HadoopJob job = taskCtx.job();

    HadoopV2TaskContext ctx = (HadoopV2TaskContext) taskCtx;

    JobConf jobConf = ctx.jobConf();

    HadoopTaskInput input = taskCtx.input();

    HadoopV1OutputCollector collector = null;

    try {
        collector = collector(jobConf, ctx, reduce || !job.info().hasReducer(), fileName(), ctx.attemptId());

        Reducer reducer;
        if (reduce)
            reducer = ReflectionUtils.newInstance(jobConf.getReducerClass(), jobConf);
        else
            reducer = ReflectionUtils.newInstance(jobConf.getCombinerClass(), jobConf);

        assert reducer != null;

        try {
            try {
                while (input.next()) {
                    if (isCancelled())
                        throw new HadoopTaskCancelledException("Reduce task cancelled.");

                    reducer.reduce(input.key(), input.values(), collector, Reporter.NULL);
                }
            } finally {
                reducer.close();
            }
        } finally {
            collector.closeWriter();
        }

        collector.commit();
    } catch (Exception e) {
        if (collector != null)
            collector.abort();

        throw new IgniteCheckedException(e);
    }
}

From source file: org.apache.ignite.internal.processors.hadoop.v2.GridHadoopV2TaskContext.java

License: Apache License

/**
 * @param taskInfo Task info.
 * @param job Job.
 * @param jobId Job ID.
 * @param locNodeId Local node ID.
 * @param jobConfDataInput DataInput to read the JobConf from.
 */
public GridHadoopV2TaskContext(GridHadoopTaskInfo taskInfo, GridHadoopJob job, GridHadoopJobId jobId,
        @Nullable UUID locNodeId, DataInput jobConfDataInput) throws IgniteCheckedException {
    super(taskInfo, job);
    this.locNodeId = locNodeId;

    // Before creating the JobConf instance we should set a new context class loader.
    Thread.currentThread().setContextClassLoader(getClass().getClassLoader());

    try {
        JobConf jobConf = new JobConf();

        try {
            jobConf.readFields(jobConfDataInput);
        } catch (IOException e) {
            throw new IgniteCheckedException(e);
        }

        // For map-reduce jobs prefer local writes.
        jobConf.setBooleanIfUnset(PARAM_IGFS_PREFER_LOCAL_WRITES, true);

        jobCtx = new JobContextImpl(jobConf, new JobID(jobId.globalId().toString(), jobId.localId()));

        useNewMapper = jobConf.getUseNewMapper();
        useNewReducer = jobConf.getUseNewReducer();
        useNewCombiner = jobConf.getCombinerClass() == null;
    } finally {
        Thread.currentThread().setContextClassLoader(null);
    }
}

From source file: org.apache.ignite.internal.processors.hadoop.v2.HadoopV2TaskContext.java

License: Apache License

/**
 * @param taskInfo Task info.
 * @param job Job.
 * @param jobId Job ID.
 * @param locNodeId Local node ID.
 * @param jobConfDataInput DataInput to read the JobConf from.
 */
public HadoopV2TaskContext(HadoopTaskInfo taskInfo, HadoopJob job, HadoopJobId jobId, @Nullable UUID locNodeId,
        DataInput jobConfDataInput) throws IgniteCheckedException {
    super(taskInfo, job);
    this.locNodeId = locNodeId;

    // Before creating the JobConf instance we should set a new context class loader.
    Thread.currentThread().setContextClassLoader(getClass().getClassLoader());

    try {
        JobConf jobConf = new JobConf();

        try {
            jobConf.readFields(jobConfDataInput);
        } catch (IOException e) {
            throw new IgniteCheckedException(e);
        }

        // For map-reduce jobs prefer local writes.
        jobConf.setBooleanIfUnset(PARAM_IGFS_PREFER_LOCAL_WRITES, true);

        jobCtx = new JobContextImpl(jobConf, new JobID(jobId.globalId().toString(), jobId.localId()));

        useNewMapper = jobConf.getUseNewMapper();
        useNewReducer = jobConf.getUseNewReducer();
        useNewCombiner = jobConf.getCombinerClass() == null;
    } finally {
        Thread.currentThread().setContextClassLoader(null);
    }
}

From source file: org.gridgain.grid.kernal.processors.hadoop.v1.GridHadoopV1ReduceTask.java

License: Open Source License

/** {@inheritDoc} */
@SuppressWarnings("unchecked")
@Override
public void run(GridHadoopTaskContext taskCtx) throws GridException {
    GridHadoopJob job = taskCtx.job();

    GridHadoopV2TaskContext ctx = (GridHadoopV2TaskContext) taskCtx;

    JobConf jobConf = ctx.jobConf();

    GridHadoopTaskInput input = taskCtx.input();

    GridHadoopV1OutputCollector collector = null;

    try {
        collector = collector(jobConf, ctx, reduce || !job.info().hasReducer(), fileName(), ctx.attemptId());

        Reducer reducer = ReflectionUtils
                .newInstance(reduce ? jobConf.getReducerClass() : jobConf.getCombinerClass(), jobConf);

        assert reducer != null;

        try {
            try {
                while (input.next()) {
                    if (isCancelled())
                        throw new GridHadoopTaskCancelledException("Reduce task cancelled.");

                    reducer.reduce(input.key(), input.values(), collector, Reporter.NULL);
                }
            } finally {
                reducer.close();
            }
        } finally {
            collector.closeWriter();
        }

        collector.commit();
    } catch (Exception e) {
        if (collector != null)
            collector.abort();

        throw new GridException(e);
    }
}

From source file: org.gridgain.grid.kernal.processors.hadoop.v2.GridHadoopV2TaskContext.java

License: Open Source License

/**
 * @param taskInfo Task info.
 * @param job Job.
 * @param jobId Job ID.
 * @param locNodeId Local node ID.
 * @param jobConfDataInput DataInput to read the JobConf from.
 */
public GridHadoopV2TaskContext(GridHadoopTaskInfo taskInfo, GridHadoopJob job, GridHadoopJobId jobId,
        @Nullable UUID locNodeId, DataInput jobConfDataInput) throws GridException {
    super(taskInfo, job);
    this.locNodeId = locNodeId;

    // Before creating the JobConf instance we should set a new context class loader.
    Thread.currentThread().setContextClassLoader(getClass().getClassLoader());

    try {
        JobConf jobConf = new JobConf();

        try {
            jobConf.readFields(jobConfDataInput);
        } catch (IOException e) {
            throw new GridException(e);
        }

        // For map-reduce jobs prefer local writes.
        jobConf.setBooleanIfUnset(PARAM_GGFS_PREFER_LOCAL_WRITES, true);

        jobCtx = new JobContextImpl(jobConf, new JobID(jobId.globalId().toString(), jobId.localId()));

        useNewMapper = jobConf.getUseNewMapper();
        useNewReducer = jobConf.getUseNewReducer();
        useNewCombiner = jobConf.getCombinerClass() == null;
    } finally {
        Thread.currentThread().setContextClassLoader(null);
    }
}