Example usage for org.apache.hadoop.mapred JobConf getCombinerClass

Introduction

On this page you can find example usages of org.apache.hadoop.mapred.JobConf.getCombinerClass.

Prototype

public Class<? extends Reducer> getCombinerClass() 

Document

Get the user-defined combiner class used to combine map-outputs before being sent to the reducers.

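Before the usage listings, here is a minimal sketch of the call itself, assuming only the standard Hadoop client libraries (IdentityReducer stands in for a real combiner; getCombinerClass() returns null until an old-API combiner has been configured):

import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.lib.IdentityReducer;

public class GetCombinerClassDemo {
    public static void main(String[] args) {
        JobConf conf = new JobConf();

        // No combiner configured yet: the getter returns null, which the
        // examples below use as an "is a combiner set at all?" check.
        System.out.println(conf.getCombinerClass()); // null

        // Register a combiner through the old mapred API and read it back.
        conf.setCombinerClass(IdentityReducer.class);
        System.out.println(conf.getCombinerClass().getName());
    }
}
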
Usage

From source file: edu.uci.ics.hyracks.hadoop.compat.util.HadoopAdapter.java

License: Apache License

private IOperatorDescriptor addCombiner(IOperatorDescriptor previousOperator, JobConf jobConf,
        JobSpecification spec) throws Exception {
    boolean useCombiner = (jobConf.getCombinerClass() != null);
    IOperatorDescriptor mapSideOutputOp = previousOperator;
    if (useCombiner) {
        System.out.println("Using Combiner:" + jobConf.getCombinerClass().getName());
        IOperatorDescriptor mapSideCombineSortOp = getExternalSorter(jobConf, spec);
        configurePartitionCountConstraint(spec, mapSideCombineSortOp, getInstanceCount(previousOperator));

        HadoopReducerOperatorDescriptor mapSideCombineReduceOp = getReducer(jobConf, spec, true);
        configurePartitionCountConstraint(spec, mapSideCombineReduceOp, getInstanceCount(previousOperator));
        spec.connect(new OneToOneConnectorDescriptor(spec), previousOperator, 0, mapSideCombineSortOp, 0);
        spec.connect(new OneToOneConnectorDescriptor(spec), mapSideCombineSortOp, 0, mapSideCombineReduceOp, 0);
        mapSideOutputOp = mapSideCombineReduceOp;
    }
    return mapSideOutputOp;
}

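Note the pattern above: getCombinerClass() doubles as a feature flag. Only when it returns non-null does the adapter splice in an external sort followed by a map-side reduce operator, since a combiner, like a reducer, expects its input grouped by key.
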
From source file: org.apache.ignite.internal.processors.hadoop.impl.v1.HadoopV1ReduceTask.java

License: Apache License

/** {@inheritDoc} */
@SuppressWarnings("unchecked")
@Override
public void run(HadoopTaskContext taskCtx) throws IgniteCheckedException {
    HadoopJobEx job = taskCtx.job();

    HadoopV2TaskContext taskCtx0 = (HadoopV2TaskContext) taskCtx;

    if (!reduce && taskCtx.taskInfo().hasMapperIndex())
        HadoopMapperUtils.mapperIndex(taskCtx.taskInfo().mapperIndex());
    else
        HadoopMapperUtils.clearMapperIndex();

    try {
        JobConf jobConf = taskCtx0.jobConf();

        HadoopTaskInput input = taskCtx.input();

        HadoopV1OutputCollector collector = null;

        try {
            collector = collector(jobConf, taskCtx0, reduce || !job.info().hasReducer(), fileName(),
                    taskCtx0.attemptId());

            Reducer reducer;
            if (reduce)
                reducer = ReflectionUtils.newInstance(jobConf.getReducerClass(), jobConf);
            else
                reducer = ReflectionUtils.newInstance(jobConf.getCombinerClass(), jobConf);

            assert reducer != null;

            try {
                try {
                    while (input.next()) {
                        if (isCancelled())
                            throw new HadoopTaskCancelledException("Reduce task cancelled.");

                        reducer.reduce(input.key(), input.values(), collector, Reporter.NULL);
                    }

                    if (!reduce)
                        taskCtx.onMapperFinished();
                } finally {
                    reducer.close();
                }
            } finally {
                collector.closeWriter();
            }

            collector.commit();
        } catch (Exception e) {
            if (collector != null)
                collector.abort();

            throw new IgniteCheckedException(e);
        }
    } finally {
        if (!reduce)
            HadoopMapperUtils.clearMapperIndex();
    }
}

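In the task above, one implementation serves both phases: when reduce is false the task is really a map-side combine, so it instantiates jobConf.getCombinerClass() instead of the reducer class, but drives it through the same Reducer.reduce() loop.
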
From source file: org.apache.ignite.internal.processors.hadoop.impl.v2.HadoopV2TaskContext.java

License: Apache License

/**
 * @param taskInfo Task info.
 * @param job Job.
 * @param jobId Job ID.
 * @param locNodeId Local node ID.
 * @param jobConfDataInput DataInput to read the JobConf from.
 */
public HadoopV2TaskContext(HadoopTaskInfo taskInfo, HadoopJobEx job, HadoopJobId jobId,
        @Nullable UUID locNodeId, DataInput jobConfDataInput) throws IgniteCheckedException {
    super(taskInfo, job);
    this.locNodeId = locNodeId;

    // Before creating the JobConf instance we should set a new context class loader.
    ClassLoader oldLdr = HadoopCommonUtils.setContextClassLoader(getClass().getClassLoader());

    try {
        JobConf jobConf = new JobConf();

        try {
            jobConf.readFields(jobConfDataInput);
        } catch (IOException e) {
            throw new IgniteCheckedException(e);
        }

        // For map-reduce jobs prefer local writes.
        jobConf.setBooleanIfUnset(PARAM_IGFS_PREFER_LOCAL_WRITES, true);

        initializePartiallyRawComparator(jobConf);

        jobCtx = new JobContextImpl(jobConf, new JobID(jobId.globalId().toString(), jobId.localId()));

        useNewMapper = jobConf.getUseNewMapper();
        useNewReducer = jobConf.getUseNewReducer();
        useNewCombiner = jobConf.getCombinerClass() == null;
    } finally {
        HadoopCommonUtils.restoreContextClassLoader(oldLdr);
    }
}

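The last assignment above is the interesting one: getCombinerClass() only reports a combiner registered through the old mapred API (the mapred.combiner.class key), while the new mapreduce API stores its combiner under a separate key, so a null result is read as "if this job has a combiner at all, it is a new-API one". A minimal sketch of that asymmetry, using standard Hadoop classes (the expected output in the comments is my reading of the API, not taken from the source above):

import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.lib.IdentityReducer;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.reduce.IntSumReducer;

public class CombinerApiAsymmetry {
    public static void main(String[] args) throws Exception {
        // Old mapred API: setter and getter share the mapred.combiner.class key.
        JobConf oldApi = new JobConf();
        oldApi.setCombinerClass(IdentityReducer.class);
        System.out.println(oldApi.getCombinerClass()); // IdentityReducer

        // New mapreduce API: the combiner is stored under
        // mapreduce.job.combine.class, which the old getter does not consult,
        // so it still reports null.
        Job job = Job.getInstance(new JobConf());
        job.setCombinerClass(IntSumReducer.class);
        System.out.println(new JobConf(job.getConfiguration()).getCombinerClass()); // null
    }
}
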
From source file: org.apache.ignite.internal.processors.hadoop.v1.GridHadoopV1ReduceTask.java

License: Apache License

/** {@inheritDoc} */
@SuppressWarnings("unchecked")
@Override
public void run(GridHadoopTaskContext taskCtx) throws IgniteCheckedException {
    GridHadoopJob job = taskCtx.job();

    GridHadoopV2TaskContext ctx = (GridHadoopV2TaskContext) taskCtx;

    JobConf jobConf = ctx.jobConf();

    GridHadoopTaskInput input = taskCtx.input();

    GridHadoopV1OutputCollector collector = null;

    try {
        collector = collector(jobConf, ctx, reduce || !job.info().hasReducer(), fileName(), ctx.attemptId());

        Reducer reducer = ReflectionUtils
                .newInstance(reduce ? jobConf.getReducerClass() : jobConf.getCombinerClass(), jobConf);

        assert reducer != null;

        try {
            try {
                while (input.next()) {
                    if (isCancelled())
                        throw new GridHadoopTaskCancelledException("Reduce task cancelled.");

                    reducer.reduce(input.key(), input.values(), collector, Reporter.NULL);
                }
            } finally {
                reducer.close();
            }
        } finally {
            collector.closeWriter();
        }

        collector.commit();
    } catch (Exception e) {
        if (collector != null)
            collector.abort();

        throw new IgniteCheckedException(e);
    }
}

From source file: org.apache.ignite.internal.processors.hadoop.v1.HadoopV1ReduceTask.java

License: Apache License

/** {@inheritDoc} */
@SuppressWarnings("unchecked")
@Override
public void run(HadoopTaskContext taskCtx) throws IgniteCheckedException {
    HadoopJob job = taskCtx.job();

    HadoopV2TaskContext ctx = (HadoopV2TaskContext) taskCtx;

    JobConf jobConf = ctx.jobConf();

    HadoopTaskInput input = taskCtx.input();

    HadoopV1OutputCollector collector = null;

    try {
        collector = collector(jobConf, ctx, reduce || !job.info().hasReducer(), fileName(), ctx.attemptId());

        Reducer reducer;
        if (reduce)
            reducer = ReflectionUtils.newInstance(jobConf.getReducerClass(), jobConf);
        else
            reducer = ReflectionUtils.newInstance(jobConf.getCombinerClass(), jobConf);

        assert reducer != null;

        try {
            try {
                while (input.next()) {
                    if (isCancelled())
                        throw new HadoopTaskCancelledException("Reduce task cancelled.");

                    reducer.reduce(input.key(), input.values(), collector, Reporter.NULL);
                }
            } finally {
                reducer.close();
            }
        } finally {
            collector.closeWriter();
        }

        collector.commit();
    } catch (Exception e) {
        if (collector != null)
            collector.abort();

        throw new IgniteCheckedException(e);
    }
}

From source file: org.apache.ignite.internal.processors.hadoop.v2.GridHadoopV2TaskContext.java

License: Apache License

/**
 * @param taskInfo Task info.
 * @param job Job.
 * @param jobId Job ID.
 * @param locNodeId Local node ID.
 * @param jobConfDataInput DataInput to read the JobConf from.
 */
public GridHadoopV2TaskContext(GridHadoopTaskInfo taskInfo, GridHadoopJob job, GridHadoopJobId jobId,
        @Nullable UUID locNodeId, DataInput jobConfDataInput) throws IgniteCheckedException {
    super(taskInfo, job);
    this.locNodeId = locNodeId;

    // Before creating the JobConf instance we should set a new context class loader.
    Thread.currentThread().setContextClassLoader(getClass().getClassLoader());

    try {
        JobConf jobConf = new JobConf();

        try {
            jobConf.readFields(jobConfDataInput);
        } catch (IOException e) {
            throw new IgniteCheckedException(e);
        }

        // For map-reduce jobs prefer local writes.
        jobConf.setBooleanIfUnset(PARAM_IGFS_PREFER_LOCAL_WRITES, true);

        jobCtx = new JobContextImpl(jobConf, new JobID(jobId.globalId().toString(), jobId.localId()));

        useNewMapper = jobConf.getUseNewMapper();
        useNewReducer = jobConf.getUseNewReducer();
        useNewCombiner = jobConf.getCombinerClass() == null;
    } finally {
        Thread.currentThread().setContextClassLoader(null);
    }
}

From source file: org.apache.ignite.internal.processors.hadoop.v2.HadoopV2TaskContext.java

License: Apache License

/**
 * @param taskInfo Task info.
 * @param job Job.
 * @param jobId Job ID.
 * @param locNodeId Local node ID.
 * @param jobConfDataInput DataInput to read the JobConf from.
 */
public HadoopV2TaskContext(HadoopTaskInfo taskInfo, HadoopJob job, HadoopJobId jobId, @Nullable UUID locNodeId,
        DataInput jobConfDataInput) throws IgniteCheckedException {
    super(taskInfo, job);
    this.locNodeId = locNodeId;

    // Before creating the JobConf instance we should set a new context class loader.
    Thread.currentThread().setContextClassLoader(getClass().getClassLoader());

    try {
        JobConf jobConf = new JobConf();

        try {
            jobConf.readFields(jobConfDataInput);
        } catch (IOException e) {
            throw new IgniteCheckedException(e);
        }

        // For map-reduce jobs prefer local writes.
        jobConf.setBooleanIfUnset(PARAM_IGFS_PREFER_LOCAL_WRITES, true);

        jobCtx = new JobContextImpl(jobConf, new JobID(jobId.globalId().toString(), jobId.localId()));

        useNewMapper = jobConf.getUseNewMapper();
        useNewReducer = jobConf.getUseNewReducer();
        useNewCombiner = jobConf.getCombinerClass() == null;
    } finally {
        Thread.currentThread().setContextClassLoader(null);
    }
}

From source file: org.gridgain.grid.kernal.processors.hadoop.v1.GridHadoopV1ReduceTask.java

License: Open Source License

/** {@inheritDoc} */
@SuppressWarnings("unchecked")
@Override
public void run(GridHadoopTaskContext taskCtx) throws GridException {
    GridHadoopJob job = taskCtx.job();

    GridHadoopV2TaskContext ctx = (GridHadoopV2TaskContext) taskCtx;

    JobConf jobConf = ctx.jobConf();

    GridHadoopTaskInput input = taskCtx.input();

    GridHadoopV1OutputCollector collector = null;

    try {
        collector = collector(jobConf, ctx, reduce || !job.info().hasReducer(), fileName(), ctx.attemptId());

        Reducer reducer = ReflectionUtils
                .newInstance(reduce ? jobConf.getReducerClass() : jobConf.getCombinerClass(), jobConf);

        assert reducer != null;

        try {
            try {
                while (input.next()) {
                    if (isCancelled())
                        throw new GridHadoopTaskCancelledException("Reduce task cancelled.");

                    reducer.reduce(input.key(), input.values(), collector, Reporter.NULL);
                }
            } finally {
                reducer.close();
            }
        } finally {
            collector.closeWriter();
        }

        collector.commit();
    } catch (Exception e) {
        if (collector != null)
            collector.abort();

        throw new GridException(e);
    }
}

From source file: org.gridgain.grid.kernal.processors.hadoop.v2.GridHadoopV2TaskContext.java

License: Open Source License

/**
 * @param taskInfo Task info.
 * @param job Job.
 * @param jobId Job ID.
 * @param locNodeId Local node ID.
 * @param jobConfDataInput DataInput to read the JobConf from.
 */
public GridHadoopV2TaskContext(GridHadoopTaskInfo taskInfo, GridHadoopJob job, GridHadoopJobId jobId,
        @Nullable UUID locNodeId, DataInput jobConfDataInput) throws GridException {
    super(taskInfo, job);
    this.locNodeId = locNodeId;

    // Before creating the JobConf instance we should set a new context class loader.
    Thread.currentThread().setContextClassLoader(getClass().getClassLoader());

    try {
        JobConf jobConf = new JobConf();

        try {
            jobConf.readFields(jobConfDataInput);
        } catch (IOException e) {
            throw new GridException(e);
        }

        // For map-reduce jobs prefer local writes.
        jobConf.setBooleanIfUnset(PARAM_GGFS_PREFER_LOCAL_WRITES, true);

        jobCtx = new JobContextImpl(jobConf, new JobID(jobId.globalId().toString(), jobId.localId()));

        useNewMapper = jobConf.getUseNewMapper();
        useNewReducer = jobConf.getUseNewReducer();
        useNewCombiner = jobConf.getCombinerClass() == null;
    } finally {
        Thread.currentThread().setContextClassLoader(null);
    }
}