List of usage examples for org.apache.hadoop.mapreduce JobContext getMapperClass
public Class<? extends Mapper<?, ?, ?, ?>> getMapperClass() throws ClassNotFoundException;
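For orientation, here is a minimal sketch of calling getMapperClass() from inside a custom InputFormat. The class name MyInputFormat is hypothetical, used only to illustrate the call site; the ClassNotFoundException must be handled because the mapper class is resolved by name from the job configuration.

import java.io.IOException;
import java.util.List;

import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;

// Hypothetical subclass, shown only to illustrate where getMapperClass() is typically called.
public class MyInputFormat extends TextInputFormat {
    @Override
    public List<InputSplit> getSplits(JobContext job) throws IOException {
        Class<? extends Mapper<?, ?, ?, ?>> mapperClass;
        try {
            // Resolves the configured mapper class by name from the job configuration.
            mapperClass = job.getMapperClass();
        } catch (ClassNotFoundException e) {
            // The class name stored in the configuration could not be loaded.
            throw new IOException("Mapper class is not found", e);
        }
        System.out.println("Configured mapper: " + mapperClass.getName());
        return super.getSplits(job);
    }
}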
From source file:com.ambiata.ivory.operation.hadoop.DelegatingInputFormat.java
License:Apache License
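This getSplits implementation groups the configured input paths first by InputFormat and then by Mapper, falls back to job.getMapperClass() for paths with no per-path mapper registered, and wraps each resulting split in a TaggedInputSplit so the correct format and mapper can be recovered at task time.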
@SuppressWarnings("unchecked") public List<InputSplit> getSplits(JobContext job) throws IOException, InterruptedException { Configuration conf = job.getConfiguration(); Job jobCopy = new Job(conf); List<InputSplit> splits = new ArrayList<InputSplit>(); Map<Path, InputFormat> formatMap = MultipleInputs.getInputFormatMap(job); Map<Path, Class<? extends Mapper>> mapperMap = MultipleInputs.getMapperTypeMap(job); Map<Class<? extends InputFormat>, List<Path>> formatPaths = new HashMap<Class<? extends InputFormat>, List<Path>>(); // First, build a map of InputFormats to Paths for (Entry<Path, InputFormat> entry : formatMap.entrySet()) { if (!formatPaths.containsKey(entry.getValue().getClass())) { formatPaths.put(entry.getValue().getClass(), new LinkedList<Path>()); }/* ww w. j a v a 2s . co m*/ formatPaths.get(entry.getValue().getClass()).add(entry.getKey()); } for (Entry<Class<? extends InputFormat>, List<Path>> formatEntry : formatPaths.entrySet()) { Class<? extends InputFormat> formatClass = formatEntry.getKey(); InputFormat format = (InputFormat) ReflectionUtils.newInstance(formatClass, conf); List<Path> paths = formatEntry.getValue(); Map<Class<? extends Mapper>, List<Path>> mapperPaths = new HashMap<Class<? extends Mapper>, List<Path>>(); // Now, for each set of paths that have a common InputFormat, build // a map of Mappers to the paths they're used for for (Path path : paths) { Class<? extends Mapper> mapperClass = mapperMap.get(path); if (!mapperPaths.containsKey(mapperClass)) { mapperPaths.put(mapperClass, new LinkedList<Path>()); } mapperPaths.get(mapperClass).add(path); } // Now each set of paths that has a common InputFormat and Mapper can // be added to the same job, and split together. for (Entry<Class<? extends Mapper>, List<Path>> mapEntry : mapperPaths.entrySet()) { paths = mapEntry.getValue(); Class<? extends Mapper> mapperClass = mapEntry.getKey(); if (mapperClass == null) { try { mapperClass = job.getMapperClass(); } catch (ClassNotFoundException e) { throw new IOException("Mapper class is not found", e); } } FileInputFormat.setInputPaths(jobCopy, paths.toArray(new Path[paths.size()])); // Get splits for each input path and tag with InputFormat // and Mapper types by wrapping in a TaggedInputSplit. List<InputSplit> pathSplits = format.getSplits(jobCopy); for (InputSplit pathSplit : pathSplits) { splits.add(new TaggedInputSplit(pathSplit, conf, format.getClass(), mapperClass)); } } } return splits; }
From source file:edu.uci.ics.hyracks.dataflow.hadoop.HadoopMapperOperatorDescriptor.java
License:Apache License
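Here getMapperClass() resolves the mapper only when the job uses the new MapReduce API; for the old API the mapper class is read from the JobConf directly, and in both cases the resolved class name is handed to a factory that instantiates the mapper.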
private Object createMapper(JobConf conf) throws Exception {
    Object mapper;
    if (mapperClass != null) {
        return ReflectionUtils.newInstance(mapperClass, conf);
    } else {
        String mapperClassName = null;
        if (jobConf.getUseNewMapper()) {
            JobContext jobContext = new ContextFactory().createJobContext(conf);
            mapperClass = jobContext.getMapperClass();
            mapperClassName = mapperClass.getName();
        } else {
            mapperClass = conf.getMapperClass();
            mapperClassName = mapperClass.getName();
        }
        mapper = getHadoopClassFactory().createMapper(mapperClassName, conf);
    }
    return mapper;
}