Example usage for org.apache.hadoop.mapreduce JobContext getMapperClass

Introduction

On this page you can find example usages of org.apache.hadoop.mapreduce JobContext getMapperClass.

Prototype

public Class<? extends Mapper<?, ?, ?, ?>> getMapperClass() throws ClassNotFoundException;

Document

Get the Mapper class for the job.
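
Before the full examples, here is a minimal hedged sketch of calling this method from code that holds a JobContext. The class and helper names (MapperResolver, resolveMapper) are hypothetical, not part of Hadoop; the exception handling mirrors the first example below.

import java.io.IOException;

import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.Mapper;

public final class MapperResolver {
    // Resolve the job's configured Mapper class, wrapping the checked
    // ClassNotFoundException in an IOException as the examples below do.
    public static Class<? extends Mapper<?, ?, ?, ?>> resolveMapper(JobContext context) throws IOException {
        try {
            return context.getMapperClass();
        } catch (ClassNotFoundException e) {
            throw new IOException("Mapper class is not found", e);
        }
    }
}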

Usage

From source file: com.ambiata.ivory.operation.hadoop.DelegatingInputFormat.java

License: Apache License

@SuppressWarnings("unchecked")
public List<InputSplit> getSplits(JobContext job) throws IOException, InterruptedException {
    Configuration conf = job.getConfiguration();
    Job jobCopy = new Job(conf);
    List<InputSplit> splits = new ArrayList<InputSplit>();
    Map<Path, InputFormat> formatMap = MultipleInputs.getInputFormatMap(job);
    Map<Path, Class<? extends Mapper>> mapperMap = MultipleInputs.getMapperTypeMap(job);
    Map<Class<? extends InputFormat>, List<Path>> formatPaths = new HashMap<Class<? extends InputFormat>, List<Path>>();

    // First, build a map of InputFormats to Paths
    for (Entry<Path, InputFormat> entry : formatMap.entrySet()) {
        if (!formatPaths.containsKey(entry.getValue().getClass())) {
            formatPaths.put(entry.getValue().getClass(), new LinkedList<Path>());
        }

        formatPaths.get(entry.getValue().getClass()).add(entry.getKey());
    }

    for (Entry<Class<? extends InputFormat>, List<Path>> formatEntry : formatPaths.entrySet()) {
        Class<? extends InputFormat> formatClass = formatEntry.getKey();
        InputFormat format = (InputFormat) ReflectionUtils.newInstance(formatClass, conf);
        List<Path> paths = formatEntry.getValue();

        Map<Class<? extends Mapper>, List<Path>> mapperPaths = new HashMap<Class<? extends Mapper>, List<Path>>();

        // Now, for each set of paths that have a common InputFormat, build
        // a map of Mappers to the paths they're used for
        for (Path path : paths) {
            Class<? extends Mapper> mapperClass = mapperMap.get(path);
            if (!mapperPaths.containsKey(mapperClass)) {
                mapperPaths.put(mapperClass, new LinkedList<Path>());
            }

            mapperPaths.get(mapperClass).add(path);
        }

        // Now each set of paths that has a common InputFormat and Mapper can
        // be added to the same job, and split together.
        for (Entry<Class<? extends Mapper>, List<Path>> mapEntry : mapperPaths.entrySet()) {
            paths = mapEntry.getValue();
            Class<? extends Mapper> mapperClass = mapEntry.getKey();

            if (mapperClass == null) {
                try {
                    mapperClass = job.getMapperClass();
                } catch (ClassNotFoundException e) {
                    throw new IOException("Mapper class is not found", e);
                }
            }

            FileInputFormat.setInputPaths(jobCopy, paths.toArray(new Path[paths.size()]));

            // Get splits for each input path and tag with InputFormat
            // and Mapper types by wrapping in a TaggedInputSplit.
            List<InputSplit> pathSplits = format.getSplits(jobCopy);
            for (InputSplit pathSplit : pathSplits) {
                splits.add(new TaggedInputSplit(pathSplit, conf, format.getClass(), mapperClass));
            }
        }
    }

    return splits;
}
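
Note the fallback in this example: when no per-path Mapper was registered, the split is tagged with the job-wide Mapper returned by job.getMapperClass(). With the stock org.apache.hadoop.mapreduce.lib.input.MultipleInputs (which the Ivory class appears to mirror), that happens when paths are registered without an explicit per-path mapper. A hedged setup sketch, assuming stock Hadoop classes; MultiInputJobSetup and MyMapper are hypothetical:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.MultipleInputs;
import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;

public class MultiInputJobSetup {
    // Hypothetical stand-in for a real user mapper.
    public static class MyMapper extends Mapper<LongWritable, Text, Text, Text> { }

    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration(), "multi-input");
        // Job-wide mapper: applies to every path registered without its own mapper.
        job.setMapperClass(MyMapper.class);
        MultipleInputs.addInputPath(job, new Path("/data/a"), TextInputFormat.class);
        MultipleInputs.addInputPath(job, new Path("/data/b"), SequenceFileInputFormat.class);
        // getSplits() finds no per-path mapper for either path, so each
        // TaggedInputSplit is tagged with job.getMapperClass(), i.e. MyMapper.
    }
}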

From source file: edu.uci.ics.hyracks.dataflow.hadoop.HadoopMapperOperatorDescriptor.java

License: Apache License

private Object createMapper(JobConf conf) throws Exception {
    Object mapper;
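    // A mapper class provided directly to this descriptor takes precedence.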
    if (mapperClass != null) {
        return ReflectionUtils.newInstance(mapperClass, conf);
    } else {
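        // Otherwise resolve the class name from the job configuration (new vs. old API).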
        String mapperClassName = null;
        if (jobConf.getUseNewMapper()) {
            JobContext jobContext = new ContextFactory().createJobContext(conf);
            mapperClass = jobContext.getMapperClass();
            mapperClassName = mapperClass.getName();
        } else {
            mapperClass = conf.getMapperClass();
            mapperClassName = mapperClass.getName();
        }
        mapper = getHadoopClassFactory().createMapper(mapperClassName, conf);
    }
    return mapper;
}
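
The branch on jobConf.getUseNewMapper() handles Hadoop's two APIs: with the new org.apache.hadoop.mapreduce API the class is resolved via JobContext.getMapperClass(), a reflective lookup (hence the checked ClassNotFoundException), while the old org.apache.hadoop.mapred API exposes the mapper class directly on JobConf. A minimal sketch of the same branch using only stock Hadoop classes, with no Hyracks ContextFactory; MapperClassNames and mapperClassName are hypothetical names:

import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapreduce.Job;

public final class MapperClassNames {
    // Resolve the configured mapper's class name under either Hadoop API.
    public static String mapperClassName(JobConf conf) throws Exception {
        if (conf.getUseNewMapper()) {
            // New API: a Job wraps the configuration and exposes the
            // JobContext view, including getMapperClass().
            return Job.getInstance(conf).getMapperClass().getName();
        }
        // Old API: JobConf exposes the org.apache.hadoop.mapred.Mapper class directly.
        return conf.getMapperClass().getName();
    }
}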