List of usage examples for org.apache.hadoop.mapreduce JobContext getMapperClass
public Class<? extends Mapper<?, ?, ?, ?>> getMapperClass() throws ClassNotFoundException;
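For orientation, here is a minimal sketch of calling getMapperClass() from inside a custom InputFormat. The class name MyInputFormat is hypothetical, used only to illustrate the call site; the ClassNotFoundException must be handled because the mapper class is resolved by name from the job configuration.

import java.io.IOException;
import java.util.List;

import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;

// Hypothetical subclass, shown only to illustrate where getMapperClass() is typically called.
public class MyInputFormat extends TextInputFormat {
    @Override
    public List<InputSplit> getSplits(JobContext job) throws IOException {
        Class<? extends Mapper<?, ?, ?, ?>> mapperClass;
        try {
            // Resolves the configured mapper class by name from the job configuration.
            mapperClass = job.getMapperClass();
        } catch (ClassNotFoundException e) {
            // The class name stored in the configuration could not be loaded.
            throw new IOException("Mapper class is not found", e);
        }
        System.out.println("Configured mapper: " + mapperClass.getName());
        return super.getSplits(job);
    }
}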
From source file:com.ambiata.ivory.operation.hadoop.DelegatingInputFormat.java
License:Apache License
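This getSplits implementation groups the configured input paths first by InputFormat and then by Mapper, falls back to job.getMapperClass() for paths with no per-path mapper registered, and wraps each resulting split in a TaggedInputSplit so the correct format and mapper can be recovered at task time.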
@SuppressWarnings("unchecked") public List<InputSplit> getSplits(JobContext job) throws IOException, InterruptedException { Configuration conf = job.getConfiguration(); Job jobCopy = new Job(conf); List<InputSplit> splits = new ArrayList<InputSplit>(); Map<Path, InputFormat> formatMap = MultipleInputs.getInputFormatMap(job); Map<Path, Class<? extends Mapper>> mapperMap = MultipleInputs.getMapperTypeMap(job); Map<Class<? extends InputFormat>, List<Path>> formatPaths = new HashMap<Class<? extends InputFormat>, List<Path>>(); // First, build a map of InputFormats to Paths for (Entry<Path, InputFormat> entry : formatMap.entrySet()) { if (!formatPaths.containsKey(entry.getValue().getClass())) { formatPaths.put(entry.getValue().getClass(), new LinkedList<Path>()); }/* ww w. j a v a 2s . co m*/ formatPaths.get(entry.getValue().getClass()).add(entry.getKey()); } for (Entry<Class<? extends InputFormat>, List<Path>> formatEntry : formatPaths.entrySet()) { Class<? extends InputFormat> formatClass = formatEntry.getKey(); InputFormat format = (InputFormat) ReflectionUtils.newInstance(formatClass, conf); List<Path> paths = formatEntry.getValue(); Map<Class<? extends Mapper>, List<Path>> mapperPaths = new HashMap<Class<? extends Mapper>, List<Path>>(); // Now, for each set of paths that have a common InputFormat, build // a map of Mappers to the paths they're used for for (Path path : paths) { Class<? extends Mapper> mapperClass = mapperMap.get(path); if (!mapperPaths.containsKey(mapperClass)) { mapperPaths.put(mapperClass, new LinkedList<Path>()); } mapperPaths.get(mapperClass).add(path); } // Now each set of paths that has a common InputFormat and Mapper can // be added to the same job, and split together. for (Entry<Class<? extends Mapper>, List<Path>> mapEntry : mapperPaths.entrySet()) { paths = mapEntry.getValue(); Class<? extends Mapper> mapperClass = mapEntry.getKey(); if (mapperClass == null) { try { mapperClass = job.getMapperClass(); } catch (ClassNotFoundException e) { throw new IOException("Mapper class is not found", e); } } FileInputFormat.setInputPaths(jobCopy, paths.toArray(new Path[paths.size()])); // Get splits for each input path and tag with InputFormat // and Mapper types by wrapping in a TaggedInputSplit. List<InputSplit> pathSplits = format.getSplits(jobCopy); for (InputSplit pathSplit : pathSplits) { splits.add(new TaggedInputSplit(pathSplit, conf, format.getClass(), mapperClass)); } } } return splits; }
From source file:edu.uci.ics.hyracks.dataflow.hadoop.HadoopMapperOperatorDescriptor.java
License:Apache License
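Here getMapperClass() resolves the mapper only when the job uses the new MapReduce API; for the old API the mapper class is read from the JobConf directly, and in both cases the resolved class name is handed to a factory that instantiates the mapper.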
private Object createMapper(JobConf conf) throws Exception {
    Object mapper;
    if (mapperClass != null) {
        return ReflectionUtils.newInstance(mapperClass, conf);
    } else {
        String mapperClassName = null;
        if (jobConf.getUseNewMapper()) {
            JobContext jobContext = new ContextFactory().createJobContext(conf);
            mapperClass = jobContext.getMapperClass();
            mapperClassName = mapperClass.getName();
        } else {
            mapperClass = conf.getMapperClass();
            mapperClassName = mapperClass.getName();
        }
        mapper = getHadoopClassFactory().createMapper(mapperClassName, conf);
    }
    return mapper;
}