List of usage examples for org.apache.hadoop.mapred.JobConf.getMapperClass()
public Class<? extends Mapper> getMapperClass()
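Before the source-file examples, a minimal sketch of the call in isolation: getMapperClass() returns the Mapper implementation configured on the JobConf (IdentityMapper.class when none has been set) and is typically paired with ReflectionUtils.newInstance to instantiate it, as several of the examples below do. MyMapper is a hypothetical placeholder written for this sketch, not taken from any of the listed sources.

import java.io.IOException;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.util.ReflectionUtils;

public class GetMapperClassSketch {

    // Hypothetical mapper, present only so there is something to configure.
    public static class MyMapper extends MapReduceBase
            implements Mapper<LongWritable, Text, Text, LongWritable> {
        public void map(LongWritable key, Text value,
                OutputCollector<Text, LongWritable> out, Reporter reporter) throws IOException {
            out.collect(value, key);
        }
    }

    @SuppressWarnings("rawtypes")
    public static void main(String[] args) {
        JobConf conf = new JobConf();

        // No mapper configured yet: getMapperClass() falls back to IdentityMapper.
        System.out.println(conf.getMapperClass()); // org.apache.hadoop.mapred.lib.IdentityMapper

        conf.setMapperClass(MyMapper.class);

        // Read the configured class back and instantiate it via ReflectionUtils.
        Class<? extends Mapper> mapperClass = conf.getMapperClass();
        Mapper mapper = ReflectionUtils.newInstance(mapperClass, conf);
        System.out.println(mapper.getClass().getName());
    }
}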
From source file:com.ibm.bi.dml.runtime.matrix.data.hadoopfix.DelegatingInputFormat.java
License:Apache License
public InputSplit[] getSplits(JobConf conf, int numSplits) throws IOException {
    JobConf confCopy = new JobConf(conf);
    List<InputSplit> splits = new ArrayList<InputSplit>();
    Map<Path, InputFormat> formatMap = MultipleInputs.getInputFormatMap(conf);
    Map<Path, Class<? extends Mapper>> mapperMap = MultipleInputs.getMapperTypeMap(conf);
    Map<Class<? extends InputFormat>, List<Path>> formatPaths = new HashMap<Class<? extends InputFormat>, List<Path>>();

    // First, build a map of InputFormats to Paths
    for (Entry<Path, InputFormat> entry : formatMap.entrySet()) {
        if (!formatPaths.containsKey(entry.getValue().getClass())) {
            formatPaths.put(entry.getValue().getClass(), new LinkedList<Path>());
        }
        formatPaths.get(entry.getValue().getClass()).add(entry.getKey());
    }

    for (Entry<Class<? extends InputFormat>, List<Path>> formatEntry : formatPaths.entrySet()) {
        Class<? extends InputFormat> formatClass = formatEntry.getKey();
        InputFormat format = (InputFormat) ReflectionUtils.newInstance(formatClass, conf);
        List<Path> paths = formatEntry.getValue();

        Map<Class<? extends Mapper>, List<Path>> mapperPaths = new HashMap<Class<? extends Mapper>, List<Path>>();

        // Now, for each set of paths that have a common InputFormat, build
        // a map of Mappers to the paths they're used for
        for (Path path : paths) {
            Class<? extends Mapper> mapperClass = mapperMap.get(path);
            if (!mapperPaths.containsKey(mapperClass)) {
                mapperPaths.put(mapperClass, new LinkedList<Path>());
            }
            mapperPaths.get(mapperClass).add(path);
        }

        // Now each set of paths that has a common InputFormat and Mapper can
        // be added to the same job, and split together.
        for (Entry<Class<? extends Mapper>, List<Path>> mapEntry : mapperPaths.entrySet()) {
            paths = mapEntry.getValue();
            Class<? extends Mapper> mapperClass = mapEntry.getKey();

            if (mapperClass == null) {
                mapperClass = conf.getMapperClass();
            }

            FileInputFormat.setInputPaths(confCopy, paths.toArray(new Path[paths.size()]));

            // Get splits for each input path and tag with InputFormat
            // and Mapper types by wrapping in a TaggedInputSplit.
            InputSplit[] pathSplits = format.getSplits(confCopy, numSplits);
            for (InputSplit pathSplit : pathSplits) {
                splits.add(new TaggedInputSplit(pathSplit, conf, format.getClass(), mapperClass));
            }
        }
    }

    return splits.toArray(new InputSplit[splits.size()]);
}
From source file:edu.uci.ics.hyracks.dataflow.hadoop.HadoopMapperOperatorDescriptor.java
License:Apache License
private Object createMapper(JobConf conf) throws Exception {
    Object mapper;
    if (mapperClass != null) {
        return ReflectionUtils.newInstance(mapperClass, conf);
    } else {
        String mapperClassName = null;
        if (jobConf.getUseNewMapper()) {
            JobContext jobContext = new ContextFactory().createJobContext(conf);
            mapperClass = jobContext.getMapperClass();
            mapperClassName = mapperClass.getName();
        } else {
            mapperClass = conf.getMapperClass();
            mapperClassName = mapperClass.getName();
        }
        mapper = getHadoopClassFactory().createMapper(mapperClassName, conf);
    }
    return mapper;
}
From source file:edu.ucsb.cs.hybrid.mappers.MultipleS_Runner.java
License:Apache License
@Override
public void instantMapper(JobConf job) {
    ClassLoader myClassLoader = ClassLoader.getSystemClassLoader();
    try {
        Class mapperClass = myClassLoader.loadClass(job.getMapperClass().getName());
        mapper = (MultipleS_Mapper) mapperClass.newInstance();
        mapper.configure(job);
    } catch (Exception e) {
        e.printStackTrace();
    }
}
From source file:edu.ucsb.cs.hybrid.mappers.SingleS_Runner.java
License:Apache License
public void instantMapper(JobConf job) {
    ClassLoader myClassLoader = ClassLoader.getSystemClassLoader();
    try {
        Class mapperClass = myClassLoader.loadClass(job.getMapperClass().getName());
        mapper = (SingleS_Mapper) mapperClass.newInstance();
        mapper.configure(job);
    } catch (Exception e) {
        e.printStackTrace();
    }
}
From source file:org.apache.avro.mapred.AvroJob.java
License:Apache License
private static void configureAvroInput(JobConf job) {
    if (job.get("mapred.input.format.class") == null)
        job.setInputFormat(AvroInputFormat.class);

    if (job.getMapperClass() == IdentityMapper.class)
        job.setMapperClass(HadoopMapper.class);

    configureAvroShuffle(job);
}
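Note: the comparison against IdentityMapper.class works because getMapperClass() returns IdentityMapper.class when no mapper has been configured, so AvroJob swaps in its HadoopMapper wrapper only when the job has not set an explicit mapper of its own.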
From source file:org.apache.avro.mapred.DelegatingInputFormat.java
License:Apache License
public InputSplit[] getSplits(JobConf conf, int numSplits) throws IOException {
    JobConf confCopy = new JobConf(conf);
    List<InputSplit> splits = new ArrayList<InputSplit>();
    Map<Path, Class<? extends AvroMapper>> mapperMap = AvroMultipleInputs.getMapperTypeMap(conf);
    Map<Path, Schema> schemaMap = AvroMultipleInputs.getInputSchemaMap(conf);
    Map<Schema, List<Path>> schemaPaths = new HashMap<Schema, List<Path>>();

    // First, build a map of Schemas to Paths
    for (Entry<Path, Schema> entry : schemaMap.entrySet()) {
        if (!schemaPaths.containsKey(entry.getValue())) {
            schemaPaths.put(entry.getValue(), new LinkedList<Path>());
            System.out.println(entry.getValue());
            System.out.println(entry.getKey());
        }
        schemaPaths.get(entry.getValue()).add(entry.getKey());
    }

    for (Entry<Schema, List<Path>> schemaEntry : schemaPaths.entrySet()) {
        Schema schema = schemaEntry.getKey();
        System.out.println(schema);
        InputFormat format = (InputFormat) ReflectionUtils.newInstance(AvroInputFormat.class, conf);
        List<Path> paths = schemaEntry.getValue();

        Map<Class<? extends AvroMapper>, List<Path>> mapperPaths = new HashMap<Class<? extends AvroMapper>, List<Path>>();

        // Now, for each set of paths that have a common Schema, build
        // a map of Mappers to the paths they're used for
        for (Path path : paths) {
            Class<? extends AvroMapper> mapperClass = mapperMap.get(path);
            if (!mapperPaths.containsKey(mapperClass)) {
                mapperPaths.put(mapperClass, new LinkedList<Path>());
            }
            mapperPaths.get(mapperClass).add(path);
        }

        // Now each set of paths that has a common InputFormat and Mapper can
        // be added to the same job, and split together.
        for (Entry<Class<? extends AvroMapper>, List<Path>> mapEntry : mapperPaths.entrySet()) {
            paths = mapEntry.getValue();
            Class<? extends AvroMapper> mapperClass = mapEntry.getKey();

            if (mapperClass == null) {
                mapperClass = (Class<? extends AvroMapper>) conf.getMapperClass();
            }

            FileInputFormat.setInputPaths(confCopy, paths.toArray(new Path[paths.size()]));

            // Get splits for each input path and tag with InputFormat
            // and Mapper types by wrapping in a TaggedInputSplit.
            InputSplit[] pathSplits = format.getSplits(confCopy, numSplits);
            for (InputSplit pathSplit : pathSplits) {
                splits.add(new TaggedInputSplit(pathSplit, conf, format.getClass(), mapperClass, schema));
            }
        }
    }

    return splits.toArray(new InputSplit[splits.size()]);
}
From source file:org.apache.ignite.internal.processors.hadoop.impl.v1.HadoopV1MapTask.java
License:Apache License
/** {@inheritDoc} */
@SuppressWarnings("unchecked")
@Override
public void run(HadoopTaskContext taskCtx) throws IgniteCheckedException {
    HadoopJobEx job = taskCtx.job();

    HadoopV2TaskContext taskCtx0 = (HadoopV2TaskContext) taskCtx;

    if (taskCtx.taskInfo().hasMapperIndex())
        HadoopMapperUtils.mapperIndex(taskCtx.taskInfo().mapperIndex());
    else
        HadoopMapperUtils.clearMapperIndex();

    try {
        JobConf jobConf = taskCtx0.jobConf();

        InputFormat inFormat = jobConf.getInputFormat();

        HadoopInputSplit split = info().inputSplit();

        InputSplit nativeSplit;

        if (split instanceof HadoopFileBlock) {
            HadoopFileBlock block = (HadoopFileBlock) split;

            nativeSplit = new FileSplit(new Path(block.file().toString()), block.start(), block.length(), EMPTY_HOSTS);
        } else
            nativeSplit = (InputSplit) taskCtx0.getNativeSplit(split);

        assert nativeSplit != null;

        Reporter reporter = new HadoopV1Reporter(taskCtx);

        HadoopV1OutputCollector collector = null;

        try {
            collector = collector(jobConf, taskCtx0, !job.info().hasCombiner() && !job.info().hasReducer(), fileName(), taskCtx0.attemptId());

            RecordReader reader = inFormat.getRecordReader(nativeSplit, jobConf, reporter);

            Mapper mapper = ReflectionUtils.newInstance(jobConf.getMapperClass(), jobConf);

            Object key = reader.createKey();
            Object val = reader.createValue();

            assert mapper != null;

            try {
                try {
                    while (reader.next(key, val)) {
                        if (isCancelled())
                            throw new HadoopTaskCancelledException("Map task cancelled.");

                        mapper.map(key, val, collector, reporter);
                    }

                    taskCtx.onMapperFinished();
                } finally {
                    mapper.close();
                }
            } finally {
                collector.closeWriter();
            }

            collector.commit();
        } catch (Exception e) {
            if (collector != null)
                collector.abort();

            throw new IgniteCheckedException(e);
        }
    } finally {
        HadoopMapperUtils.clearMapperIndex();
    }
}
From source file:org.apache.ignite.internal.processors.hadoop.v1.GridHadoopV1MapTask.java
License:Apache License
/** {@inheritDoc} */
@SuppressWarnings("unchecked")
@Override
public void run(GridHadoopTaskContext taskCtx) throws IgniteCheckedException {
    GridHadoopJob job = taskCtx.job();

    GridHadoopV2TaskContext ctx = (GridHadoopV2TaskContext) taskCtx;

    JobConf jobConf = ctx.jobConf();

    InputFormat inFormat = jobConf.getInputFormat();

    GridHadoopInputSplit split = info().inputSplit();

    InputSplit nativeSplit;

    if (split instanceof GridHadoopFileBlock) {
        GridHadoopFileBlock block = (GridHadoopFileBlock) split;

        nativeSplit = new FileSplit(new Path(block.file().toString()), block.start(), block.length(), EMPTY_HOSTS);
    } else
        nativeSplit = (InputSplit) ctx.getNativeSplit(split);

    assert nativeSplit != null;

    Reporter reporter = new GridHadoopV1Reporter(taskCtx);

    GridHadoopV1OutputCollector collector = null;

    try {
        collector = collector(jobConf, ctx, !job.info().hasCombiner() && !job.info().hasReducer(), fileName(), ctx.attemptId());

        RecordReader reader = inFormat.getRecordReader(nativeSplit, jobConf, reporter);

        Mapper mapper = ReflectionUtils.newInstance(jobConf.getMapperClass(), jobConf);

        Object key = reader.createKey();
        Object val = reader.createValue();

        assert mapper != null;

        try {
            try {
                while (reader.next(key, val)) {
                    if (isCancelled())
                        throw new GridHadoopTaskCancelledException("Map task cancelled.");

                    mapper.map(key, val, collector, reporter);
                }
            } finally {
                mapper.close();
            }
        } finally {
            collector.closeWriter();
        }

        collector.commit();
    } catch (Exception e) {
        if (collector != null)
            collector.abort();

        throw new IgniteCheckedException(e);
    }
}
From source file:org.apache.ignite.internal.processors.hadoop.v1.HadoopV1MapTask.java
License:Apache License
/** {@inheritDoc} */
@SuppressWarnings("unchecked")
@Override
public void run(HadoopTaskContext taskCtx) throws IgniteCheckedException {
    HadoopJob job = taskCtx.job();

    HadoopV2TaskContext ctx = (HadoopV2TaskContext) taskCtx;

    JobConf jobConf = ctx.jobConf();

    InputFormat inFormat = jobConf.getInputFormat();

    HadoopInputSplit split = info().inputSplit();

    InputSplit nativeSplit;

    if (split instanceof HadoopFileBlock) {
        HadoopFileBlock block = (HadoopFileBlock) split;

        nativeSplit = new FileSplit(new Path(block.file().toString()), block.start(), block.length(), EMPTY_HOSTS);
    } else
        nativeSplit = (InputSplit) ctx.getNativeSplit(split);

    assert nativeSplit != null;

    Reporter reporter = new HadoopV1Reporter(taskCtx);

    HadoopV1OutputCollector collector = null;

    try {
        collector = collector(jobConf, ctx, !job.info().hasCombiner() && !job.info().hasReducer(), fileName(), ctx.attemptId());

        RecordReader reader = inFormat.getRecordReader(nativeSplit, jobConf, reporter);

        Mapper mapper = ReflectionUtils.newInstance(jobConf.getMapperClass(), jobConf);

        Object key = reader.createKey();
        Object val = reader.createValue();

        assert mapper != null;

        try {
            try {
                while (reader.next(key, val)) {
                    if (isCancelled())
                        throw new HadoopTaskCancelledException("Map task cancelled.");

                    mapper.map(key, val, collector, reporter);
                }
            } finally {
                mapper.close();
            }
        } finally {
            collector.closeWriter();
        }

        collector.commit();
    } catch (Exception e) {
        if (collector != null)
            collector.abort();

        throw new IgniteCheckedException(e);
    }
}
From source file:org.apache.sysml.runtime.matrix.data.hadoopfix.DelegatingInputFormat.java
License:Apache License
public InputSplit[] getSplits(JobConf conf, int numSplits) throws IOException {
    JobConf confCopy = new JobConf(conf);
    List<InputSplit> splits = new ArrayList<>();
    Map<Path, InputFormat> formatMap = MultipleInputs.getInputFormatMap(conf);
    Map<Path, Class<? extends Mapper>> mapperMap = MultipleInputs.getMapperTypeMap(conf);
    Map<Class<? extends InputFormat>, List<Path>> formatPaths = new HashMap<>();

    // First, build a map of InputFormats to Paths
    for (Entry<Path, InputFormat> entry : formatMap.entrySet()) {
        if (!formatPaths.containsKey(entry.getValue().getClass())) {
            formatPaths.put(entry.getValue().getClass(), new LinkedList<Path>());
        }
        formatPaths.get(entry.getValue().getClass()).add(entry.getKey());
    }

    for (Entry<Class<? extends InputFormat>, List<Path>> formatEntry : formatPaths.entrySet()) {
        Class<? extends InputFormat> formatClass = formatEntry.getKey();
        InputFormat format = (InputFormat) ReflectionUtils.newInstance(formatClass, conf);
        List<Path> paths = formatEntry.getValue();

        Map<Class<? extends Mapper>, List<Path>> mapperPaths = new HashMap<>();

        // Now, for each set of paths that have a common InputFormat, build
        // a map of Mappers to the paths they're used for
        for (Path path : paths) {
            Class<? extends Mapper> mapperClass = mapperMap.get(path);
            if (!mapperPaths.containsKey(mapperClass)) {
                mapperPaths.put(mapperClass, new LinkedList<Path>());
            }
            mapperPaths.get(mapperClass).add(path);
        }

        // Now each set of paths that has a common InputFormat and Mapper can
        // be added to the same job, and split together.
        for (Entry<Class<? extends Mapper>, List<Path>> mapEntry : mapperPaths.entrySet()) {
            paths = mapEntry.getValue();
            Class<? extends Mapper> mapperClass = mapEntry.getKey();

            if (mapperClass == null) {
                mapperClass = conf.getMapperClass();
            }

            FileInputFormat.setInputPaths(confCopy, paths.toArray(new Path[paths.size()]));

            // Get splits for each input path and tag with InputFormat
            // and Mapper types by wrapping in a TaggedInputSplit.
            InputSplit[] pathSplits = format.getSplits(confCopy, numSplits);
            for (InputSplit pathSplit : pathSplits) {
                splits.add(new TaggedInputSplit(pathSplit, conf, format.getClass(), mapperClass));
            }
        }
    }

    return splits.toArray(new InputSplit[splits.size()]);
}