List of usage examples for org.apache.hadoop.mapred.JobConf.getInputFormat()
public InputFormat getInputFormat()
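getInputFormat() instantiates the job's configured old-API (org.apache.hadoop.mapred) InputFormat via reflection; TextInputFormat is the default. Before the source-file examples, here is a minimal standalone sketch of the typical call sequence. The input path /tmp/input and the class name GetInputFormatSketch are assumptions for illustration, and getSplits() will fail if the path does not exist.

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.InputFormat;
import org.apache.hadoop.mapred.InputSplit;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.TextInputFormat;

public class GetInputFormatSketch {
    public static void main(String[] args) throws Exception {
        JobConf jobConf = new JobConf();

        // Configure the old-API input format; TextInputFormat is the default.
        jobConf.setInputFormat(TextInputFormat.class);
        FileInputFormat.setInputPaths(jobConf, new Path("/tmp/input")); // hypothetical path

        // getInputFormat() creates the configured InputFormat instance via reflection.
        InputFormat<?, ?> format = jobConf.getInputFormat();

        // Typical next step: ask the format to compute input splits for the job.
        InputSplit[] splits = format.getSplits(jobConf, 1);
        System.out.println(format.getClass().getName() + " produced " + splits.length + " splits");
    }
}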
From source file:org.gridgain.grid.kernal.processors.hadoop.v1.GridHadoopV1MapTask.java
License:Open Source License
/** {@inheritDoc} */
@SuppressWarnings("unchecked")
@Override
public void run(GridHadoopTaskContext taskCtx) throws GridException {
    GridHadoopJob job = taskCtx.job();

    GridHadoopV2TaskContext ctx = (GridHadoopV2TaskContext)taskCtx;

    JobConf jobConf = ctx.jobConf();

    InputFormat inFormat = jobConf.getInputFormat();

    GridHadoopInputSplit split = info().inputSplit();

    InputSplit nativeSplit;

    if (split instanceof GridHadoopFileBlock) {
        GridHadoopFileBlock block = (GridHadoopFileBlock)split;

        nativeSplit = new FileSplit(new Path(block.file().toString()), block.start(), block.length(), EMPTY_HOSTS);
    }
    else
        nativeSplit = (InputSplit)ctx.getNativeSplit(split);

    assert nativeSplit != null;

    Reporter reporter = new GridHadoopV1Reporter(taskCtx);

    GridHadoopV1OutputCollector collector = null;

    try {
        collector = collector(jobConf, ctx, !job.info().hasCombiner() && !job.info().hasReducer(), fileName(), ctx.attemptId());

        RecordReader reader = inFormat.getRecordReader(nativeSplit, jobConf, reporter);

        Mapper mapper = ReflectionUtils.newInstance(jobConf.getMapperClass(), jobConf);

        Object key = reader.createKey();
        Object val = reader.createValue();

        assert mapper != null;

        try {
            try {
                while (reader.next(key, val)) {
                    if (isCancelled())
                        throw new GridHadoopTaskCancelledException("Map task cancelled.");

                    mapper.map(key, val, collector, reporter);
                }
            }
            finally {
                mapper.close();
            }
        }
        finally {
            collector.closeWriter();
        }

        collector.commit();
    }
    catch (Exception e) {
        if (collector != null)
            collector.abort();

        throw new GridException(e);
    }
}
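The example above shows the canonical map-side use of getInputFormat(): the returned InputFormat supplies the RecordReader that drives the mapper loop. The pattern distills to the sketch below, which assumes LongWritable/Text records, an already-resolved InputSplit, and a hypothetical readSplit helper; unlike the GridGain code above, it also closes the reader explicitly.

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.InputFormat;
import org.apache.hadoop.mapred.InputSplit;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.RecordReader;
import org.apache.hadoop.mapred.Reporter;

public class ReadSplitSketch {
    @SuppressWarnings("unchecked")
    static void readSplit(JobConf jobConf, InputSplit split) throws Exception {
        // Resolve the configured InputFormat (assumed here to yield LongWritable/Text records).
        InputFormat<LongWritable, Text> format = (InputFormat<LongWritable, Text>)jobConf.getInputFormat();

        RecordReader<LongWritable, Text> reader = format.getRecordReader(split, jobConf, Reporter.NULL);

        try {
            // Key and value objects are created once and reused across next() calls.
            LongWritable key = reader.createKey();
            Text val = reader.createValue();

            while (reader.next(key, val)) {
                // process (key, val), e.g. pass them to a Mapper
            }
        }
        finally {
            reader.close();
        }
    }
}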
From source file:org.gridgain.grid.kernal.processors.hadoop.v1.GridHadoopV1Splitter.java
License:Open Source License
/**
 * @param jobConf Job configuration.
 * @return Collection of mapped splits.
 * @throws GridException If mapping failed.
 */
public static Collection<GridHadoopInputSplit> splitJob(JobConf jobConf) throws GridException {
    try {
        InputFormat<?, ?> format = jobConf.getInputFormat();

        assert format != null;

        InputSplit[] splits = format.getSplits(jobConf, 0);

        Collection<GridHadoopInputSplit> res = new ArrayList<>(splits.length);

        for (int i = 0; i < splits.length; i++) {
            InputSplit nativeSplit = splits[i];

            if (nativeSplit instanceof FileSplit) {
                FileSplit s = (FileSplit)nativeSplit;

                res.add(new GridHadoopFileBlock(s.getLocations(), s.getPath().toUri(), s.getStart(), s.getLength()));
            }
            else
                res.add(GridHadoopUtils.wrapSplit(i, nativeSplit, nativeSplit.getLocations()));
        }

        return res;
    }
    catch (IOException e) {
        throw new GridException(e);
    }
}
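Note the design in this splitter: it asks the job's InputFormat for native splits, then normalizes them. FileSplits become GridHadoopFileBlock instances carrying the file URI, offset, and length, while any other split type is wrapped via GridHadoopUtils.wrapSplit, so the rest of the GridGain pipeline can treat all splits uniformly.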
From source file:tap.core.MapperBridge.java
License:Apache License
/**
 * @param conf
 * @throws IOException
 * @throws FileNotFoundException
 */
private void determineInputFormat(JobConf conf) throws FileNotFoundException, IOException {
    // Compare mapper input file signature with the Hadoop-configured class.
    FileFormat ff = sniffMapInFormat(conf);

    if (!ff.isCompatible(conf.getInputFormat())) {
        throw new IllegalArgumentException("Map input format not compatible with file format.");
    }

    // otherwise assume it is avro?
    if (conf.getInputFormat() instanceof TextInputFormat) {
        Class<?> inClass = conf.getClass(Phase.MAP_IN_CLASS, Object.class, Object.class);
        if (inClass == String.class) {
            isStringInput = true;
        } else if (inClass == Text.class) {
            isTextInput = true;
        } else {
            isJsonInput = true;
            inSchema = ReflectUtils.getSchema((IN) ReflectionUtils.newInstance(inClass, conf));
        }
    }

    isProtoInput = conf.getInputFormat() instanceof TapfileInputFormat;
}
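Here getInputFormat() is used for validation and dispatch rather than for reading: the method first checks that the sniffed on-disk file format is compatible with the configured input format, then branches on the InputFormat's concrete type (TextInputFormat vs. TapfileInputFormat) to flag the mapper input as String, Text, JSON, or proto (Tapfile) data.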