List of usage examples for org.apache.hadoop.mapred.JobConf.getInputFormat()
public InputFormat getInputFormat()
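getInputFormat() instantiates the job's configured old-API (org.apache.hadoop.mapred) InputFormat via reflection; TextInputFormat is the default. Before the source-file examples, here is a minimal standalone sketch of the typical call sequence. The input path /tmp/input and the class name GetInputFormatSketch are assumptions for illustration, and getSplits() will fail if the path does not exist.

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.InputFormat;
import org.apache.hadoop.mapred.InputSplit;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.TextInputFormat;

public class GetInputFormatSketch {
    public static void main(String[] args) throws Exception {
        JobConf jobConf = new JobConf();

        // Configure the old-API input format; TextInputFormat is the default.
        jobConf.setInputFormat(TextInputFormat.class);
        FileInputFormat.setInputPaths(jobConf, new Path("/tmp/input")); // hypothetical path

        // getInputFormat() creates the configured InputFormat instance via reflection.
        InputFormat<?, ?> format = jobConf.getInputFormat();

        // Typical next step: ask the format to compute input splits for the job.
        InputSplit[] splits = format.getSplits(jobConf, 1);
        System.out.println(format.getClass().getName() + " produced " + splits.length + " splits");
    }
}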
From source file:org.gridgain.grid.kernal.processors.hadoop.v1.GridHadoopV1MapTask.java
License:Open Source License
/** {@inheritDoc} */
@SuppressWarnings("unchecked")
@Override
public void run(GridHadoopTaskContext taskCtx) throws GridException {
    GridHadoopJob job = taskCtx.job();

    GridHadoopV2TaskContext ctx = (GridHadoopV2TaskContext)taskCtx;

    JobConf jobConf = ctx.jobConf();

    InputFormat inFormat = jobConf.getInputFormat();

    GridHadoopInputSplit split = info().inputSplit();

    InputSplit nativeSplit;

    if (split instanceof GridHadoopFileBlock) {
        GridHadoopFileBlock block = (GridHadoopFileBlock)split;

        nativeSplit = new FileSplit(new Path(block.file().toString()), block.start(), block.length(), EMPTY_HOSTS);
    }
    else
        nativeSplit = (InputSplit)ctx.getNativeSplit(split);

    assert nativeSplit != null;

    Reporter reporter = new GridHadoopV1Reporter(taskCtx);

    GridHadoopV1OutputCollector collector = null;

    try {
        collector = collector(jobConf, ctx, !job.info().hasCombiner() && !job.info().hasReducer(), fileName(), ctx.attemptId());

        RecordReader reader = inFormat.getRecordReader(nativeSplit, jobConf, reporter);

        Mapper mapper = ReflectionUtils.newInstance(jobConf.getMapperClass(), jobConf);

        Object key = reader.createKey();
        Object val = reader.createValue();

        assert mapper != null;

        try {
            try {
                while (reader.next(key, val)) {
                    if (isCancelled())
                        throw new GridHadoopTaskCancelledException("Map task cancelled.");

                    mapper.map(key, val, collector, reporter);
                }
            }
            finally {
                mapper.close();
            }
        }
        finally {
            collector.closeWriter();
        }

        collector.commit();
    }
    catch (Exception e) {
        if (collector != null)
            collector.abort();

        throw new GridException(e);
    }
}
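The example above shows the canonical map-side use of getInputFormat(): the returned InputFormat supplies the RecordReader that drives the mapper loop. The pattern distills to the sketch below, which assumes LongWritable/Text records, an already-resolved InputSplit, and a hypothetical readSplit helper; unlike the GridGain code above, it also closes the reader explicitly.

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.InputFormat;
import org.apache.hadoop.mapred.InputSplit;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.RecordReader;
import org.apache.hadoop.mapred.Reporter;

public class ReadSplitSketch {
    @SuppressWarnings("unchecked")
    static void readSplit(JobConf jobConf, InputSplit split) throws Exception {
        // Resolve the configured InputFormat (assumed here to yield LongWritable/Text records).
        InputFormat<LongWritable, Text> format = (InputFormat<LongWritable, Text>)jobConf.getInputFormat();

        RecordReader<LongWritable, Text> reader = format.getRecordReader(split, jobConf, Reporter.NULL);

        try {
            // Key and value objects are created once and reused across next() calls.
            LongWritable key = reader.createKey();
            Text val = reader.createValue();

            while (reader.next(key, val)) {
                // process (key, val), e.g. pass them to a Mapper
            }
        }
        finally {
            reader.close();
        }
    }
}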
From source file:org.gridgain.grid.kernal.processors.hadoop.v1.GridHadoopV1Splitter.java
License:Open Source License
/**
 * @param jobConf Job configuration.
 * @return Collection of mapped splits.
 * @throws GridException If mapping failed.
 */
public static Collection<GridHadoopInputSplit> splitJob(JobConf jobConf) throws GridException {
    try {
        InputFormat<?, ?> format = jobConf.getInputFormat();

        assert format != null;

        InputSplit[] splits = format.getSplits(jobConf, 0);

        Collection<GridHadoopInputSplit> res = new ArrayList<>(splits.length);

        for (int i = 0; i < splits.length; i++) {
            InputSplit nativeSplit = splits[i];

            if (nativeSplit instanceof FileSplit) {
                FileSplit s = (FileSplit)nativeSplit;

                res.add(new GridHadoopFileBlock(s.getLocations(), s.getPath().toUri(), s.getStart(), s.getLength()));
            }
            else
                res.add(GridHadoopUtils.wrapSplit(i, nativeSplit, nativeSplit.getLocations()));
        }

        return res;
    }
    catch (IOException e) {
        throw new GridException(e);
    }
}
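Note the design in this splitter: it asks the job's InputFormat for native splits, then normalizes them. FileSplits become GridHadoopFileBlock instances carrying the file URI, offset, and length, while any other split type is wrapped via GridHadoopUtils.wrapSplit, so the rest of the GridGain pipeline can treat all splits uniformly.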
From source file:tap.core.MapperBridge.java
License:Apache License
/**
 * @param conf
 * @throws IOException
 * @throws FileNotFoundException
 */
private void determineInputFormat(JobConf conf) throws FileNotFoundException, IOException {
    // Compare mapper input file signature with the Hadoop-configured class.
    FileFormat ff = sniffMapInFormat(conf);

    if (!ff.isCompatible(conf.getInputFormat())) {
        throw new IllegalArgumentException("Map input format not compatible with file format.");
    }

    // otherwise assume it is avro?
    if (conf.getInputFormat() instanceof TextInputFormat) {
        Class<?> inClass = conf.getClass(Phase.MAP_IN_CLASS, Object.class, Object.class);
        if (inClass == String.class) {
            isStringInput = true;
        } else if (inClass == Text.class) {
            isTextInput = true;
        } else {
            isJsonInput = true;
            inSchema = ReflectUtils.getSchema((IN) ReflectionUtils.newInstance(inClass, conf));
        }
    }

    isProtoInput = conf.getInputFormat() instanceof TapfileInputFormat;
}
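Here getInputFormat() is used for validation and dispatch rather than for reading: the method first checks that the sniffed on-disk file format is compatible with the configured input format, then branches on the InputFormat's concrete type (TextInputFormat vs. TapfileInputFormat) to flag the mapper input as String, Text, JSON, or proto (Tapfile) data.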