List of usage examples for org.apache.hadoop.mapred.JobConf.getMapperClass()
public Class<? extends Mapper> getMapperClass()
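Before the source-file examples, a minimal sketch of the call in isolation: getMapperClass() returns the Mapper implementation configured on the JobConf (IdentityMapper.class when none has been set) and is typically paired with ReflectionUtils.newInstance to instantiate it, as several of the examples below do. MyMapper is a hypothetical placeholder written for this sketch, not taken from any of the listed sources.

import java.io.IOException;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.util.ReflectionUtils;

public class GetMapperClassSketch {

    // Hypothetical mapper, present only so there is something to configure.
    public static class MyMapper extends MapReduceBase
            implements Mapper<LongWritable, Text, Text, LongWritable> {
        public void map(LongWritable key, Text value,
                OutputCollector<Text, LongWritable> out, Reporter reporter) throws IOException {
            out.collect(value, key);
        }
    }

    @SuppressWarnings("rawtypes")
    public static void main(String[] args) {
        JobConf conf = new JobConf();

        // No mapper configured yet: getMapperClass() falls back to IdentityMapper.
        System.out.println(conf.getMapperClass()); // org.apache.hadoop.mapred.lib.IdentityMapper

        conf.setMapperClass(MyMapper.class);

        // Read the configured class back and instantiate it via ReflectionUtils.
        Class<? extends Mapper> mapperClass = conf.getMapperClass();
        Mapper mapper = ReflectionUtils.newInstance(mapperClass, conf);
        System.out.println(mapper.getClass().getName());
    }
}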
From source file:com.ibm.bi.dml.runtime.matrix.data.hadoopfix.DelegatingInputFormat.java
License:Apache License
public InputSplit[] getSplits(JobConf conf, int numSplits) throws IOException {
    JobConf confCopy = new JobConf(conf);
    List<InputSplit> splits = new ArrayList<InputSplit>();
    Map<Path, InputFormat> formatMap = MultipleInputs.getInputFormatMap(conf);
    Map<Path, Class<? extends Mapper>> mapperMap = MultipleInputs.getMapperTypeMap(conf);
    Map<Class<? extends InputFormat>, List<Path>> formatPaths = new HashMap<Class<? extends InputFormat>, List<Path>>();

    // First, build a map of InputFormats to Paths
    for (Entry<Path, InputFormat> entry : formatMap.entrySet()) {
        if (!formatPaths.containsKey(entry.getValue().getClass())) {
            formatPaths.put(entry.getValue().getClass(), new LinkedList<Path>());
        }
        formatPaths.get(entry.getValue().getClass()).add(entry.getKey());
    }

    for (Entry<Class<? extends InputFormat>, List<Path>> formatEntry : formatPaths.entrySet()) {
        Class<? extends InputFormat> formatClass = formatEntry.getKey();
        InputFormat format = (InputFormat) ReflectionUtils.newInstance(formatClass, conf);
        List<Path> paths = formatEntry.getValue();

        Map<Class<? extends Mapper>, List<Path>> mapperPaths = new HashMap<Class<? extends Mapper>, List<Path>>();

        // Now, for each set of paths that have a common InputFormat, build
        // a map of Mappers to the paths they're used for
        for (Path path : paths) {
            Class<? extends Mapper> mapperClass = mapperMap.get(path);
            if (!mapperPaths.containsKey(mapperClass)) {
                mapperPaths.put(mapperClass, new LinkedList<Path>());
            }
            mapperPaths.get(mapperClass).add(path);
        }

        // Now each set of paths that has a common InputFormat and Mapper can
        // be added to the same job, and split together.
        for (Entry<Class<? extends Mapper>, List<Path>> mapEntry : mapperPaths.entrySet()) {
            paths = mapEntry.getValue();
            Class<? extends Mapper> mapperClass = mapEntry.getKey();

            if (mapperClass == null) {
                mapperClass = conf.getMapperClass();
            }

            FileInputFormat.setInputPaths(confCopy, paths.toArray(new Path[paths.size()]));

            // Get splits for each input path and tag with InputFormat
            // and Mapper types by wrapping in a TaggedInputSplit.
            InputSplit[] pathSplits = format.getSplits(confCopy, numSplits);
            for (InputSplit pathSplit : pathSplits) {
                splits.add(new TaggedInputSplit(pathSplit, conf, format.getClass(), mapperClass));
            }
        }
    }

    return splits.toArray(new InputSplit[splits.size()]);
}
From source file:edu.uci.ics.hyracks.dataflow.hadoop.HadoopMapperOperatorDescriptor.java
License:Apache License
private Object createMapper(JobConf conf) throws Exception {
    Object mapper;
    if (mapperClass != null) {
        return ReflectionUtils.newInstance(mapperClass, conf);
    } else {
        String mapperClassName = null;
        if (jobConf.getUseNewMapper()) {
            JobContext jobContext = new ContextFactory().createJobContext(conf);
            mapperClass = jobContext.getMapperClass();
            mapperClassName = mapperClass.getName();
        } else {
            mapperClass = conf.getMapperClass();
            mapperClassName = mapperClass.getName();
        }
        mapper = getHadoopClassFactory().createMapper(mapperClassName, conf);
    }
    return mapper;
}
From source file:edu.ucsb.cs.hybrid.mappers.MultipleS_Runner.java
License:Apache License
@Override
public void instantMapper(JobConf job) {
    ClassLoader myClassLoader = ClassLoader.getSystemClassLoader();
    try {
        Class mapperClass = myClassLoader.loadClass(job.getMapperClass().getName());
        mapper = (MultipleS_Mapper) mapperClass.newInstance();
        mapper.configure(job);
    } catch (Exception e) {
        e.printStackTrace();
    }
}
From source file:edu.ucsb.cs.hybrid.mappers.SingleS_Runner.java
License:Apache License
public void instantMapper(JobConf job) {
    ClassLoader myClassLoader = ClassLoader.getSystemClassLoader();
    try {
        Class mapperClass = myClassLoader.loadClass(job.getMapperClass().getName());
        mapper = (SingleS_Mapper) mapperClass.newInstance();
        mapper.configure(job);
    } catch (Exception e) {
        e.printStackTrace();
    }
}
From source file:org.apache.avro.mapred.AvroJob.java
License:Apache License
private static void configureAvroInput(JobConf job) {
    if (job.get("mapred.input.format.class") == null)
        job.setInputFormat(AvroInputFormat.class);

    if (job.getMapperClass() == IdentityMapper.class)
        job.setMapperClass(HadoopMapper.class);

    configureAvroShuffle(job);
}
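Note: the comparison against IdentityMapper.class works because getMapperClass() returns IdentityMapper.class when no mapper has been configured, so AvroJob swaps in its HadoopMapper wrapper only when the job has not set an explicit mapper of its own.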
From source file:org.apache.avro.mapred.DelegatingInputFormat.java
License:Apache License
public InputSplit[] getSplits(JobConf conf, int numSplits) throws IOException {
    JobConf confCopy = new JobConf(conf);
    List<InputSplit> splits = new ArrayList<InputSplit>();
    Map<Path, Class<? extends AvroMapper>> mapperMap = AvroMultipleInputs.getMapperTypeMap(conf);
    Map<Path, Schema> schemaMap = AvroMultipleInputs.getInputSchemaMap(conf);
    Map<Schema, List<Path>> schemaPaths = new HashMap<Schema, List<Path>>();

    // First, build a map of Schemas to Paths
    for (Entry<Path, Schema> entry : schemaMap.entrySet()) {
        if (!schemaPaths.containsKey(entry.getValue())) {
            schemaPaths.put(entry.getValue(), new LinkedList<Path>());
            System.out.println(entry.getValue());
            System.out.println(entry.getKey());
        }
        schemaPaths.get(entry.getValue()).add(entry.getKey());
    }

    for (Entry<Schema, List<Path>> schemaEntry : schemaPaths.entrySet()) {
        Schema schema = schemaEntry.getKey();
        System.out.println(schema);
        InputFormat format = (InputFormat) ReflectionUtils.newInstance(AvroInputFormat.class, conf);
        List<Path> paths = schemaEntry.getValue();

        Map<Class<? extends AvroMapper>, List<Path>> mapperPaths = new HashMap<Class<? extends AvroMapper>, List<Path>>();

        // Now, for each set of paths that have a common Schema, build
        // a map of Mappers to the paths they're used for
        for (Path path : paths) {
            Class<? extends AvroMapper> mapperClass = mapperMap.get(path);
            if (!mapperPaths.containsKey(mapperClass)) {
                mapperPaths.put(mapperClass, new LinkedList<Path>());
            }
            mapperPaths.get(mapperClass).add(path);
        }

        // Now each set of paths that has a common InputFormat and Mapper can
        // be added to the same job, and split together.
        for (Entry<Class<? extends AvroMapper>, List<Path>> mapEntry : mapperPaths.entrySet()) {
            paths = mapEntry.getValue();
            Class<? extends AvroMapper> mapperClass = mapEntry.getKey();

            if (mapperClass == null) {
                mapperClass = (Class<? extends AvroMapper>) conf.getMapperClass();
            }

            FileInputFormat.setInputPaths(confCopy, paths.toArray(new Path[paths.size()]));

            // Get splits for each input path and tag with InputFormat
            // and Mapper types by wrapping in a TaggedInputSplit.
            InputSplit[] pathSplits = format.getSplits(confCopy, numSplits);
            for (InputSplit pathSplit : pathSplits) {
                splits.add(new TaggedInputSplit(pathSplit, conf, format.getClass(), mapperClass, schema));
            }
        }
    }

    return splits.toArray(new InputSplit[splits.size()]);
}
From source file:org.apache.ignite.internal.processors.hadoop.impl.v1.HadoopV1MapTask.java
License:Apache License
/** {@inheritDoc} */
@SuppressWarnings("unchecked")
@Override
public void run(HadoopTaskContext taskCtx) throws IgniteCheckedException {
    HadoopJobEx job = taskCtx.job();

    HadoopV2TaskContext taskCtx0 = (HadoopV2TaskContext) taskCtx;

    if (taskCtx.taskInfo().hasMapperIndex())
        HadoopMapperUtils.mapperIndex(taskCtx.taskInfo().mapperIndex());
    else
        HadoopMapperUtils.clearMapperIndex();

    try {
        JobConf jobConf = taskCtx0.jobConf();

        InputFormat inFormat = jobConf.getInputFormat();

        HadoopInputSplit split = info().inputSplit();

        InputSplit nativeSplit;

        if (split instanceof HadoopFileBlock) {
            HadoopFileBlock block = (HadoopFileBlock) split;

            nativeSplit = new FileSplit(new Path(block.file().toString()), block.start(), block.length(), EMPTY_HOSTS);
        } else
            nativeSplit = (InputSplit) taskCtx0.getNativeSplit(split);

        assert nativeSplit != null;

        Reporter reporter = new HadoopV1Reporter(taskCtx);

        HadoopV1OutputCollector collector = null;

        try {
            collector = collector(jobConf, taskCtx0, !job.info().hasCombiner() && !job.info().hasReducer(), fileName(), taskCtx0.attemptId());

            RecordReader reader = inFormat.getRecordReader(nativeSplit, jobConf, reporter);

            Mapper mapper = ReflectionUtils.newInstance(jobConf.getMapperClass(), jobConf);

            Object key = reader.createKey();
            Object val = reader.createValue();

            assert mapper != null;

            try {
                try {
                    while (reader.next(key, val)) {
                        if (isCancelled())
                            throw new HadoopTaskCancelledException("Map task cancelled.");

                        mapper.map(key, val, collector, reporter);
                    }

                    taskCtx.onMapperFinished();
                } finally {
                    mapper.close();
                }
            } finally {
                collector.closeWriter();
            }

            collector.commit();
        } catch (Exception e) {
            if (collector != null)
                collector.abort();

            throw new IgniteCheckedException(e);
        }
    } finally {
        HadoopMapperUtils.clearMapperIndex();
    }
}
From source file:org.apache.ignite.internal.processors.hadoop.v1.GridHadoopV1MapTask.java
License:Apache License
/** {@inheritDoc} */
@SuppressWarnings("unchecked")
@Override
public void run(GridHadoopTaskContext taskCtx) throws IgniteCheckedException {
    GridHadoopJob job = taskCtx.job();

    GridHadoopV2TaskContext ctx = (GridHadoopV2TaskContext) taskCtx;

    JobConf jobConf = ctx.jobConf();

    InputFormat inFormat = jobConf.getInputFormat();

    GridHadoopInputSplit split = info().inputSplit();

    InputSplit nativeSplit;

    if (split instanceof GridHadoopFileBlock) {
        GridHadoopFileBlock block = (GridHadoopFileBlock) split;

        nativeSplit = new FileSplit(new Path(block.file().toString()), block.start(), block.length(), EMPTY_HOSTS);
    } else
        nativeSplit = (InputSplit) ctx.getNativeSplit(split);

    assert nativeSplit != null;

    Reporter reporter = new GridHadoopV1Reporter(taskCtx);

    GridHadoopV1OutputCollector collector = null;

    try {
        collector = collector(jobConf, ctx, !job.info().hasCombiner() && !job.info().hasReducer(), fileName(), ctx.attemptId());

        RecordReader reader = inFormat.getRecordReader(nativeSplit, jobConf, reporter);

        Mapper mapper = ReflectionUtils.newInstance(jobConf.getMapperClass(), jobConf);

        Object key = reader.createKey();
        Object val = reader.createValue();

        assert mapper != null;

        try {
            try {
                while (reader.next(key, val)) {
                    if (isCancelled())
                        throw new GridHadoopTaskCancelledException("Map task cancelled.");

                    mapper.map(key, val, collector, reporter);
                }
            } finally {
                mapper.close();
            }
        } finally {
            collector.closeWriter();
        }

        collector.commit();
    } catch (Exception e) {
        if (collector != null)
            collector.abort();

        throw new IgniteCheckedException(e);
    }
}
From source file:org.apache.ignite.internal.processors.hadoop.v1.HadoopV1MapTask.java
License:Apache License
/** {@inheritDoc} */
@SuppressWarnings("unchecked")
@Override
public void run(HadoopTaskContext taskCtx) throws IgniteCheckedException {
    HadoopJob job = taskCtx.job();

    HadoopV2TaskContext ctx = (HadoopV2TaskContext) taskCtx;

    JobConf jobConf = ctx.jobConf();

    InputFormat inFormat = jobConf.getInputFormat();

    HadoopInputSplit split = info().inputSplit();

    InputSplit nativeSplit;

    if (split instanceof HadoopFileBlock) {
        HadoopFileBlock block = (HadoopFileBlock) split;

        nativeSplit = new FileSplit(new Path(block.file().toString()), block.start(), block.length(), EMPTY_HOSTS);
    } else
        nativeSplit = (InputSplit) ctx.getNativeSplit(split);

    assert nativeSplit != null;

    Reporter reporter = new HadoopV1Reporter(taskCtx);

    HadoopV1OutputCollector collector = null;

    try {
        collector = collector(jobConf, ctx, !job.info().hasCombiner() && !job.info().hasReducer(), fileName(), ctx.attemptId());

        RecordReader reader = inFormat.getRecordReader(nativeSplit, jobConf, reporter);

        Mapper mapper = ReflectionUtils.newInstance(jobConf.getMapperClass(), jobConf);

        Object key = reader.createKey();
        Object val = reader.createValue();

        assert mapper != null;

        try {
            try {
                while (reader.next(key, val)) {
                    if (isCancelled())
                        throw new HadoopTaskCancelledException("Map task cancelled.");

                    mapper.map(key, val, collector, reporter);
                }
            } finally {
                mapper.close();
            }
        } finally {
            collector.closeWriter();
        }

        collector.commit();
    } catch (Exception e) {
        if (collector != null)
            collector.abort();

        throw new IgniteCheckedException(e);
    }
}
From source file:org.apache.sysml.runtime.matrix.data.hadoopfix.DelegatingInputFormat.java
License:Apache License
public InputSplit[] getSplits(JobConf conf, int numSplits) throws IOException {
    JobConf confCopy = new JobConf(conf);
    List<InputSplit> splits = new ArrayList<>();
    Map<Path, InputFormat> formatMap = MultipleInputs.getInputFormatMap(conf);
    Map<Path, Class<? extends Mapper>> mapperMap = MultipleInputs.getMapperTypeMap(conf);
    Map<Class<? extends InputFormat>, List<Path>> formatPaths = new HashMap<>();

    // First, build a map of InputFormats to Paths
    for (Entry<Path, InputFormat> entry : formatMap.entrySet()) {
        if (!formatPaths.containsKey(entry.getValue().getClass())) {
            formatPaths.put(entry.getValue().getClass(), new LinkedList<Path>());
        }
        formatPaths.get(entry.getValue().getClass()).add(entry.getKey());
    }

    for (Entry<Class<? extends InputFormat>, List<Path>> formatEntry : formatPaths.entrySet()) {
        Class<? extends InputFormat> formatClass = formatEntry.getKey();
        InputFormat format = (InputFormat) ReflectionUtils.newInstance(formatClass, conf);
        List<Path> paths = formatEntry.getValue();

        Map<Class<? extends Mapper>, List<Path>> mapperPaths = new HashMap<>();

        // Now, for each set of paths that have a common InputFormat, build
        // a map of Mappers to the paths they're used for
        for (Path path : paths) {
            Class<? extends Mapper> mapperClass = mapperMap.get(path);
            if (!mapperPaths.containsKey(mapperClass)) {
                mapperPaths.put(mapperClass, new LinkedList<Path>());
            }
            mapperPaths.get(mapperClass).add(path);
        }

        // Now each set of paths that has a common InputFormat and Mapper can
        // be added to the same job, and split together.
        for (Entry<Class<? extends Mapper>, List<Path>> mapEntry : mapperPaths.entrySet()) {
            paths = mapEntry.getValue();
            Class<? extends Mapper> mapperClass = mapEntry.getKey();

            if (mapperClass == null) {
                mapperClass = conf.getMapperClass();
            }

            FileInputFormat.setInputPaths(confCopy, paths.toArray(new Path[paths.size()]));

            // Get splits for each input path and tag with InputFormat
            // and Mapper types by wrapping in a TaggedInputSplit.
            InputSplit[] pathSplits = format.getSplits(confCopy, numSplits);
            for (InputSplit pathSplit : pathSplits) {
                splits.add(new TaggedInputSplit(pathSplit, conf, format.getClass(), mapperClass));
            }
        }
    }

    return splits.toArray(new InputSplit[splits.size()]);
}