Example usage for org.apache.hadoop.mapred JobConf getMapperClass

Introduction

On this page you can find example usages of org.apache.hadoop.mapred.JobConf.getMapperClass().

Prototype

public Class<? extends Mapper> getMapperClass() 

Document

Get the Mapper class for the job, defaulting to IdentityMapper if no mapper has been set.
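
A minimal sketch of the setter/getter round trip, assuming only stock Hadoop classes (MyMapper is a hypothetical mapper used purely for illustration):

import java.io.IOException;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;

public class GetMapperClassExample {

    // Hypothetical mapper, present only so there is something to configure.
    public static class MyMapper extends MapReduceBase
            implements Mapper<LongWritable, Text, Text, LongWritable> {
        public void map(LongWritable key, Text value, OutputCollector<Text, LongWritable> output,
                Reporter reporter) throws IOException {
            output.collect(value, key);
        }
    }

    public static void main(String[] args) {
        JobConf conf = new JobConf();

        // With no mapper configured, getMapperClass() falls back to IdentityMapper.
        System.out.println("default mapper: " + conf.getMapperClass().getName());

        // After setMapperClass(), getMapperClass() returns the configured class.
        conf.setMapperClass(MyMapper.class);
        System.out.println("configured mapper: " + conf.getMapperClass().getName());
    }
}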

Usage

From source file:com.ibm.bi.dml.runtime.matrix.data.hadoopfix.DelegatingInputFormat.java

License:Apache License

public InputSplit[] getSplits(JobConf conf, int numSplits) throws IOException {

    JobConf confCopy = new JobConf(conf);
    List<InputSplit> splits = new ArrayList<InputSplit>();
    Map<Path, InputFormat> formatMap = MultipleInputs.getInputFormatMap(conf);
    Map<Path, Class<? extends Mapper>> mapperMap = MultipleInputs.getMapperTypeMap(conf);
    Map<Class<? extends InputFormat>, List<Path>> formatPaths = new HashMap<Class<? extends InputFormat>, List<Path>>();

    // First, build a map of InputFormats to Paths
    for (Entry<Path, InputFormat> entry : formatMap.entrySet()) {
        if (!formatPaths.containsKey(entry.getValue().getClass())) {
            formatPaths.put(entry.getValue().getClass(), new LinkedList<Path>());
        }

        formatPaths.get(entry.getValue().getClass()).add(entry.getKey());
    }

    for (Entry<Class<? extends InputFormat>, List<Path>> formatEntry : formatPaths.entrySet()) {
        Class<? extends InputFormat> formatClass = formatEntry.getKey();
        InputFormat format = (InputFormat) ReflectionUtils.newInstance(formatClass, conf);
        List<Path> paths = formatEntry.getValue();

        Map<Class<? extends Mapper>, List<Path>> mapperPaths = new HashMap<Class<? extends Mapper>, List<Path>>();

        // Now, for each set of paths that have a common InputFormat, build
        // a map of Mappers to the paths they're used for
        for (Path path : paths) {
            Class<? extends Mapper> mapperClass = mapperMap.get(path);
            if (!mapperPaths.containsKey(mapperClass)) {
                mapperPaths.put(mapperClass, new LinkedList<Path>());
            }

            mapperPaths.get(mapperClass).add(path);
        }

        // Now each set of paths that has a common InputFormat and Mapper can
        // be added to the same job, and split together.
        for (Entry<Class<? extends Mapper>, List<Path>> mapEntry : mapperPaths.entrySet()) {
            paths = mapEntry.getValue();
            Class<? extends Mapper> mapperClass = mapEntry.getKey();

            if (mapperClass == null) {
                mapperClass = conf.getMapperClass();
            }

            FileInputFormat.setInputPaths(confCopy, paths.toArray(new Path[paths.size()]));

            // Get splits for each input path and tag with InputFormat
            // and Mapper types by wrapping in a TaggedInputSplit.
            InputSplit[] pathSplits = format.getSplits(confCopy, numSplits);
            for (InputSplit pathSplit : pathSplits) {
                splits.add(new TaggedInputSplit(pathSplit, conf, format.getClass(), mapperClass));
            }
        }
    }

    return splits.toArray(new InputSplit[splits.size()]);
}
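
The mapper-type map consulted above is populated on the driver side by MultipleInputs.addInputPath(). A minimal, hypothetical driver sketch (the paths are placeholders) showing why the conf.getMapperClass() fallback exists:

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.SequenceFileInputFormat;
import org.apache.hadoop.mapred.TextInputFormat;
import org.apache.hadoop.mapred.lib.IdentityMapper;
import org.apache.hadoop.mapred.lib.InverseMapper;
import org.apache.hadoop.mapred.lib.MultipleInputs;

public class MultipleInputsDriverSketch {
    public static void main(String[] args) {
        JobConf conf = new JobConf();

        // This path gets an explicit mapper, so DelegatingInputFormat finds it in the
        // mapper-type map and tags its splits with InverseMapper.
        MultipleInputs.addInputPath(conf, new Path("/data/text"),
                TextInputFormat.class, InverseMapper.class);

        // This path is registered without a mapper, so mapperMap.get(path) returns null
        // and getSplits() falls back to conf.getMapperClass(), the job-wide default.
        MultipleInputs.addInputPath(conf, new Path("/data/seq"),
                SequenceFileInputFormat.class);
        conf.setMapperClass(IdentityMapper.class);
    }
}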

From source file:edu.uci.ics.hyracks.dataflow.hadoop.HadoopMapperOperatorDescriptor.java

License:Apache License

private Object createMapper(JobConf conf) throws Exception {
    Object mapper;
    if (mapperClass != null) {
        return ReflectionUtils.newInstance(mapperClass, conf);
    } else {
        String mapperClassName = null;
        if (jobConf.getUseNewMapper()) {
            JobContext jobContext = new ContextFactory().createJobContext(conf);
            mapperClass = jobContext.getMapperClass();
            mapperClassName = mapperClass.getName();
        } else {
            mapperClass = conf.getMapperClass();
            mapperClassName = mapperClass.getName();
        }
        mapper = getHadoopClassFactory().createMapper(mapperClassName, conf);
    }
    return mapper;
}
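
The branch above distinguishes old-API and new-API jobs through a Hyracks ContextFactory shim. A minimal sketch of the same decision using only stock Hadoop classes (assuming Hadoop 2.x, where Job.getInstance(Configuration) is available):

import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapreduce.Job;

public class MapperClassNameResolver {
    public static String resolveMapperClassName(JobConf conf) throws Exception {
        if (conf.getUseNewMapper()) {
            // New API: the mapper class is read through the mapreduce JobContext view.
            return Job.getInstance(conf).getMapperClass().getName();
        }
        // Old API: JobConf.getMapperClass() reads mapred.mapper.class directly.
        return conf.getMapperClass().getName();
    }
}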

From source file:edu.ucsb.cs.hybrid.mappers.MultipleS_Runner.java

License:Apache License

@Override
public void instantMapper(JobConf job) {
    ClassLoader myClassLoader = ClassLoader.getSystemClassLoader();
    try {
        Class mapperClass = myClassLoader.loadClass(job.getMapperClass().getName());
        mapper = (MultipleS_Mapper) mapperClass.newInstance();
        mapper.configure(job);
    } catch (Exception e) {
        e.printStackTrace();
    }
}

From source file:edu.ucsb.cs.hybrid.mappers.SingleS_Runner.java

License:Apache License

public void instantMapper(JobConf job) {
    ClassLoader myClassLoader = ClassLoader.getSystemClassLoader();
    try {
        Class mapperClass = myClassLoader.loadClass(job.getMapperClass().getName());
        mapper = (SingleS_Mapper) mapperClass.newInstance();
        mapper.configure(job);
    } catch (Exception e) {
        e.printStackTrace();
    }
}
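
Both runner classes above load the mapper through the system class loader and then call configure(job) by hand. A minimal alternative sketch using ReflectionUtils, which instantiates the configured class and, because the old-API Mapper extends JobConfigurable, invokes configure(job) automatically (the same pattern the Ignite examples further down use):

import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.util.ReflectionUtils;

public class ConfiguredMapperFactory {
    @SuppressWarnings("rawtypes")
    public static Mapper newConfiguredMapper(JobConf job) {
        // newInstance() creates the mapper and calls configure(job) on JobConfigurable instances.
        return ReflectionUtils.newInstance(job.getMapperClass(), job);
    }
}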

From source file:org.apache.avro.mapred.AvroJob.java

License:Apache License

private static void configureAvroInput(JobConf job) {
    if (job.get("mapred.input.format.class") == null)
        job.setInputFormat(AvroInputFormat.class);

    if (job.getMapperClass() == IdentityMapper.class)
        job.setMapperClass(HadoopMapper.class);

    configureAvroShuffle(job);
}
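
For context, a hedged sketch of the driver-side call that reaches configureAvroInput(): AvroJob.setInputSchema() wires up the Avro input side, and the IdentityMapper comparison above ensures that a mapper set explicitly by the user is left untouched (the schema here is a placeholder):

import org.apache.avro.Schema;
import org.apache.avro.mapred.AvroJob;
import org.apache.hadoop.mapred.JobConf;

public class AvroInputDriverSketch {
    public static void main(String[] args) {
        JobConf job = new JobConf();

        // No mapred mapper has been set, so getMapperClass() still returns IdentityMapper
        // and AvroJob installs its HadoopMapper bridge when the input schema is configured.
        AvroJob.setInputSchema(job, Schema.create(Schema.Type.STRING));
    }
}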

From source file:org.apache.avro.mapred.DelegatingInputFormat.java

License:Apache License

public InputSplit[] getSplits(JobConf conf, int numSplits) throws IOException {

    JobConf confCopy = new JobConf(conf);
    List<InputSplit> splits = new ArrayList<InputSplit>();

    Map<Path, Class<? extends AvroMapper>> mapperMap = AvroMultipleInputs.getMapperTypeMap(conf);
    Map<Path, Schema> schemaMap = AvroMultipleInputs.getInputSchemaMap(conf);
    Map<Schema, List<Path>> schemaPaths = new HashMap<Schema, List<Path>>();

    // First, build a map of Schemas to Paths
    for (Entry<Path, Schema> entry : schemaMap.entrySet()) {
        if (!schemaPaths.containsKey(entry.getValue())) {
            schemaPaths.put(entry.getValue(), new LinkedList<Path>());
            System.out.println(entry.getValue());
            System.out.println(entry.getKey());
        }

        schemaPaths.get(entry.getValue()).add(entry.getKey());
    }

    for (Entry<Schema, List<Path>> schemaEntry : schemaPaths.entrySet()) {
        Schema schema = schemaEntry.getKey();
        System.out.println(schema);
        InputFormat format = (InputFormat) ReflectionUtils.newInstance(AvroInputFormat.class, conf);
        List<Path> paths = schemaEntry.getValue();

        Map<Class<? extends AvroMapper>, List<Path>> mapperPaths = new HashMap<Class<? extends AvroMapper>, List<Path>>();

        // Now, for each set of paths that have a common Schema, build
        // a map of Mappers to the paths they're used for
        for (Path path : paths) {
            Class<? extends AvroMapper> mapperClass = mapperMap.get(path);
            if (!mapperPaths.containsKey(mapperClass)) {
                mapperPaths.put(mapperClass, new LinkedList<Path>());
            }

            mapperPaths.get(mapperClass).add(path);
        }

        // Now each set of paths that has a common InputFormat and Mapper can
        // be added to the same job, and split together.
        for (Entry<Class<? extends AvroMapper>, List<Path>> mapEntry : mapperPaths.entrySet()) {
            paths = mapEntry.getValue();
            Class<? extends AvroMapper> mapperClass = mapEntry.getKey();

            if (mapperClass == null) {
                mapperClass = (Class<? extends AvroMapper>) conf.getMapperClass();
            }

            FileInputFormat.setInputPaths(confCopy, paths.toArray(new Path[paths.size()]));

            // Get splits for each input path and tag with InputFormat
            // and Mapper types by wrapping in a TaggedInputSplit.
            InputSplit[] pathSplits = format.getSplits(confCopy, numSplits);
            for (InputSplit pathSplit : pathSplits) {
                splits.add(new TaggedInputSplit(pathSplit, conf, format.getClass(), mapperClass, schema));
            }
        }
    }

    return splits.toArray(new InputSplit[splits.size()]);
}

From source file:org.apache.ignite.internal.processors.hadoop.impl.v1.HadoopV1MapTask.java

License:Apache License

/** {@inheritDoc} */
@SuppressWarnings("unchecked")
@Override
public void run(HadoopTaskContext taskCtx) throws IgniteCheckedException {
    HadoopJobEx job = taskCtx.job();

    HadoopV2TaskContext taskCtx0 = (HadoopV2TaskContext) taskCtx;

    if (taskCtx.taskInfo().hasMapperIndex())
        HadoopMapperUtils.mapperIndex(taskCtx.taskInfo().mapperIndex());
    else
        HadoopMapperUtils.clearMapperIndex();

    try {
        JobConf jobConf = taskCtx0.jobConf();

        InputFormat inFormat = jobConf.getInputFormat();

        HadoopInputSplit split = info().inputSplit();

        InputSplit nativeSplit;

        if (split instanceof HadoopFileBlock) {
            HadoopFileBlock block = (HadoopFileBlock) split;

            nativeSplit = new FileSplit(new Path(block.file().toString()), block.start(), block.length(),
                    EMPTY_HOSTS);
        } else
            nativeSplit = (InputSplit) taskCtx0.getNativeSplit(split);

        assert nativeSplit != null;

        Reporter reporter = new HadoopV1Reporter(taskCtx);

        HadoopV1OutputCollector collector = null;

        try {
            collector = collector(jobConf, taskCtx0, !job.info().hasCombiner() && !job.info().hasReducer(),
                    fileName(), taskCtx0.attemptId());

            RecordReader reader = inFormat.getRecordReader(nativeSplit, jobConf, reporter);

            Mapper mapper = ReflectionUtils.newInstance(jobConf.getMapperClass(), jobConf);

            Object key = reader.createKey();
            Object val = reader.createValue();

            assert mapper != null;

            try {
                try {
                    while (reader.next(key, val)) {
                        if (isCancelled())
                            throw new HadoopTaskCancelledException("Map task cancelled.");

                        mapper.map(key, val, collector, reporter);
                    }

                    taskCtx.onMapperFinished();
                } finally {
                    mapper.close();
                }
            } finally {
                collector.closeWriter();
            }

            collector.commit();
        } catch (Exception e) {
            if (collector != null)
                collector.abort();

            throw new IgniteCheckedException(e);
        }
    } finally {
        HadoopMapperUtils.clearMapperIndex();
    }
}

From source file:org.apache.ignite.internal.processors.hadoop.v1.GridHadoopV1MapTask.java

License:Apache License

/** {@inheritDoc} */
@SuppressWarnings("unchecked")
@Override
public void run(GridHadoopTaskContext taskCtx) throws IgniteCheckedException {
    GridHadoopJob job = taskCtx.job();

    GridHadoopV2TaskContext ctx = (GridHadoopV2TaskContext) taskCtx;

    JobConf jobConf = ctx.jobConf();

    InputFormat inFormat = jobConf.getInputFormat();

    GridHadoopInputSplit split = info().inputSplit();

    InputSplit nativeSplit;

    if (split instanceof GridHadoopFileBlock) {
        GridHadoopFileBlock block = (GridHadoopFileBlock) split;

        nativeSplit = new FileSplit(new Path(block.file().toString()), block.start(), block.length(),
                EMPTY_HOSTS);
    } else
        nativeSplit = (InputSplit) ctx.getNativeSplit(split);

    assert nativeSplit != null;

    Reporter reporter = new GridHadoopV1Reporter(taskCtx);

    GridHadoopV1OutputCollector collector = null;

    try {
        collector = collector(jobConf, ctx, !job.info().hasCombiner() && !job.info().hasReducer(), fileName(),
                ctx.attemptId());

        RecordReader reader = inFormat.getRecordReader(nativeSplit, jobConf, reporter);

        Mapper mapper = ReflectionUtils.newInstance(jobConf.getMapperClass(), jobConf);

        Object key = reader.createKey();
        Object val = reader.createValue();

        assert mapper != null;

        try {
            try {
                while (reader.next(key, val)) {
                    if (isCancelled())
                        throw new GridHadoopTaskCancelledException("Map task cancelled.");

                    mapper.map(key, val, collector, reporter);
                }
            } finally {
                mapper.close();
            }
        } finally {
            collector.closeWriter();
        }

        collector.commit();
    } catch (Exception e) {
        if (collector != null)
            collector.abort();

        throw new IgniteCheckedException(e);
    }
}

From source file:org.apache.ignite.internal.processors.hadoop.v1.HadoopV1MapTask.java

License:Apache License

/** {@inheritDoc} */
@SuppressWarnings("unchecked")
@Override
public void run(HadoopTaskContext taskCtx) throws IgniteCheckedException {
    HadoopJob job = taskCtx.job();

    HadoopV2TaskContext ctx = (HadoopV2TaskContext) taskCtx;

    JobConf jobConf = ctx.jobConf();

    InputFormat inFormat = jobConf.getInputFormat();

    HadoopInputSplit split = info().inputSplit();

    InputSplit nativeSplit;

    if (split instanceof HadoopFileBlock) {
        HadoopFileBlock block = (HadoopFileBlock) split;

        nativeSplit = new FileSplit(new Path(block.file().toString()), block.start(), block.length(),
                EMPTY_HOSTS);
    } else
        nativeSplit = (InputSplit) ctx.getNativeSplit(split);

    assert nativeSplit != null;

    Reporter reporter = new HadoopV1Reporter(taskCtx);

    HadoopV1OutputCollector collector = null;

    try {
        collector = collector(jobConf, ctx, !job.info().hasCombiner() && !job.info().hasReducer(), fileName(),
                ctx.attemptId());

        RecordReader reader = inFormat.getRecordReader(nativeSplit, jobConf, reporter);

        Mapper mapper = ReflectionUtils.newInstance(jobConf.getMapperClass(), jobConf);

        Object key = reader.createKey();
        Object val = reader.createValue();

        assert mapper != null;

        try {
            try {
                while (reader.next(key, val)) {
                    if (isCancelled())
                        throw new HadoopTaskCancelledException("Map task cancelled.");

                    mapper.map(key, val, collector, reporter);
                }
            } finally {
                mapper.close();
            }
        } finally {
            collector.closeWriter();
        }

        collector.commit();
    } catch (Exception e) {
        if (collector != null)
            collector.abort();

        throw new IgniteCheckedException(e);
    }
}

From source file:org.apache.sysml.runtime.matrix.data.hadoopfix.DelegatingInputFormat.java

License:Apache License

public InputSplit[] getSplits(JobConf conf, int numSplits) throws IOException {

    JobConf confCopy = new JobConf(conf);
    List<InputSplit> splits = new ArrayList<>();
    Map<Path, InputFormat> formatMap = MultipleInputs.getInputFormatMap(conf);
    Map<Path, Class<? extends Mapper>> mapperMap = MultipleInputs.getMapperTypeMap(conf);
    Map<Class<? extends InputFormat>, List<Path>> formatPaths = new HashMap<>();

    // First, build a map of InputFormats to Paths
    for (Entry<Path, InputFormat> entry : formatMap.entrySet()) {
        if (!formatPaths.containsKey(entry.getValue().getClass())) {
            formatPaths.put(entry.getValue().getClass(), new LinkedList<Path>());
        }

        formatPaths.get(entry.getValue().getClass()).add(entry.getKey());
    }

    for (Entry<Class<? extends InputFormat>, List<Path>> formatEntry : formatPaths.entrySet()) {
        Class<? extends InputFormat> formatClass = formatEntry.getKey();
        InputFormat format = (InputFormat) ReflectionUtils.newInstance(formatClass, conf);
        List<Path> paths = formatEntry.getValue();

        Map<Class<? extends Mapper>, List<Path>> mapperPaths = new HashMap<>();

        // Now, for each set of paths that have a common InputFormat, build
        // a map of Mappers to the paths they're used for
        for (Path path : paths) {
            Class<? extends Mapper> mapperClass = mapperMap.get(path);
            if (!mapperPaths.containsKey(mapperClass)) {
                mapperPaths.put(mapperClass, new LinkedList<Path>());
            }

            mapperPaths.get(mapperClass).add(path);
        }

        // Now each set of paths that has a common InputFormat and Mapper can
        // be added to the same job, and split together.
        for (Entry<Class<? extends Mapper>, List<Path>> mapEntry : mapperPaths.entrySet()) {
            paths = mapEntry.getValue();
            Class<? extends Mapper> mapperClass = mapEntry.getKey();

            if (mapperClass == null) {
                mapperClass = conf.getMapperClass();
            }

            FileInputFormat.setInputPaths(confCopy, paths.toArray(new Path[paths.size()]));

            // Get splits for each input path and tag with InputFormat
            // and Mapper types by wrapping in a TaggedInputSplit.
            InputSplit[] pathSplits = format.getSplits(confCopy, numSplits);
            for (InputSplit pathSplit : pathSplits) {
                splits.add(new TaggedInputSplit(pathSplit, conf, format.getClass(), mapperClass));
            }
        }
    }

    return splits.toArray(new InputSplit[splits.size()]);
}