Example usages of org.apache.hadoop.mapreduce.Mapper subclasses

Introduction

On this page you can find example usages of subclasses of org.apache.hadoop.mapreduce.Mapper.

Usage

From source file com.datasalt.pangool.tuplemr.TupleMapper.java

/**
 * TupleMapper is the Tuple-based version of Hadoop's {@link Mapper}.
 */
@SuppressWarnings({ "rawtypes", "serial" })
public abstract class TupleMapper<INPUT_KEY, INPUT_VALUE>
        extends Mapper<INPUT_KEY, INPUT_VALUE, DatumWrapper<ITuple>, NullWritable> implements Serializable {

From source file com.datasalt.utils.mapred.BaseMapper.java

/**
 * A mapper with methods to serialize and deserialize.
 * 
 * @author ivan
 */
public class BaseMapper<IKey, IValue, OKey, OValue> extends Mapper<IKey, IValue, OKey, OValue> {

From source file com.digitalpebble.behemoth.mahout.BehemothLabelMapper.java

public class BehemothLabelMapper extends Mapper<Text, BehemothDocument, Text, Text> {

    private static final Logger log = LoggerFactory.getLogger(BehemothLabelMapper.class);

    private Text metadataKey;

From source file com.digitalpebble.behemoth.mahout.BehemothTokenizerMapper.java

/**
 * Extracts tokens from a Behemoth document and outputs them in a StringTuple
 */
public class BehemothTokenizerMapper extends Mapper<Text, BehemothDocument, Text, StringTuple> {

    private String tokenType;

From source file com.digitalpebble.behemoth.mahout.LuceneTokenizerMapper.java

/**
 * Tokenizes a text document and outputs tokens in a StringTuple
 */
public class LuceneTokenizerMapper extends Mapper<Text, BehemothDocument, Text, StringTuple> {

    private Analyzer analyzer;

From source file com.digitalpebble.behemoth.mahout.SequenceFileTokenizerMapper.java

/**
 * Extracts tokens from a Behemoth document and outputs them in a StringTuple
 */
public class SequenceFileTokenizerMapper extends Mapper<Text, BehemothDocument, Text, StringTuple> {

    private String tokenType;

From source file com.elex.dmp.lda.CachingCVB0Mapper.java

/**
 * Run ensemble learning via loading the {@link ModelTrainer} with two {@link TopicModel} instances:
 * one from the previous iteration, the other empty.  Inference is done on the first, and the
 * learning updates are stored in the second, and only emitted at cleanup().
 *
 * In terms of obvious performance improvements still available, the memory footprint in this

From source file com.elex.dmp.lda.CachingCVB0PerplexityMapper.java

public class CachingCVB0PerplexityMapper extends Mapper<Text, VectorWritable, DoubleWritable, DoubleWritable> {
    /**
     * Hadoop counters for {@link CachingCVB0PerplexityMapper}, to aid in debugging.
     */
    public enum Counters {
        SAMPLED_DOCUMENTS

From source file com.elex.dmp.lda.CVB0TopicTermVectorNormalizerMapper.java

/**
 * Performs L1 normalization of input vectors.
 */
public class CVB0TopicTermVectorNormalizerMapper extends Mapper<Text, VectorWritable, Text, VectorWritable> {

    @Override

From source file com.ema.hadoop.bestclient.BCMapper.java

public class BCMapper extends Mapper<LongWritable, Text, Text, IntWritable> {

    @Override
    public void map(LongWritable key, Text value, Mapper.Context context) throws IOException, InterruptedException {
        DateFormat formatter = new SimpleDateFormat("dd/MM/yyyy");