List of usage examples for subclasses of org.apache.hadoop.mapreduce.Mapper
From source file com.datasalt.pangool.tuplemr.TupleMapper.java
/** * TupleMapper is the Tuple-based version of Hadoop's {@link Mapper}. */ @SuppressWarnings({ "rawtypes", "serial" }) public abstract class TupleMapper<INPUT_KEY, INPUT_VALUE> extends Mapper<INPUT_KEY, INPUT_VALUE, DatumWrapper<ITuple>, NullWritable> implements Serializable {
From source file com.datasalt.utils.mapred.BaseMapper.java
/** * A mapper with methods for serializing and deserializing * * @author ivan */ public class BaseMapper<IKey, IValue, OKey, OValue> extends Mapper<IKey, IValue, OKey, OValue> {
From source file com.digitalpebble.behemoth.mahout.BehemothLabelMapper.java
public class BehemothLabelMapper extends Mapper<Text, BehemothDocument, Text, Text> { private static final Logger log = LoggerFactory.getLogger(BehemothLabelMapper.class); private Text metadataKey;
From source file com.digitalpebble.behemoth.mahout.BehemothTokenizerMapper.java
/** * Extracts tokens from a Behemoth document and outputs them in a StringTuple */ public class BehemothTokenizerMapper extends Mapper<Text, BehemothDocument, Text, StringTuple> { private String tokenType;
From source file com.digitalpebble.behemoth.mahout.LuceneTokenizerMapper.java
/** * Tokenizes a text document and outputs tokens in a StringTuple */ public class LuceneTokenizerMapper extends Mapper<Text, BehemothDocument, Text, StringTuple> { private Analyzer analyzer;
From source file com.digitalpebble.behemoth.mahout.SequenceFileTokenizerMapper.java
/** * Extracts tokens from a Behemoth document and outputs them in a StringTuple */ public class SequenceFileTokenizerMapper extends Mapper<Text, BehemothDocument, Text, StringTuple> { private String tokenType;
From source file com.elex.dmp.lda.CachingCVB0Mapper.java
/**
* Run ensemble learning via loading the {@link ModelTrainer} with two {@link TopicModel} instances:
* one from the previous iteration, the other empty. Inference is done on the first, and the
* learning updates are stored in the second, and only emitted at cleanup().
*
* In terms of obvious performance improvements still available, the memory footprint in this
From source file com.elex.dmp.lda.CachingCVB0PerplexityMapper.java
public class CachingCVB0PerplexityMapper extends Mapper<Text, VectorWritable, DoubleWritable, DoubleWritable> { /** * Hadoop counters for {@link CachingCVB0PerplexityMapper}, to aid in debugging. */ public enum Counters { SAMPLED_DOCUMENTS
From source file com.elex.dmp.lda.CVB0TopicTermVectorNormalizerMapper.java
/** * Performs L1 normalization of input vectors. */ public class CVB0TopicTermVectorNormalizerMapper extends Mapper<Text, VectorWritable, Text, VectorWritable> { @Override
From source file com.ema.hadoop.bestclient.BCMapper.java
public class BCMapper extends Mapper<LongWritable, Text, Text, IntWritable> { @Override public void map(LongWritable key, Text value, Mapper.Context context) throws IOException, InterruptedException { DateFormat formatter = new SimpleDateFormat("dd/MM/yyyy");