org.unigram.likelike.lsh
Class SelectClustersMapper

java.lang.Object
  extended by org.apache.hadoop.mapreduce.Mapper<org.apache.hadoop.io.LongWritable,org.apache.hadoop.io.Text,SeedClusterId,org.apache.hadoop.io.Text>
      extended by org.unigram.likelike.lsh.SelectClustersMapper

public class SelectClustersMapper
extends org.apache.hadoop.mapreduce.Mapper<org.apache.hadoop.io.LongWritable,org.apache.hadoop.io.Text,SeedClusterId,org.apache.hadoop.io.Text>

Mapper that takes (LongWritable, Text) input records and emits (SeedClusterId, Text) output pairs.


Nested Class Summary
 
Nested classes/interfaces inherited from class org.apache.hadoop.mapreduce.Mapper
org.apache.hadoop.mapreduce.Mapper.Context
 
Field Summary
static java.lang.String DEFAULT_MINWISE_HASH_SEEDS
          Default value for the minwise hash seeds.
static java.lang.String MINWISE_HASH_SEEDS
          Symbol for the minwise hash seeds.
 
Constructor Summary
SelectClustersMapper()
           
 
Method Summary
 void map(org.apache.hadoop.io.LongWritable key, org.apache.hadoop.io.Text value, org.apache.hadoop.mapreduce.Mapper.Context context)
          Maps a single input record whose value contains the id and the features.
 void setup(org.apache.hadoop.mapreduce.Mapper.Context context)
          Sets up this mapper using the given context.
static java.util.StringTokenizer tokenize(java.lang.String line, java.lang.String pattern)
           
static java.util.StringTokenizer tokenize(org.apache.hadoop.io.Text value, java.lang.String pattern)
           
 
Methods inherited from class org.apache.hadoop.mapreduce.Mapper
cleanup, run
 
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
 

Field Detail

MINWISE_HASH_SEEDS

public static final java.lang.String MINWISE_HASH_SEEDS
Symbol for the minwise hash seeds.

See Also:
Constant Field Values

DEFAULT_MINWISE_HASH_SEEDS

public static final java.lang.String DEFAULT_MINWISE_HASH_SEEDS
Default value for the minwise hash seeds.

See Also:
Constant Field Values
Constructor Detail

SelectClustersMapper

public SelectClustersMapper()
Method Detail

tokenize

public static java.util.StringTokenizer tokenize(java.lang.String line,
                                                 java.lang.String pattern)

tokenize

public static java.util.StringTokenizer tokenize(org.apache.hadoop.io.Text value,
                                                 java.lang.String pattern)

map

public final void map(org.apache.hadoop.io.LongWritable key,
                      org.apache.hadoop.io.Text value,
                      org.apache.hadoop.mapreduce.Mapper.Context context)
               throws java.io.IOException,
                      java.lang.InterruptedException
Maps a single input record. The value contains the id and the features; the key is a dummy.

Overrides:
map in class org.apache.hadoop.mapreduce.Mapper<org.apache.hadoop.io.LongWritable,org.apache.hadoop.io.Text,SeedClusterId,org.apache.hadoop.io.Text>
Parameters:
key - dummy
value - containing id and the features
context - context
Throws:
java.io.IOException - if an I/O error occurs
java.lang.InterruptedException - if the operation is interrupted

setup

public final void setup(org.apache.hadoop.mapreduce.Mapper.Context context)
Sets up this mapper using the given context.

Overrides:
setup in class org.apache.hadoop.mapreduce.Mapper<org.apache.hadoop.io.LongWritable,org.apache.hadoop.io.Text,SeedClusterId,org.apache.hadoop.io.Text>
Parameters:
context - context