Example usage for the org.apache.hadoop.io.Text constructor

List of usage examples for the org.apache.hadoop.io.Text constructor

Introduction

On this page you can find example usages of the org.apache.hadoop.io.Text constructor.

Prototype

public Text(byte[] utf8) 

Source Link

Document

Construct from a byte array.

Usage

From source file:co.nubetech.hiho.merge.TestMergeValueReducer.java

License:Apache License

/**
 * Runs {@code MergeValueReducer.reduce} for a {@code Text} key with an empty
 * list of values, then checks that the mocked context received
 * {@code write(null, key)} and that the {@code MergeRecordCounter.OUTPUT}
 * counter reads 1.
 */
@Test
public void testReducerNullValues() throws IOException, InterruptedException {
    // Mocked context that hands out a real counter, so its value can be read back.
    Reducer.Context mockContext = mock(Reducer.Context.class);
    Counter outputCounter = new Counters().findCounter(MergeRecordCounter.OUTPUT);
    when(mockContext.getCounter(MergeRecordCounter.OUTPUT)).thenReturn(outputCounter);

    // Input: a tuple carrying only a key, and no values at all.
    Text tupleKey = new Text("key123");
    HihoTuple tuple = new HihoTuple();
    tuple.setKey(tupleKey);
    ArrayList<HihoValue> noValues = new ArrayList<HihoValue>();

    MergeValueReducer reducerUnderTest = new MergeValueReducer();
    reducerUnderTest.reduce(tuple, noValues, mockContext);

    verify(mockContext).write(null, tupleKey);
    assertEquals(1, mockContext.getCounter(MergeRecordCounter.OUTPUT).getValue());
}

From source file:co.nubetech.hiho.merge.TestMergeValueReducer.java

License:Apache License

/**
 * Runs {@code MergeValueReducer.reduce} for a {@code LongWritable} key with two
 * values, the first flagged old and the second flagged not old, then checks
 * that the mocked context received {@code write(value2, key)} and that the
 * {@code MergeRecordCounter.OUTPUT} counter reads 1.
 */
@Test
public void testReducerForLongWritableKey() throws IOException, InterruptedException {
    // Mocked context that hands out a real counter, so its value can be read back.
    Reducer.Context mockContext = mock(Reducer.Context.class);
    Counter outputCounter = new Counters().findCounter(MergeRecordCounter.OUTPUT);
    when(mockContext.getCounter(MergeRecordCounter.OUTPUT)).thenReturn(outputCounter);

    LongWritable tupleKey = new LongWritable(123L);
    HihoTuple tuple = new HihoTuple();
    tuple.setKey(tupleKey);

    // First value is marked old.
    Text oldText = new Text("value1");
    HihoValue oldValue = new HihoValue();
    oldValue.setVal(oldText);
    oldValue.setIsOld(true);

    // Second value is marked not old; it is the one expected in the output.
    Text newText = new Text("value2");
    HihoValue newValue = new HihoValue();
    newValue.setVal(newText);
    newValue.setIsOld(false);

    ArrayList<HihoValue> bothValues = new ArrayList<HihoValue>();
    bothValues.add(oldValue);
    bothValues.add(newValue);

    MergeValueReducer reducerUnderTest = new MergeValueReducer();
    reducerUnderTest.reduce(tuple, bothValues, mockContext);

    verify(mockContext).write(newText, tupleKey);
    assertEquals(1, mockContext.getCounter(MergeRecordCounter.OUTPUT).getValue());
}

From source file:co.nubetech.hiho.similarity.ngram.NGramMapper.java

License:Apache License

/**
 * Emits one record per n-gram of the key.
 *
 * <p>The n-grams come from {@code getNGrams(key, 2)}. For each n-gram the
 * mapper writes the n-gram as the output key and, as the output value, the
 * original key and value joined by the literal
 * {@code "delimiterBetweenKeyAndValue"}. Every emitted record is also logged
 * at info level.
 *
 * @param key     text to split into n-grams; must not be {@code null}
 * @param val     value paired with the key; dereferenced only when at least
 *                one n-gram is produced
 * @param context context the records are written to
 * @throws IOException          if {@code key} is {@code null}; may also be
 *                              thrown by the calls made here
 * @throws InterruptedException declared for the calls made here
 */
@Override
public void map(Text key, Text val, Context context) throws IOException, InterruptedException {
    if (key == null) {
        throw new IOException("Key is null");
    }
    int gramSize = 2;
    HashSet<String> nGramList = getNGrams(key, gramSize);
    if (nGramList.isEmpty()) {
        // Nothing to emit; return before touching val, as the original loop would.
        return;
    }

    // The output value is the same for every n-gram, so build it once.
    Text outputValue = new Text(key.toString() + "delimiterBetweenKeyAndValue" + val.toString());
    for (String nGram : nGramList) {
        Text outputKey = new Text(nGram);
        context.write(outputKey, outputValue);
        // Log the same Text instances that were written instead of allocating new ones.
        logger.info("Key and Value in NGram Mapper is: " + outputKey + ", " + outputValue);
    }
}

From source file:co.nubetech.hiho.similarity.ngram.NGramReducer.java

License:Apache License

/**
 * Emits every unordered pair of the values received for one key.
 *
 * <p>The incoming values are first copied into a list, then each pair
 * {@code (i, j)} with {@code i < j} is wrapped in a {@code ValuePair} and
 * written with an {@code IntWritable} of 1. The key, every value and every
 * pair are logged at info level.
 *
 * @param key     reducer key; must not be {@code null}
 * @param values  values grouped under the key
 * @param context context the pairs are written to
 * @throws IOException          if {@code key} is {@code null}; may also be
 *                              thrown by the calls made here
 * @throws InterruptedException declared for the calls made here
 */
@Override
public void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
    if (key == null) {
        throw new IOException("Key is null");
    }

    logger.info("Key in NGram Reducer is: " + key);

    // Copy each value into its own Text before pairing.
    // NOTE(review): presumably needed because the framework may reuse the
    // value instance between iterations; confirm.
    ArrayList<Text> copies = new ArrayList<Text>();
    for (Text incoming : values) {
        logger.info("Value in NGram Reducer is: " + incoming);
        copies.add(new Text(incoming));
    }

    final int total = copies.size();
    for (int idx = 0; idx < total; idx++) {
        logger.info("Value added in list is: " + copies.get(idx));
    }

    // Pair every element with each element that follows it.
    for (int first = 0; first + 1 < total; first++) {
        Text left = copies.get(first);
        for (int second = first + 1; second < total; second++) {
            Text right = copies.get(second);
            ValuePair pair = new ValuePair();
            pair.setValue1(left);
            pair.setValue2(right);
            logger.info("Value set in ValuePair is: " + left + ", " + right);
            context.write(pair, new IntWritable(1));
        }
    }
}

From source file:co.nubetech.hiho.similarity.ngram.TestNGramMapper.java

License:Apache License

/**
 * Expects {@code NGramMapper.map} to throw an {@code IOException} when it is
 * called with a {@code null} key.
 */
@Test(expected = IOException.class)
public final void testMapperForNullValues() throws IOException, InterruptedException {
    Mapper.Context mockContext = mock(Mapper.Context.class);
    NGramMapper mapperUnderTest = new NGramMapper();
    Text anyValue = new Text("value1");

    // A null key is the condition under test.
    mapperUnderTest.map(null, anyValue, mockContext);
}

From source file:co.nubetech.hiho.similarity.ngram.TestNGramMapper.java

License:Apache License

/**
 * Maps the key {@code "This is a book"} with value {@code "value1"} and
 * verifies that the mocked context received one write per expected bigram
 * ({@code "a book"}, {@code "This is"}, {@code "is a"}), each carrying the
 * key and value joined by {@code "delimiterBetweenKeyAndValue"}.
 */
@Test
public final void testMapperForValidValues() throws IOException, InterruptedException {
    Mapper.Context mockContext = mock(Mapper.Context.class);
    Text inputKey = new Text("This is a book");
    Text inputValue = new Text("value1");

    NGramMapper mapperUnderTest = new NGramMapper();
    mapperUnderTest.map(inputKey, inputValue, mockContext);

    // Every emitted record is expected to carry the same value.
    String expectedValue = "This is a bookdelimiterBetweenKeyAndValuevalue1";
    String[] expectedNGrams = { "a book", "This is", "is a" };
    for (String nGram : expectedNGrams) {
        verify(mockContext).write(new Text(nGram), new Text(expectedValue));
    }
}

From source file:co.nubetech.hiho.similarity.ngram.TestNGramReducer.java

License:Apache License

/**
 * Expects {@code NGramReducer.reduce} to throw an {@code IOException} when it
 * is called with a {@code null} key, even though two values are supplied.
 */
@Test(expected = IOException.class)
public final void testReducerForNullValues() throws IOException, InterruptedException {
    Reducer.Context mockContext = mock(Reducer.Context.class);

    ArrayList<Text> twoValues = new ArrayList<Text>();
    String[] rawValues = {
        "This is a bookdelimiterBetweenKeyAndValuevalue1",
        "This is not a bookdelimiterBetweenKeyAndValuevalue2"
    };
    for (String raw : rawValues) {
        twoValues.add(new Text(raw));
    }

    NGramReducer reducerUnderTest = new NGramReducer();
    // A null key is the condition under test.
    reducerUnderTest.reduce(null, twoValues, mockContext);
}

From source file:co.nubetech.hiho.similarity.ngram.TestNGramReducer.java

License:Apache License

/**
 * Reduces two values under the key {@code "This is"} and verifies that the
 * mocked context received a single write of a {@code ValuePair} holding both
 * values together with an {@code IntWritable} of 1.
 */
@Test
public void testReducerValidValues() throws IOException, InterruptedException {
    String firstRaw = "This is a bookdelimiterBetweenKeyAndValuevalue1";
    String secondRaw = "This is not a bookdelimiterBetweenKeyAndValuevalue2";

    ArrayList<Text> twoValues = new ArrayList<Text>();
    twoValues.add(new Text(firstRaw));
    twoValues.add(new Text(secondRaw));

    Reducer.Context mockContext = mock(Reducer.Context.class);
    NGramReducer reducerUnderTest = new NGramReducer();
    reducerUnderTest.reduce(new Text("This is"), twoValues, mockContext);

    // Expected output: the two inputs paired in their original order.
    ValuePair expectedPair = new ValuePair();
    expectedPair.setValue1(new Text(firstRaw));
    expectedPair.setValue2(new Text(secondRaw));
    verify(mockContext).write(expectedPair, new IntWritable(1));
}

From source file:co.nubetech.hiho.similarity.ngram.TestScoreJob.java

License:Apache License

/**
 * End-to-end check of the score job.
 *
 * <p>Writes the same {@code ValuePair} with an {@code IntWritable} of 1 into
 * two sequence files under {@code outputOfNGramJob}, runs the score job with
 * no arguments, and then reads {@code outputOfScoreJob/part-r-00000} from the
 * file system's home directory. The output must contain exactly one record:
 * the pair as key and a {@code LongWritable} of 2 as value.
 * NOTE(review): presumably the job sums the per-file counts; confirm against
 * the job implementation.
 *
 * @throws Exception if preparing the input, running the job or reading the
 *                   output fails
 */
@Test
public void testScoreJobForValidValues() throws Exception {
    ValuePair valuePair = new ValuePair();
    valuePair.setValue1(new Text("This is a bookdelimiterBetweenKeyAndValuevalue1"));
    valuePair.setValue2(new Text("This is not a bookdelimiterBetweenKeyAndValuevalue2"));

    // Two input part files, each holding the same pair with a count of 1.
    HashMap<ValuePair, IntWritable> inputData1 = new HashMap<ValuePair, IntWritable>();
    inputData1.put(valuePair, new IntWritable(1));
    createSequenceFileInHdfs(inputData1, "outputOfNGramJob", "part-r-00000");

    HashMap<ValuePair, IntWritable> inputData2 = new HashMap<ValuePair, IntWritable>();
    inputData2.put(valuePair, new IntWritable(1));
    createSequenceFileInHdfs(inputData2, "outputOfNGramJob", "part-r-00001");

    String[] args = new String[] {};
    // Only the job's output files are inspected; the returned job object is not needed.
    runScoreJob(args);

    List<ValuePair> expectedOutputForKey = new ArrayList<ValuePair>();
    expectedOutputForKey.add(valuePair);

    List<LongWritable> expectedOutputForValue = new ArrayList<LongWritable>();
    expectedOutputForValue.add(new LongWritable(2));

    FileSystem outputFS = getFileSystem();
    Path outputPath = new Path(outputFS.getHomeDirectory(), "outputOfScoreJob/part-r-00000");
    Configuration conf = new Configuration();
    SequenceFile.Reader reader = new SequenceFile.Reader(outputFS, outputPath, conf);

    int count = 0;
    try {
        Writable writableKey = (Writable) ReflectionUtils.newInstance(reader.getKeyClass(), conf);
        Writable writableValue = (Writable) ReflectionUtils.newInstance(reader.getValueClass(), conf);
        while (reader.next(writableKey, writableValue)) {
            logger.debug("Key and value is: " + writableKey + ", " + writableValue);
            // These messages are shown only on failure, so they describe the mismatch.
            assertTrue("Unexpected key in output: " + writableKey,
                    expectedOutputForKey.contains(writableKey));
            assertTrue("Unexpected value in output: " + writableValue,
                    expectedOutputForValue.contains(writableValue));
            count++;
        }
    } finally {
        // Close the reader even when an assertion above fails.
        IOUtils.closeStream(reader);
    }
    assertEquals(1, count);
}

From source file:co.nubetech.hiho.similarity.ngram.TestScoreMapper.java

License:Apache License

/**
 * Maps a {@code ValuePair} with an {@code IntWritable} of 1 through
 * {@code ScoreMapper.map} and verifies that the mocked context received a
 * write of that same pair with an {@code IntWritable} of 1.
 */
@Test
public final void testMapperValidValues() throws IOException, InterruptedException {
    Mapper.Context mockContext = mock(Mapper.Context.class);
    ScoreMapper mapperUnderTest = new ScoreMapper();

    Text firstValue = new Text("This is a bookdelimiterBetweenKeyAndValuevalue1");
    Text secondValue = new Text("This is not a bookdelimiterBetweenKeyAndValuevalue2");
    ValuePair inputPair = new ValuePair();
    inputPair.setValue1(firstValue);
    inputPair.setValue2(secondValue);

    mapperUnderTest.map(inputPair, new IntWritable(1), mockContext);

    verify(mockContext).write(inputPair, new IntWritable(1));
}