List of usage examples for the Text constructor of class org.apache.hadoop.io.Text
public Text(byte[] utf8)
From source file:co.nubetech.hiho.merge.TestMergeValueReducer.java
License:Apache License
/**
 * Runs {@code MergeValueReducer} with a {@code Text} key and an empty value list.
 * Expects a single write of {@code (null, key)} to the context and the
 * {@code MergeRecordCounter.OUTPUT} counter to read 1 afterwards.
 */
@Test
public void testReducerNullValues() throws IOException, InterruptedException {
    // Mocked context backed by a real counter so the count can be asserted.
    Reducer.Context context = mock(Reducer.Context.class);
    Counter outputCounter = new Counters().findCounter(MergeRecordCounter.OUTPUT);
    when(context.getCounter(MergeRecordCounter.OUTPUT)).thenReturn(outputCounter);

    // Input: a tuple carrying the key, and no values at all.
    Text key = new Text("key123");
    HihoTuple tuple = new HihoTuple();
    tuple.setKey(key);
    ArrayList<HihoValue> noValues = new ArrayList<HihoValue>();

    new MergeValueReducer().reduce(tuple, noValues, context);

    verify(context).write(null, key);
    assertEquals(1, context.getCounter(MergeRecordCounter.OUTPUT).getValue());
}
From source file:co.nubetech.hiho.merge.TestMergeValueReducer.java
License:Apache License
@Test public void testReducerForLongWritableKey() throws IOException, InterruptedException { LongWritable key = new LongWritable(Long.parseLong("123")); HihoTuple hihoTuple = new HihoTuple(); hihoTuple.setKey(key);//from w w w.j a va2 s . c om HihoValue hihoValue1 = new HihoValue(); HihoValue hihoValue2 = new HihoValue(); Text value1 = new Text("value1"); Text value2 = new Text("value2"); hihoValue1.setVal(value1); hihoValue2.setVal(value2); hihoValue1.setIsOld(true); hihoValue2.setIsOld(false); ArrayList<HihoValue> values = new ArrayList<HihoValue>(); values.add(hihoValue1); values.add(hihoValue2); Reducer.Context context = mock(Reducer.Context.class); Counters counters = new Counters(); Counter counter = counters.findCounter(MergeRecordCounter.OUTPUT); when(context.getCounter(MergeRecordCounter.OUTPUT)).thenReturn(counter); MergeValueReducer mergeReducer = new MergeValueReducer(); mergeReducer.reduce(hihoTuple, values, context); verify(context).write(value2, key); assertEquals(1, context.getCounter(MergeRecordCounter.OUTPUT).getValue()); }
From source file:co.nubetech.hiho.similarity.ngram.NGramMapper.java
License:Apache License
@Override public void map(Text key, Text val, Context context) throws IOException, InterruptedException { if (key == null) { throw new IOException("Key is null"); }//ww w. jav a 2 s . co m HashSet<String> nGramList = new HashSet<String>(); int gramSize = 2; nGramList = getNGrams(key, gramSize); for (String nGrams : nGramList) { String value = key.toString() + "delimiterBetweenKeyAndValue" + val.toString(); context.write(new Text(nGrams), new Text(value)); logger.info("Key and Value in NGram Mapper is: " + new Text(nGrams) + ", " + new Text(value)); } }
From source file:co.nubetech.hiho.similarity.ngram.NGramReducer.java
License:Apache License
/**
 * Collects all values received for an n-gram key and writes every unordered
 * pair of them as a {@code ValuePair} with an {@code IntWritable} count of 1.
 *
 * @param key     the n-gram key; must not be {@code null}
 * @param values  the values grouped under the key
 * @param context context the pairs are written to
 * @throws IOException          if {@code key} is {@code null}, or propagated from the write
 * @throws InterruptedException propagated from the write
 */
@Override
public void reduce(Text key, Iterable<Text> values, Context context)
        throws IOException, InterruptedException {
    if (key == null) {
        throw new IOException("Key is null");
    }
    logger.info("Key in NGram Reducer is: " + key);

    // Each value is copied before being stored.
    // NOTE(review): presumably because the framework may reuse the instance
    // handed out by the iterator; confirm against the Hadoop Reducer docs.
    ArrayList<Text> copies = new ArrayList<Text>();
    for (Text current : values) {
        logger.info("Value in NGram Reducer is: " + current);
        copies.add(new Text(current));
    }

    for (Text copy : copies) {
        logger.info("Value added in list is: " + copy);
    }

    // Emit every pair (first, second) with first positioned before second.
    final int total = copies.size();
    for (int first = 0; first + 1 < total; first++) {
        Text left = copies.get(first);
        for (int second = first + 1; second < total; second++) {
            Text right = copies.get(second);

            ValuePair pair = new ValuePair();
            pair.setValue1(left);
            pair.setValue2(right);
            logger.info("Value set in ValuePair is: " + left + ", " + right);

            context.write(pair, new IntWritable(1));
        }
    }
}
From source file:co.nubetech.hiho.similarity.ngram.TestNGramMapper.java
License:Apache License
/**
 * Calls {@code NGramMapper.map} with a {@code null} key and expects an
 * {@code IOException}.
 */
@Test(expected = IOException.class)
public final void testMapperForNullValues() throws IOException, InterruptedException {
    Text someValue = new Text("value1");
    Mapper.Context mockedContext = mock(Mapper.Context.class);

    new NGramMapper().map(null, someValue, mockedContext);
}
From source file:co.nubetech.hiho.similarity.ngram.TestNGramMapper.java
License:Apache License
/**
 * Maps the key "This is a book" with value "value1" and expects one write per
 * bigram ("a book", "This is", "is a"), each carrying the key, the delimiter
 * literal and the value concatenated.
 */
@Test
public final void testMapperForValidValues() throws IOException, InterruptedException {
    NGramMapper mapper = new NGramMapper();
    Mapper.Context context = mock(Mapper.Context.class);

    mapper.map(new Text("This is a book"), new Text("value1"), context);

    Text expectedValue = new Text("This is a bookdelimiterBetweenKeyAndValuevalue1");
    String[] expectedNGrams = { "a book", "This is", "is a" };
    for (String nGram : expectedNGrams) {
        verify(context).write(new Text(nGram), expectedValue);
    }
}
From source file:co.nubetech.hiho.similarity.ngram.TestNGramReducer.java
License:Apache License
/**
 * Calls {@code NGramReducer.reduce} with a {@code null} key and two values,
 * and expects an {@code IOException}.
 */
@Test(expected = IOException.class)
public final void testReducerForNullValues() throws IOException, InterruptedException {
    String[] records = {
        "This is a bookdelimiterBetweenKeyAndValuevalue1",
        "This is not a bookdelimiterBetweenKeyAndValuevalue2"
    };
    ArrayList<Text> values = new ArrayList<Text>();
    for (String record : records) {
        values.add(new Text(record));
    }

    Reducer.Context mockedContext = mock(Reducer.Context.class);
    new NGramReducer().reduce(null, values, mockedContext);
}
From source file:co.nubetech.hiho.similarity.ngram.TestNGramReducer.java
License:Apache License
/**
 * Reduces two values under the key "This is" and expects a single write of a
 * {@code ValuePair} holding those two values, with an {@code IntWritable} of 1.
 */
@Test
public void testReducerValidValues() throws IOException, InterruptedException {
    final String firstRecord = "This is a bookdelimiterBetweenKeyAndValuevalue1";
    final String secondRecord = "This is not a bookdelimiterBetweenKeyAndValuevalue2";

    ArrayList<Text> values = new ArrayList<Text>();
    values.add(new Text(firstRecord));
    values.add(new Text(secondRecord));

    Reducer.Context context = mock(Reducer.Context.class);
    new NGramReducer().reduce(new Text("This is"), values, context);

    // The expected pair is built from fresh Text instances, independent of the input list.
    ValuePair expectedPair = new ValuePair();
    expectedPair.setValue1(new Text(firstRecord));
    expectedPair.setValue2(new Text(secondRecord));
    verify(context).write(expectedPair, new IntWritable(1));
}
From source file:co.nubetech.hiho.similarity.ngram.TestScoreJob.java
License:Apache License
@Test public void testScoreJobForValidValues() throws Exception { ValuePair valuePair = new ValuePair(); valuePair.setValue1(new Text("This is a bookdelimiterBetweenKeyAndValuevalue1")); valuePair.setValue2(new Text("This is not a bookdelimiterBetweenKeyAndValuevalue2")); HashMap<ValuePair, IntWritable> inputData1 = new HashMap<ValuePair, IntWritable>(); inputData1.put(valuePair, new IntWritable(1)); createSequenceFileInHdfs(inputData1, "outputOfNGramJob", "part-r-00000"); HashMap<ValuePair, IntWritable> inputData2 = new HashMap<ValuePair, IntWritable>(); inputData2.put(valuePair, new IntWritable(1)); createSequenceFileInHdfs(inputData2, "outputOfNGramJob", "part-r-00001"); String[] args = new String[] {}; ScoreJob job = runScoreJob(args);//from w w w. ja v a 2 s. co m FileSystem outputFS = getFileSystem(); Path outputPath = new Path(outputFS.getHomeDirectory(), "outputOfScoreJob/part-r-00000"); Configuration conf = new Configuration(); SequenceFile.Reader reader = new SequenceFile.Reader(outputFS, outputPath, conf); Writable writableKey = (Writable) ReflectionUtils.newInstance(reader.getKeyClass(), conf); Writable writableValue = (Writable) ReflectionUtils.newInstance(reader.getValueClass(), conf); List<ValuePair> expectedOutputForKey = new ArrayList<ValuePair>(); expectedOutputForKey.add(valuePair); List<LongWritable> expectedOutputForValue = new ArrayList<LongWritable>(); expectedOutputForValue.add(new LongWritable(2)); int count = 0; while (reader.next(writableKey, writableValue)) { logger.debug("Key and value is: " + writableKey + ", " + writableValue); assertTrue("Matched output " + writableKey, expectedOutputForKey.contains(writableKey)); assertTrue("Matched output " + writableValue, expectedOutputForValue.contains(writableValue)); count++; } IOUtils.closeStream(reader); assertEquals(1, count); }
From source file:co.nubetech.hiho.similarity.ngram.TestScoreMapper.java
License:Apache License
/**
 * Feeds {@code ScoreMapper} a {@code ValuePair} with a count of 1 and expects
 * the same pair and count to be written to the context.
 */
@Test
public final void testMapperValidValues() throws IOException, InterruptedException {
    ValuePair pair = new ValuePair();
    pair.setValue1(new Text("This is a bookdelimiterBetweenKeyAndValuevalue1"));
    pair.setValue2(new Text("This is not a bookdelimiterBetweenKeyAndValuevalue2"));

    Mapper.Context mockedContext = mock(Mapper.Context.class);
    new ScoreMapper().map(pair, new IntWritable(1), mockedContext);

    verify(mockedContext).write(pair, new IntWritable(1));
}