List of usage examples for the org.apache.hadoop.io.Text constructor
public Text(byte[] utf8)
From source file:co.nubetech.hiho.merge.TestMergeJob.java
License:Apache License
@Test public void testMergeByValueWithSequenceFileInputFormat() throws Exception { HashMap<IntWritable, Text> inputData1 = new HashMap<IntWritable, Text>(); inputData1.put(new IntWritable(1), new Text("Macon Kent,6269 Aenean St.,1-247-399-1051,08253")); inputData1.put(new IntWritable(2), new Text("Dale Zamora,521-7792 Mauris Rd.,1-214-625-6970,90510")); inputData1.put(new IntWritable(3), new Text("Charles Wood,525-9709 In Rd.,1-370-528-4758,62714")); createSequenceFileInHdfs(inputData1, "/input1", "testFile1.seq"); HashMap<IntWritable, Text> inputData2 = new HashMap<IntWritable, Text>(); inputData2.put(new IntWritable(1), new Text("Macaulay Jackson,5435 Dui. Avenue,1-770-395-6446,31584")); inputData2.put(new IntWritable(2), new Text("Timon Leonard,716 Ac Ave,1-857-935-3882,62240")); inputData2.put(new IntWritable(4), new Text("Charles Wood,525-9709 In Rd.,1-370-528-4758,62714")); createSequenceFileInHdfs(inputData2, "/input2", "testFile2.seq"); String[] args = new String[] { "-newPath", "/input1", "-oldPath", "/input2", "-mergeBy", "value", "-outputPath", "output", "-inputFormat", "org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat", "-inputKeyClassName", "org.apache.hadoop.io.IntWritable", "-inputValueClassName", "org.apache.hadoop.io.Text" }; MergeJob job = runMergeJobs(args);// www. 
ja v a 2 s .co m assertEquals(3, job.getTotalRecordsNew()); assertEquals(3, job.getTotalRecordsOld()); assertEquals(0, job.getBadRecords()); assertEquals(5, job.getOutput()); FileSystem outputFS = getFileSystem(); Path outputPath = new Path(outputFS.getHomeDirectory(), "output/part-r-00000"); Configuration conf = new Configuration(); SequenceFile.Reader reader = new SequenceFile.Reader(outputFS, outputPath, conf); Writable writableKey = (Writable) ReflectionUtils.newInstance(reader.getKeyClass(), conf); Writable writableValue = (Writable) ReflectionUtils.newInstance(reader.getValueClass(), conf); List<Text> expectedOutput = new ArrayList<Text>(); expectedOutput.add(new Text("Macon Kent,6269 Aenean St.,1-247-399-1051,08253")); expectedOutput.add(new Text("Dale Zamora,521-7792 Mauris Rd.,1-214-625-6970,90510")); expectedOutput.add(new Text("Charles Wood,525-9709 In Rd.,1-370-528-4758,62714")); expectedOutput.add(new Text("Macaulay Jackson,5435 Dui. Avenue,1-770-395-6446,31584")); expectedOutput.add(new Text("Timon Leonard,716 Ac Ave,1-857-935-3882,62240")); int count = 0; while (reader.next(writableKey, writableValue)) { logger.debug("key and value is: " + writableKey + ", " + writableValue); assertTrue("Matched output " + writableValue, expectedOutput.contains(writableValue)); count++; } IOUtils.closeStream(reader); assertEquals(5, count); }
From source file:co.nubetech.hiho.merge.TestMergeJob.java
License:Apache License
@Test public void testMergeByValueWithSequenceFileAsTextInputFormat() throws Exception { HashMap<IntWritable, Text> inputData1 = new HashMap<IntWritable, Text>(); inputData1.put(new IntWritable(1), new Text("Macon Kent,6269 Aenean St.,1-247-399-1051,08253")); inputData1.put(new IntWritable(2), new Text("Dale Zamora,521-7792 Mauris Rd.,1-214-625-6970,90510")); inputData1.put(new IntWritable(3), new Text("Charles Wood,525-9709 In Rd.,1-370-528-4758,62714")); createSequenceFileInHdfs(inputData1, "/input1", "testFile1.seq"); HashMap<IntWritable, Text> inputData2 = new HashMap<IntWritable, Text>(); inputData2.put(new IntWritable(1), new Text("Timon Leonard,716 Ac Ave,1-857-935-3882,62240")); inputData2.put(new IntWritable(2), new Text("Macaulay Jackson,5435 Dui. Avenue,1-770-395-6446,31584")); inputData2.put(new IntWritable(4), new Text("Charles Wood,525-9709 In Rd.,1-370-528-4758,62714")); createSequenceFileInHdfs(inputData2, "/input2", "testFile2.seq"); String[] args = new String[] { "-newPath", "/input1", "-oldPath", "/input2", "-mergeBy", "value", "-outputPath", "output", "-inputFormat", "org.apache.hadoop.mapreduce.lib.input.SequenceFileAsTextInputFormat", "-inputKeyClassName", "org.apache.hadoop.io.Text", "-inputValueClassName", "org.apache.hadoop.io.Text", "-outputFormat", "co.nubetech.hiho.mapreduce.lib.output.NoKeyOnlyValueOutputFormat" }; MergeJob job = runMergeJobs(args);//from w w w .j a va 2s .c o m assertEquals(3, job.getTotalRecordsNew()); assertEquals(3, job.getTotalRecordsOld()); assertEquals(0, job.getBadRecords()); assertEquals(5, job.getOutput()); FileSystem outputFS = getFileSystem(); Path outputPath = new Path(outputFS.getHomeDirectory(), "output"); FileStatus[] status = outputFS.listStatus(outputPath, getOutputPathFilter()); assertTrue(outputFS.exists(outputPath)); List<String> expectedOutput = new ArrayList<String>(); expectedOutput.add("Macon Kent,6269 Aenean St.,1-247-399-1051,08253"); expectedOutput.add("Dale Zamora,521-7792 Mauris 
Rd.,1-214-625-6970,90510"); expectedOutput.add("Charles Wood,525-9709 In Rd.,1-370-528-4758,62714"); expectedOutput.add("Timon Leonard,716 Ac Ave,1-857-935-3882,62240"); expectedOutput.add("Macaulay Jackson,5435 Dui. Avenue,1-770-395-6446,31584"); int count = 0; for (FileStatus fileStat : status) { logger.debug("File status is " + fileStat.getPath() + " and is it a dir? " + fileStat.isDirectory()); FSDataInputStream in = outputFS.open(fileStat.getPath()); String line = null; while ((line = in.readLine()) != null) { logger.debug("Output is " + line); assertTrue("Matched output " + line, expectedOutput.contains(line)); expectedOutput.remove(line); count++; } in.close(); } assertEquals(5, count); }
From source file:co.nubetech.hiho.merge.TestMergeKeyMapper.java
License:Apache License
/**
 * Mapping a record with a null key must raise an IOException; the BAD_RECORD
 * counter is wired onto the mocked context so the mapper can record it.
 */
@Test(expected = IOException.class)
public final void testMapperForNullKeyValue() throws IOException, InterruptedException {
    Counters allCounters = new Counters();
    Counter badRecords = allCounters.findCounter(MergeRecordCounter.BAD_RECORD);
    Mapper.Context ctx = mock(Mapper.Context.class);
    when(ctx.getCounter(MergeRecordCounter.BAD_RECORD)).thenReturn(badRecords);
    MergeKeyMapper mapper = new MergeKeyMapper();
    mapper.map(null, new Text("valueOfKey"), ctx);
}
From source file:co.nubetech.hiho.merge.TestMergeKeyMapper.java
License:Apache License
/**
 * A valid (key, value) pair mapped with isOld=false must be written as a
 * HihoTuple keyed on the record key carrying a non-old HihoValue payload,
 * and must increment TOTAL_RECORDS_NEW exactly once.
 */
@Test
public final void testMapperValidValues() throws IOException, InterruptedException {
    Counters allCounters = new Counters();
    Counter newRecords = allCounters.findCounter(MergeRecordCounter.TOTAL_RECORDS_NEW);
    Mapper.Context ctx = mock(Mapper.Context.class);
    when(ctx.getCounter(MergeRecordCounter.TOTAL_RECORDS_NEW)).thenReturn(newRecords);

    Text recordKey = new Text("abc123");
    Text recordValue = new Text("valueOfKey");
    MergeKeyMapper mapper = new MergeKeyMapper();
    mapper.isOld = false;
    mapper.map(recordKey, recordValue, ctx);

    // Rebuild the tuple/value the mapper is expected to have emitted.
    HihoValue expectedValue = new HihoValue();
    expectedValue.setVal(recordValue);
    expectedValue.setIsOld(false);
    HihoTuple expectedTuple = new HihoTuple();
    expectedTuple.setKey(recordKey);
    verify(ctx).write(expectedTuple, expectedValue);
    assertEquals(1, ctx.getCounter(MergeRecordCounter.TOTAL_RECORDS_NEW).getValue());
}
From source file:co.nubetech.hiho.merge.TestMergeKeyReducer.java
License:Apache License
@Test public void testReducerValidValues() throws IOException, InterruptedException { Text key = new Text("key123"); HihoTuple hihoTuple = new HihoTuple(); hihoTuple.setKey(key);//from www. j a v a 2s.c o m HihoValue hihoValue1 = new HihoValue(); HihoValue hihoValue2 = new HihoValue(); Text value1 = new Text("value1"); Text value2 = new Text("value2"); hihoValue1.setVal(value1); hihoValue2.setVal(value2); hihoValue1.setIsOld(true); hihoValue2.setIsOld(false); ArrayList<HihoValue> values = new ArrayList<HihoValue>(); values.add(hihoValue1); values.add(hihoValue2); Reducer.Context context = mock(Reducer.Context.class); Counters counters = new Counters(); Counter counter = counters.findCounter(MergeRecordCounter.OUTPUT); when(context.getCounter(MergeRecordCounter.OUTPUT)).thenReturn(counter); MergeKeyReducer mergeReducer = new MergeKeyReducer(); mergeReducer.reduce(hihoTuple, values, context); verify(context).write(key, value2); assertEquals(1, context.getCounter(MergeRecordCounter.OUTPUT).getValue()); }
From source file:co.nubetech.hiho.merge.TestMergeKeyReducer.java
License:Apache License
@Test public void testReducerNullValues() throws IOException, InterruptedException { Text key = new Text("key123"); HihoTuple hihoTuple = new HihoTuple(); hihoTuple.setKey(key);// ww w .j ava 2 s .c om HihoValue hihoValue1 = new HihoValue(); HihoValue hihoValue2 = new HihoValue(); Text value1 = new Text("value1"); Text value2 = new Text("value2"); hihoValue1.setVal(value1); hihoValue2.setVal(value2); hihoValue1.setIsOld(true); hihoValue2.setIsOld(false); ArrayList<HihoValue> values = new ArrayList<HihoValue>(); Reducer.Context context = mock(Reducer.Context.class); Counters counters = new Counters(); Counter counter = counters.findCounter(MergeRecordCounter.OUTPUT); when(context.getCounter(MergeRecordCounter.OUTPUT)).thenReturn(counter); MergeKeyReducer mergeReducer = new MergeKeyReducer(); mergeReducer.reduce(hihoTuple, values, context); verify(context).write(key, null); assertEquals(1, context.getCounter(MergeRecordCounter.OUTPUT).getValue()); }
From source file:co.nubetech.hiho.merge.TestMergeKeyReducer.java
License:Apache License
@Test public void testReducerForLongWritableKey() throws IOException, InterruptedException { LongWritable key = new LongWritable(Long.parseLong("123")); HihoTuple hihoTuple = new HihoTuple(); hihoTuple.setKey(key);//www . j av a 2s . co m HihoValue hihoValue1 = new HihoValue(); HihoValue hihoValue2 = new HihoValue(); Text value1 = new Text("value1"); Text value2 = new Text("value2"); hihoValue1.setVal(value1); hihoValue2.setVal(value2); hihoValue1.setIsOld(true); hihoValue2.setIsOld(false); ArrayList<HihoValue> values = new ArrayList<HihoValue>(); values.add(hihoValue1); values.add(hihoValue2); Reducer.Context context = mock(Reducer.Context.class); Counters counters = new Counters(); Counter counter = counters.findCounter(MergeRecordCounter.OUTPUT); when(context.getCounter(MergeRecordCounter.OUTPUT)).thenReturn(counter); MergeKeyReducer mergeReducer = new MergeKeyReducer(); mergeReducer.reduce(hihoTuple, values, context); verify(context).write(key, value2); assertEquals(1, context.getCounter(MergeRecordCounter.OUTPUT).getValue()); }
From source file:co.nubetech.hiho.merge.TestMergeValueMapper.java
License:Apache License
/**
 * The value-driven mapper must also reject a null key with an IOException;
 * the BAD_RECORD counter is wired onto the mocked context beforehand.
 */
@Test(expected = IOException.class)
public final void testMapperForNullKeyValue() throws IOException, InterruptedException {
    Counters allCounters = new Counters();
    Counter badRecords = allCounters.findCounter(MergeRecordCounter.BAD_RECORD);
    Mapper.Context ctx = mock(Mapper.Context.class);
    when(ctx.getCounter(MergeRecordCounter.BAD_RECORD)).thenReturn(badRecords);
    MergeValueMapper mapper = new MergeValueMapper();
    mapper.map(null, new Text("valueOfKey"), ctx);
}
From source file:co.nubetech.hiho.merge.TestMergeValueMapper.java
License:Apache License
@Test public final void testMapperValidValues() throws IOException, InterruptedException { Mapper.Context context = mock(Mapper.Context.class); Counters counters = new Counters(); Counter counter = counters.findCounter(MergeRecordCounter.TOTAL_RECORDS_NEW); when(context.getCounter(MergeRecordCounter.TOTAL_RECORDS_NEW)).thenReturn(counter); MergeValueMapper mapper = new MergeValueMapper(); Text key = new Text("abc123"); Text val = new Text("valueOfKey"); mapper.isOld = false;//from ww w . j av a2 s . co m mapper.map(key, val, context); HihoValue hihoValue = new HihoValue(); hihoValue.setVal(key); hihoValue.setIsOld(false); HihoTuple hihoTuple = new HihoTuple(); hihoTuple.setKey(val); verify(context).write(hihoTuple, hihoValue); assertEquals(1, context.getCounter(MergeRecordCounter.TOTAL_RECORDS_NEW).getValue()); }
From source file:co.nubetech.hiho.merge.TestMergeValueReducer.java
License:Apache License
@Test public void testReducerValidValues() throws IOException, InterruptedException { Text key = new Text("key123"); HihoTuple hihoTuple = new HihoTuple(); hihoTuple.setKey(key);//from w w w .j av a 2s .co m HihoValue hihoValue1 = new HihoValue(); HihoValue hihoValue2 = new HihoValue(); Text value1 = new Text("value1"); Text value2 = new Text("value2"); hihoValue1.setVal(value1); hihoValue2.setVal(value2); hihoValue1.setIsOld(true); hihoValue2.setIsOld(false); ArrayList<HihoValue> values = new ArrayList<HihoValue>(); values.add(hihoValue1); values.add(hihoValue2); Reducer.Context context = mock(Reducer.Context.class); Counters counters = new Counters(); Counter counter = counters.findCounter(MergeRecordCounter.OUTPUT); when(context.getCounter(MergeRecordCounter.OUTPUT)).thenReturn(counter); MergeValueReducer mergeReducer = new MergeValueReducer(); mergeReducer.reduce(hihoTuple, values, context); verify(context).write(value2, key); assertEquals(1, context.getCounter(MergeRecordCounter.OUTPUT).getValue()); }