Example usage for the org.apache.hadoop.io.Text constructor

List of usage examples for the org.apache.hadoop.io.Text constructor

Introduction

On this page you can find example usages of the org.apache.hadoop.io.Text constructor.

Prototype

public Text(byte[] utf8) 

Source Link

Document

Construct from a byte array.

Usage

From source file:co.nubetech.hiho.merge.TestMergeJob.java

License:Apache License

/**
 * Runs the merge job with {@code -mergeBy value} over two sequence-file inputs
 * (three records each) and checks the job counters and the merged output.
 *
 * <p>The record "Charles Wood,..." is present in both inputs under different keys;
 * the expected output lists it once, giving five expected values in total.
 *
 * @throws Exception if writing the input files, running the job, or reading the output fails
 */
@Test
public void testMergeByValueWithSequenceFileInputFormat() throws Exception {
    HashMap<IntWritable, Text> inputData1 = new HashMap<IntWritable, Text>();
    inputData1.put(new IntWritable(1), new Text("Macon Kent,6269 Aenean St.,1-247-399-1051,08253"));
    inputData1.put(new IntWritable(2), new Text("Dale Zamora,521-7792 Mauris Rd.,1-214-625-6970,90510"));
    inputData1.put(new IntWritable(3), new Text("Charles Wood,525-9709 In Rd.,1-370-528-4758,62714"));
    createSequenceFileInHdfs(inputData1, "/input1", "testFile1.seq");

    HashMap<IntWritable, Text> inputData2 = new HashMap<IntWritable, Text>();
    inputData2.put(new IntWritable(1), new Text("Macaulay Jackson,5435 Dui. Avenue,1-770-395-6446,31584"));
    inputData2.put(new IntWritable(2), new Text("Timon Leonard,716 Ac Ave,1-857-935-3882,62240"));
    inputData2.put(new IntWritable(4), new Text("Charles Wood,525-9709 In Rd.,1-370-528-4758,62714"));
    createSequenceFileInHdfs(inputData2, "/input2", "testFile2.seq");

    String[] args = new String[] { "-newPath", "/input1", "-oldPath", "/input2", "-mergeBy", "value",
            "-outputPath", "output", "-inputFormat",
            "org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat", "-inputKeyClassName",
            "org.apache.hadoop.io.IntWritable", "-inputValueClassName", "org.apache.hadoop.io.Text" };
    MergeJob job = runMergeJobs(args);
    assertEquals(3, job.getTotalRecordsNew());
    assertEquals(3, job.getTotalRecordsOld());
    assertEquals(0, job.getBadRecords());
    assertEquals(5, job.getOutput());

    List<Text> expectedOutput = new ArrayList<Text>();
    expectedOutput.add(new Text("Macon Kent,6269 Aenean St.,1-247-399-1051,08253"));
    expectedOutput.add(new Text("Dale Zamora,521-7792 Mauris Rd.,1-214-625-6970,90510"));
    expectedOutput.add(new Text("Charles Wood,525-9709 In Rd.,1-370-528-4758,62714"));
    expectedOutput.add(new Text("Macaulay Jackson,5435 Dui. Avenue,1-770-395-6446,31584"));
    expectedOutput.add(new Text("Timon Leonard,716 Ac Ave,1-857-935-3882,62240"));

    FileSystem outputFS = getFileSystem();
    Path outputPath = new Path(outputFS.getHomeDirectory(), "output/part-r-00000");
    Configuration conf = new Configuration();
    SequenceFile.Reader reader = new SequenceFile.Reader(outputFS, outputPath, conf);
    int count = 0;
    try {
        // Key/value holders are instantiated from the classes recorded in the file header.
        Writable writableKey = (Writable) ReflectionUtils.newInstance(reader.getKeyClass(), conf);
        Writable writableValue = (Writable) ReflectionUtils.newInstance(reader.getValueClass(), conf);
        while (reader.next(writableKey, writableValue)) {
            logger.debug("key and value is: " + writableKey + ", " + writableValue);
            assertTrue("Matched output " + writableValue, expectedOutput.contains(writableValue));
            count++;
        }
    } finally {
        // Close in finally so a failing assertion inside the loop does not leak the reader.
        IOUtils.closeStream(reader);
    }
    assertEquals(5, count);
}

From source file:co.nubetech.hiho.merge.TestMergeJob.java

License:Apache License

/**
 * Runs the merge job with {@code -mergeBy value} using
 * {@code SequenceFileAsTextInputFormat} for input and
 * {@code NoKeyOnlyValueOutputFormat} for output, then reads the output files
 * line by line and checks each line against the expected values.
 *
 * <p>Each matched line is removed from the expected list, so the same line
 * appearing twice in the output fails the test.
 *
 * @throws Exception if writing the input files, running the job, or reading the output fails
 */
@Test
public void testMergeByValueWithSequenceFileAsTextInputFormat() throws Exception {
    HashMap<IntWritable, Text> inputData1 = new HashMap<IntWritable, Text>();
    inputData1.put(new IntWritable(1), new Text("Macon Kent,6269 Aenean St.,1-247-399-1051,08253"));
    inputData1.put(new IntWritable(2), new Text("Dale Zamora,521-7792 Mauris Rd.,1-214-625-6970,90510"));
    inputData1.put(new IntWritable(3), new Text("Charles Wood,525-9709 In Rd.,1-370-528-4758,62714"));
    createSequenceFileInHdfs(inputData1, "/input1", "testFile1.seq");

    HashMap<IntWritable, Text> inputData2 = new HashMap<IntWritable, Text>();
    inputData2.put(new IntWritable(1), new Text("Timon Leonard,716 Ac Ave,1-857-935-3882,62240"));
    inputData2.put(new IntWritable(2), new Text("Macaulay Jackson,5435 Dui. Avenue,1-770-395-6446,31584"));
    inputData2.put(new IntWritable(4), new Text("Charles Wood,525-9709 In Rd.,1-370-528-4758,62714"));
    createSequenceFileInHdfs(inputData2, "/input2", "testFile2.seq");

    String[] args = new String[] { "-newPath", "/input1", "-oldPath", "/input2", "-mergeBy", "value",
            "-outputPath", "output", "-inputFormat",
            "org.apache.hadoop.mapreduce.lib.input.SequenceFileAsTextInputFormat", "-inputKeyClassName",
            "org.apache.hadoop.io.Text", "-inputValueClassName", "org.apache.hadoop.io.Text", "-outputFormat",
            "co.nubetech.hiho.mapreduce.lib.output.NoKeyOnlyValueOutputFormat" };
    MergeJob job = runMergeJobs(args);
    assertEquals(3, job.getTotalRecordsNew());
    assertEquals(3, job.getTotalRecordsOld());
    assertEquals(0, job.getBadRecords());
    assertEquals(5, job.getOutput());

    FileSystem outputFS = getFileSystem();
    Path outputPath = new Path(outputFS.getHomeDirectory(), "output");
    // Assert existence before listing, so a missing output directory is reported by this
    // assertion rather than by whatever listStatus does for a non-existent path.
    assertTrue(outputFS.exists(outputPath));
    FileStatus[] status = outputFS.listStatus(outputPath, getOutputPathFilter());
    List<String> expectedOutput = new ArrayList<String>();
    expectedOutput.add("Macon Kent,6269 Aenean St.,1-247-399-1051,08253");
    expectedOutput.add("Dale Zamora,521-7792 Mauris Rd.,1-214-625-6970,90510");
    expectedOutput.add("Charles Wood,525-9709 In Rd.,1-370-528-4758,62714");
    expectedOutput.add("Timon Leonard,716 Ac Ave,1-857-935-3882,62240");
    expectedOutput.add("Macaulay Jackson,5435 Dui. Avenue,1-770-395-6446,31584");
    int count = 0;
    for (FileStatus fileStat : status) {
        logger.debug("File status is " + fileStat.getPath() + " and is it a dir? " + fileStat.isDirectory());
        FSDataInputStream in = outputFS.open(fileStat.getPath());
        try {
            String line = null;
            // NOTE(review): DataInputStream.readLine() is deprecated; kept as-is because the
            // fixture data is plain ASCII and no other reader type is known to be imported here.
            while ((line = in.readLine()) != null) {
                logger.debug("Output is " + line);
                assertTrue("Matched output " + line, expectedOutput.contains(line));
                // Remove the match so a duplicated output line is not accepted twice.
                expectedOutput.remove(line);
                count++;
            }
        } finally {
            // Close in finally so a failing assertion inside the loop does not leak the stream.
            in.close();
        }
    }
    assertEquals(5, count);
}

From source file:co.nubetech.hiho.merge.TestMergeKeyMapper.java

License:Apache License

/**
 * Calls {@code MergeKeyMapper.map} with a null key and expects an
 * {@link IOException} to be thrown.
 */
@Test(expected = IOException.class)
public final void testMapperForNullKeyValue() throws IOException, InterruptedException {
    // Back the mocked context with a real BAD_RECORD counter.
    Counters counterStore = new Counters();
    Counter badRecordCounter = counterStore.findCounter(MergeRecordCounter.BAD_RECORD);
    Mapper.Context mockContext = mock(Mapper.Context.class);
    when(mockContext.getCounter(MergeRecordCounter.BAD_RECORD)).thenReturn(badRecordCounter);

    Text inputValue = new Text("valueOfKey");
    MergeKeyMapper keyMapper = new MergeKeyMapper();
    keyMapper.map(null, inputValue, mockContext);
}

From source file:co.nubetech.hiho.merge.TestMergeKeyMapper.java

License:Apache License

/**
 * Maps one non-null key/value pair through {@code MergeKeyMapper} with
 * {@code isOld} set to false, then verifies that the context received a
 * {@code HihoTuple} holding the key and a {@code HihoValue} holding the value,
 * and that the TOTAL_RECORDS_NEW counter reads 1.
 */
@Test
public final void testMapperValidValues() throws IOException, InterruptedException {
    Text inputKey = new Text("abc123");
    Text inputValue = new Text("valueOfKey");

    // Back the mocked context with a real TOTAL_RECORDS_NEW counter.
    Counters counterStore = new Counters();
    Counter newRecordCounter = counterStore.findCounter(MergeRecordCounter.TOTAL_RECORDS_NEW);
    Mapper.Context mockContext = mock(Mapper.Context.class);
    when(mockContext.getCounter(MergeRecordCounter.TOTAL_RECORDS_NEW)).thenReturn(newRecordCounter);

    MergeKeyMapper keyMapper = new MergeKeyMapper();
    keyMapper.isOld = false;
    keyMapper.map(inputKey, inputValue, mockContext);

    // Build the pair the mapper is expected to have written.
    HihoTuple expectedTuple = new HihoTuple();
    expectedTuple.setKey(inputKey);
    HihoValue expectedValue = new HihoValue();
    expectedValue.setVal(inputValue);
    expectedValue.setIsOld(false);

    verify(mockContext).write(expectedTuple, expectedValue);
    assertEquals(1, mockContext.getCounter(MergeRecordCounter.TOTAL_RECORDS_NEW).getValue());
}

From source file:co.nubetech.hiho.merge.TestMergeKeyReducer.java

License:Apache License

/**
 * Reduces one key with an old value ("value1") and a new value ("value2")
 * through {@code MergeKeyReducer}, then verifies that the context was written
 * the key with "value2" and that the OUTPUT counter reads 1.
 */
@Test
public void testReducerValidValues() throws IOException, InterruptedException {
    Text mergeKey = new Text("key123");
    HihoTuple tuple = new HihoTuple();
    tuple.setKey(mergeKey);

    Text oldText = new Text("value1");
    Text newText = new Text("value2");

    // First entry is flagged old, second is flagged new.
    HihoValue oldEntry = new HihoValue();
    oldEntry.setVal(oldText);
    oldEntry.setIsOld(true);
    HihoValue newEntry = new HihoValue();
    newEntry.setVal(newText);
    newEntry.setIsOld(false);

    ArrayList<HihoValue> reducerInput = new ArrayList<HihoValue>();
    reducerInput.add(oldEntry);
    reducerInput.add(newEntry);

    // Back the mocked context with a real OUTPUT counter.
    Counters counterStore = new Counters();
    Counter outputCounter = counterStore.findCounter(MergeRecordCounter.OUTPUT);
    Reducer.Context mockContext = mock(Reducer.Context.class);
    when(mockContext.getCounter(MergeRecordCounter.OUTPUT)).thenReturn(outputCounter);

    MergeKeyReducer keyReducer = new MergeKeyReducer();
    keyReducer.reduce(tuple, reducerInput, mockContext);

    verify(mockContext).write(mergeKey, newText);
    assertEquals(1, mockContext.getCounter(MergeRecordCounter.OUTPUT).getValue());
}

From source file:co.nubetech.hiho.merge.TestMergeKeyReducer.java

License:Apache License

/**
 * Reduces a key with an empty list of values through {@code MergeKeyReducer},
 * then verifies that the context was written the key with a null value and
 * that the OUTPUT counter reads 1.
 */
@Test
public void testReducerNullValues() throws IOException, InterruptedException {
    Text key = new Text("key123");
    HihoTuple hihoTuple = new HihoTuple();
    hihoTuple.setKey(key);

    // Deliberately left empty: this test covers the reducer receiving no values for a key.
    ArrayList<HihoValue> values = new ArrayList<HihoValue>();

    Reducer.Context context = mock(Reducer.Context.class);
    Counters counters = new Counters();
    Counter counter = counters.findCounter(MergeRecordCounter.OUTPUT);
    when(context.getCounter(MergeRecordCounter.OUTPUT)).thenReturn(counter);
    MergeKeyReducer mergeReducer = new MergeKeyReducer();
    mergeReducer.reduce(hihoTuple, values, context);
    verify(context).write(key, null);
    assertEquals(1, context.getCounter(MergeRecordCounter.OUTPUT).getValue());
}

From source file:co.nubetech.hiho.merge.TestMergeKeyReducer.java

License:Apache License

/**
 * Same scenario as the valid-values reducer test, but the tuple key is a
 * {@code LongWritable} (123) instead of a {@code Text}. Verifies that the
 * context was written the key with "value2" and that the OUTPUT counter reads 1.
 */
@Test
public void testReducerForLongWritableKey() throws IOException, InterruptedException {
    LongWritable mergeKey = new LongWritable(123L);
    HihoTuple tuple = new HihoTuple();
    tuple.setKey(mergeKey);

    Text oldText = new Text("value1");
    Text newText = new Text("value2");

    // First entry is flagged old, second is flagged new.
    HihoValue oldEntry = new HihoValue();
    oldEntry.setVal(oldText);
    oldEntry.setIsOld(true);
    HihoValue newEntry = new HihoValue();
    newEntry.setVal(newText);
    newEntry.setIsOld(false);

    ArrayList<HihoValue> reducerInput = new ArrayList<HihoValue>();
    reducerInput.add(oldEntry);
    reducerInput.add(newEntry);

    // Back the mocked context with a real OUTPUT counter.
    Counters counterStore = new Counters();
    Counter outputCounter = counterStore.findCounter(MergeRecordCounter.OUTPUT);
    Reducer.Context mockContext = mock(Reducer.Context.class);
    when(mockContext.getCounter(MergeRecordCounter.OUTPUT)).thenReturn(outputCounter);

    MergeKeyReducer keyReducer = new MergeKeyReducer();
    keyReducer.reduce(tuple, reducerInput, mockContext);

    verify(mockContext).write(mergeKey, newText);
    assertEquals(1, mockContext.getCounter(MergeRecordCounter.OUTPUT).getValue());
}

From source file:co.nubetech.hiho.merge.TestMergeValueMapper.java

License:Apache License

/**
 * Calls {@code MergeValueMapper.map} with a null key and expects an
 * {@link IOException} to be thrown.
 */
@Test(expected = IOException.class)
public final void testMapperForNullKeyValue() throws IOException, InterruptedException {
    // Back the mocked context with a real BAD_RECORD counter.
    Counters counterStore = new Counters();
    Counter badRecordCounter = counterStore.findCounter(MergeRecordCounter.BAD_RECORD);
    Mapper.Context mockContext = mock(Mapper.Context.class);
    when(mockContext.getCounter(MergeRecordCounter.BAD_RECORD)).thenReturn(badRecordCounter);

    Text inputValue = new Text("valueOfKey");
    MergeValueMapper valueMapper = new MergeValueMapper();
    valueMapper.map(null, inputValue, mockContext);
}

From source file:co.nubetech.hiho.merge.TestMergeValueMapper.java

License:Apache License

/**
 * Maps one non-null key/value pair through {@code MergeValueMapper} with
 * {@code isOld} set to false. The expected output swaps the roles: the
 * {@code HihoTuple} key is the input value and the {@code HihoValue} payload
 * is the input key. Also checks that the TOTAL_RECORDS_NEW counter reads 1.
 */
@Test
public final void testMapperValidValues() throws IOException, InterruptedException {
    Text inputKey = new Text("abc123");
    Text inputValue = new Text("valueOfKey");

    // Back the mocked context with a real TOTAL_RECORDS_NEW counter.
    Counters counterStore = new Counters();
    Counter newRecordCounter = counterStore.findCounter(MergeRecordCounter.TOTAL_RECORDS_NEW);
    Mapper.Context mockContext = mock(Mapper.Context.class);
    when(mockContext.getCounter(MergeRecordCounter.TOTAL_RECORDS_NEW)).thenReturn(newRecordCounter);

    MergeValueMapper valueMapper = new MergeValueMapper();
    valueMapper.isOld = false;
    valueMapper.map(inputKey, inputValue, mockContext);

    // Expected pair: tuple keyed by the input value, payload carrying the input key.
    HihoTuple expectedTuple = new HihoTuple();
    expectedTuple.setKey(inputValue);
    HihoValue expectedValue = new HihoValue();
    expectedValue.setVal(inputKey);
    expectedValue.setIsOld(false);

    verify(mockContext).write(expectedTuple, expectedValue);
    assertEquals(1, mockContext.getCounter(MergeRecordCounter.TOTAL_RECORDS_NEW).getValue());
}

From source file:co.nubetech.hiho.merge.TestMergeValueReducer.java

License:Apache License

/**
 * Reduces one tuple with an old entry ("value1") and a new entry ("value2")
 * through {@code MergeValueReducer}, then verifies that the context was
 * written with "value2" in the key position and the tuple key in the value
 * position, and that the OUTPUT counter reads 1.
 */
@Test
public void testReducerValidValues() throws IOException, InterruptedException {
    Text tupleKey = new Text("key123");
    HihoTuple tuple = new HihoTuple();
    tuple.setKey(tupleKey);

    Text oldText = new Text("value1");
    Text newText = new Text("value2");

    // First entry is flagged old, second is flagged new.
    HihoValue oldEntry = new HihoValue();
    oldEntry.setVal(oldText);
    oldEntry.setIsOld(true);
    HihoValue newEntry = new HihoValue();
    newEntry.setVal(newText);
    newEntry.setIsOld(false);

    ArrayList<HihoValue> reducerInput = new ArrayList<HihoValue>();
    reducerInput.add(oldEntry);
    reducerInput.add(newEntry);

    // Back the mocked context with a real OUTPUT counter.
    Counters counterStore = new Counters();
    Counter outputCounter = counterStore.findCounter(MergeRecordCounter.OUTPUT);
    Reducer.Context mockContext = mock(Reducer.Context.class);
    when(mockContext.getCounter(MergeRecordCounter.OUTPUT)).thenReturn(outputCounter);

    MergeValueReducer valueReducer = new MergeValueReducer();
    valueReducer.reduce(tuple, reducerInput, mockContext);

    verify(mockContext).write(newText, tupleKey);
    assertEquals(1, mockContext.getCounter(MergeRecordCounter.OUTPUT).getValue());
}