Example usage for org.apache.hadoop.io Text Text

List of usage examples for org.apache.hadoop.io Text Text

Introduction

On this page you can find example usage for the org.apache.hadoop.io Text Text constructor.

Prototype

public Text(byte[] utf8) 

Document

Construct from a byte array.
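
Before the project examples, here is a minimal self-contained sketch (not taken from any of the source files listed below; the class name TextConstructorSketch is made up for illustration). It shows the byte-array constructor from the prototype above next to the String constructor that most of the usages below rely on.

import java.nio.charset.StandardCharsets;

import org.apache.hadoop.io.Text;

public class TextConstructorSketch {
    public static void main(String[] args) {
        // Construct from a byte array; the bytes are expected to be valid UTF-8.
        byte[] utf8 = "1,Sam,true,84.0".getBytes(StandardCharsets.UTF_8);
        Text fromBytes = new Text(utf8);

        // Construct from a String, as done throughout the usage examples below.
        Text fromString = new Text("1,Sam,true,84.0");

        // Both objects carry the same UTF-8 payload, so they compare equal.
        System.out.println(fromBytes.equals(fromString)); // true
        System.out.println(fromBytes.toString());         // 1,Sam,true,84.0
    }
}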

Usage

From source file:co.nubetech.hiho.mapreduce.TestGenericDBLoadDataMapper.java

License:Apache License

@Test
public final void testMapperWithValidValues() throws Exception {

    Mapper.Context context = mock(Mapper.Context.class);
    GenericDBLoadDataMapper mapper = new GenericDBLoadDataMapper();

    mapper.setDelimiter(",");
    ArrayList<ColumnInfo> tableInfo = new ArrayList<ColumnInfo>();

    ColumnInfo columnInfo1 = new ColumnInfo();
    columnInfo1.setIndex(0);
    columnInfo1.setName("id");
    columnInfo1.setType(Types.BIGINT);

    ColumnInfo columnInfo2 = new ColumnInfo();
    columnInfo2.setIndex(1);
    columnInfo2.setName("name");
    columnInfo2.setType(Types.VARCHAR);

    ColumnInfo columnInfo3 = new ColumnInfo();
    columnInfo3.setIndex(2);
    columnInfo3.setName("isValid");
    columnInfo3.setType(Types.BOOLEAN);

    /*ColumnInfo columnInfo4 = new ColumnInfo();
    columnInfo4.setIndex(3);
    columnInfo4.setName("date");
    columnInfo4.setType(Types.DATE);*/

    ColumnInfo columnInfo5 = new ColumnInfo();
    columnInfo5.setIndex(4);
    columnInfo5.setName("percent");
    columnInfo5.setType(Types.DOUBLE);

    tableInfo.add(columnInfo1);
    tableInfo.add(columnInfo2);
    tableInfo.add(columnInfo3);
    //tableInfo.add(columnInfo4);
    tableInfo.add(columnInfo5);

    mapper.setTableInfo(tableInfo);

    mapper.map(new LongWritable(0L), new Text("1,Sam,true,84.0"), context);

    ArrayList values = new ArrayList();
    values.add(1L);
    values.add("Sam");
    values.add(true);
    values.add(84.0);
    GenericDBWritable gdw = new GenericDBWritable(tableInfo, values);
    verify(context).write(gdw, null);

}

From source file:co.nubetech.hiho.mapreduce.TestGenericDBLoadDataMapper.java

License:Apache License

@Test
public final void testMapperWithNullValues() throws Exception {

    Mapper.Context context = mock(Mapper.Context.class);
    GenericDBLoadDataMapper mapper = new GenericDBLoadDataMapper();

    mapper.setDelimiter(",");
    ArrayList<ColumnInfo> tableInfo = new ArrayList<ColumnInfo>();

    ColumnInfo columnInfo1 = new ColumnInfo();
    columnInfo1.setIndex(0);
    columnInfo1.setName("id");
    columnInfo1.setType(Types.BIGINT);

    ColumnInfo columnInfo2 = new ColumnInfo();
    columnInfo2.setIndex(1);
    columnInfo2.setName("name");
    columnInfo2.setType(Types.VARCHAR);

    ColumnInfo columnInfo3 = new ColumnInfo();
    columnInfo3.setIndex(2);
    columnInfo3.setName("isValid");
    columnInfo3.setType(Types.BOOLEAN);

    /*ColumnInfo columnInfo4 = new ColumnInfo();
    columnInfo4.setIndex(3);
    columnInfo4.setName("date");
    columnInfo4.setType(Types.DATE);*/

    ColumnInfo columnInfo5 = new ColumnInfo();
    columnInfo5.setIndex(4);
    columnInfo5.setName("percent");
    columnInfo5.setType(Types.DOUBLE);

    tableInfo.add(columnInfo1);
    tableInfo.add(columnInfo2);
    tableInfo.add(columnInfo3);
    //tableInfo.add(columnInfo4);
    tableInfo.add(columnInfo5);

    mapper.setTableInfo(tableInfo);

    mapper.map(new LongWritable(0L), new Text("1, ,true,84.0"), context);

    ArrayList values = new ArrayList();
    values.add(1L);
    values.add(null);
    values.add(true);
    values.add(84.0);
    GenericDBWritable gdw = new GenericDBWritable(tableInfo, values);
    verify(context).write(gdw, null);

}

From source file:co.nubetech.hiho.mapreduce.TestGenericDBLoadDataMapper.java

License:Apache License

@Test(expected = IOException.class)
public final void testMapperWithUnequalLengthOfColumnInFileAndTable() throws Exception {

    Mapper.Context context = mock(Mapper.Context.class);
    GenericDBLoadDataMapper mapper = new GenericDBLoadDataMapper();

    mapper.setDelimiter(",");
    ArrayList<ColumnInfo> tableInfo = new ArrayList<ColumnInfo>();

    ColumnInfo columnInfo1 = new ColumnInfo();
    columnInfo1.setIndex(0);
    columnInfo1.setName("id");
    columnInfo1.setType(Types.BIGINT);

    ColumnInfo columnInfo2 = new ColumnInfo();
    columnInfo2.setIndex(1);
    columnInfo2.setName("name");
    columnInfo2.setType(Types.VARCHAR);

    ColumnInfo columnInfo3 = new ColumnInfo();
    columnInfo3.setIndex(2);
    columnInfo3.setName("isValid");
    columnInfo3.setType(Types.BOOLEAN);

    /*ColumnInfo columnInfo4 = new ColumnInfo();
    columnInfo4.setIndex(3);
    columnInfo4.setName("date");
    columnInfo4.setType(Types.DATE);*/

    ColumnInfo columnInfo5 = new ColumnInfo();
    columnInfo5.setIndex(4);
    columnInfo5.setName("percent");
    columnInfo5.setType(Types.DOUBLE);

    tableInfo.add(columnInfo1);
    tableInfo.add(columnInfo2);
    tableInfo.add(columnInfo3);
    //tableInfo.add(columnInfo4);
    tableInfo.add(columnInfo5);

    mapper.setTableInfo(tableInfo);

    mapper.map(new LongWritable(0L), new Text("1,Sam,true,84.0,42"), context);
}

From source file:co.nubetech.hiho.mapreduce.TestMySQLLoadMapper.java

License:Apache License

/**
 * @param tablename
 * @throws IOException
 * @throws SQLException
 * @throws InterruptedException
 */
private void runMapper(String tablename) throws IOException, SQLException, InterruptedException {
    Context context = mock(Context.class);
    MySQLLoadDataMapper mapper = new MySQLLoadDataMapper();
    FSDataInputStream val = new FSDataInputStream(new MyInputStream());
    Connection con = mock(Connection.class);
    com.mysql.jdbc.Statement stmt = mock(com.mysql.jdbc.Statement.class);
    mapper.setConnection(con);
    String query = "load data local infile 'abc.txt' into table tablename " + QUERY_SUFFIX
            + " (col1,col2,col3)";
    when(con.createStatement(ResultSet.TYPE_SCROLL_SENSITIVE, ResultSet.CONCUR_UPDATABLE)).thenReturn(stmt);
    Configuration conf = new Configuration();
    conf.set(HIHOConf.LOAD_QUERY_SUFFIX, QUERY_SUFFIX);
    conf.setBoolean(HIHOConf.LOAD_KEY_IS_TABLENAME, true);
    conf.setBoolean(HIHOConf.LOAD_HAS_HEADER, true);
    when(context.getConfiguration()).thenReturn(conf);
    when(stmt.executeUpdate(query)).thenReturn(10);
    Counter counter = mock(Counter.class);
    when(context.getCounter("MySQLLoadCounters", "ROWS_INSERTED_TABLE_tablename")).thenReturn(counter);
    when(context.getCounter("MySQLLoadCounters", "ROWS_INSERTED_TOTAL")).thenReturn(counter);
    mapper.map(new Text(tablename), val, context);
    verify(stmt).setLocalInfileInputStream(val);
    verify(stmt).executeUpdate(query);
    verify(counter, times(2)).increment(10);
}

From source file:co.nubetech.hiho.mapreduce.TestOracleLoadMapper.java

License:Apache License

@Test
public final void testMapper() throws Exception {
    Mapper.Context context = mock(Mapper.Context.class);
    OracleLoadMapper mapper = new OracleLoadMapper();
    FTPClient ftpClient = mock(FTPClient.class);
    FSDataInputStream val = mock(FSDataInputStream.class);
    Text key = new Text("key");
    mapper.setFtpClient(ftpClient);
    mapper.map(key, val, context);
    verify(ftpClient).appendFile("key", val);
}

From source file:co.nubetech.hiho.merge.HihoValue.java

License:Apache License

public void setVal(V val) {
    this.val = val;
    this.valClass = new Text(val.getClass().getName());
}

From source file:co.nubetech.hiho.merge.TestHihoValue.java

License:Apache License

@Test
public void testSetVal() {
    Text val = new Text("AbC");
    HihoValue<Text> hihoValue = new HihoValue<Text>();
    hihoValue.setVal(val);
    assertEquals(val, hihoValue.getVal());
}

From source file:co.nubetech.hiho.merge.TestMergeJob.java

License:Apache License

@Test
public void testMergeByCustomObjectKeyWithSequenceFileInputFormat() throws Exception {
    Student student1 = setStudent(new Text("Sam"), new Text("US"), new IntWritable(1),
            new LongWritable(9999999998L), new DoubleWritable(99.12));
    Student student2 = setStudent(new Text("John"), new Text("AUS"), new IntWritable(2),
            new LongWritable(9999999999L), new DoubleWritable(90.12));
    Student student3 = setStudent(new Text("Mary"), new Text("UK"), new IntWritable(3),
            new LongWritable(9999999988L), new DoubleWritable(69.12));
    Student student4 = setStudent(new Text("Kelvin"), new Text("UK"), new IntWritable(4),
            new LongWritable(9999998888L), new DoubleWritable(59.12));

    HashMap<Student, Text> inputData1 = new HashMap<Student, Text>();
    inputData1.put(student1, new Text("Macon Kent,6269 Aenean St.,1-247-399-1051,08253"));
    inputData1.put(student2, new Text("Dale Zamora,521-7792 Mauris Rd.,1-214-625-6970,90510"));
    inputData1.put(student3, new Text("Charles Wood,525-9709 In Rd.,1-370-528-4758,62714"));
    createSequenceFileInHdfs(inputData1, "/input1", "testFile1.seq");

    HashMap<Student, Text> inputData2 = new HashMap<Student, Text>();
    inputData2.put(student2, new Text("Austin Farley,4794 Donec Ave,1-230-823-8164,13508"));
    inputData2.put(student3, new Text("Macaulay Jackson,5435 Dui. Avenue,1-770-395-6446,31584"));
    inputData2.put(student4, new Text("Timon Leonard,716 Ac Ave,1-857-935-3882,62240"));
    createSequenceFileInHdfs(inputData2, "/input2", "testFile2.seq");

    String[] args = new String[] { "-newPath", "/input1", "-oldPath", "/input2", "-mergeBy", "key",
            "-outputPath", "output", "-inputFormat",
            "org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat", "-inputKeyClassName",
            "co.nubetech.hiho.testdata.Student", "-inputValueClassName", "org.apache.hadoop.io.Text" };
    MergeJob job = runMergeJobs(args);
    assertEquals(3, job.getTotalRecordsNew());
    assertEquals(3, job.getTotalRecordsOld());
    assertEquals(0, job.getBadRecords());
    assertEquals(4, job.getOutput());

    FileSystem outputFS = getFileSystem();
    Path outputPath = new Path(outputFS.getHomeDirectory(), "output/part-r-00000");
    Configuration conf = new Configuration();
    SequenceFile.Reader reader = new SequenceFile.Reader(outputFS, outputPath, conf);
    Writable writableKey = (Writable) ReflectionUtils.newInstance(reader.getKeyClass(), conf);
    Writable writableValue = (Writable) ReflectionUtils.newInstance(reader.getValueClass(), conf);
    List<Student> expectedOutput = new ArrayList<Student>();
    expectedOutput.add(student1);
    expectedOutput.add(student2);
    expectedOutput.add(student3);
    expectedOutput.add(student4);
    int count = 0;
    while (reader.next(writableKey, writableValue)) {
        logger.debug("key and value is: " + writableKey + ", " + writableValue);
        assertTrue("Matched output " + writableKey, expectedOutput.contains(writableKey));
        count++;
    }
    IOUtils.closeStream(reader);
    assertEquals(4, count);
}

From source file:co.nubetech.hiho.merge.TestMergeJob.java

License:Apache License

@Test
public void testMergeByIntWritableKeyWithSequenceFileInputFormat() throws Exception {
    HashMap<IntWritable, Text> inputData1 = new HashMap<IntWritable, Text>();
    inputData1.put(new IntWritable(1), new Text("Macon Kent,6269 Aenean St.,1-247-399-1051,08253"));
    inputData1.put(new IntWritable(2), new Text("Dale Zamora,521-7792 Mauris Rd.,1-214-625-6970,90510"));
    inputData1.put(new IntWritable(3), new Text("Charles Wood,525-9709 In Rd.,1-370-528-4758,62714"));
    createSequenceFileInHdfs(inputData1, "/input1", "testFile1.seq");

    HashMap<IntWritable, Text> inputData2 = new HashMap<IntWritable, Text>();
    inputData2.put(new IntWritable(1), new Text("Macaulay Jackson,5435 Dui. Avenue,1-770-395-6446,31584"));
    inputData2.put(new IntWritable(2), new Text("Timon Leonard,716 Ac Ave,1-857-935-3882,62240"));
    inputData2.put(new IntWritable(4), new Text("Charles Wood,525-9709 In Rd.,1-370-528-4758,62714"));
    createSequenceFileInHdfs(inputData2, "/input2", "testFile2.seq");

    String[] args = new String[] { "-newPath", "/input1", "-oldPath", "/input2", "-mergeBy", "key",
            "-outputPath", "output", "-inputFormat",
            "org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat", "-inputKeyClassName",
            "org.apache.hadoop.io.IntWritable", "-inputValueClassName", "org.apache.hadoop.io.Text" };
    MergeJob job = runMergeJobs(args);
    assertEquals(3, job.getTotalRecordsNew());
    assertEquals(3, job.getTotalRecordsOld());
    assertEquals(0, job.getBadRecords());
    assertEquals(4, job.getOutput());

    FileSystem outputFS = getFileSystem();
    Path outputPath = new Path(outputFS.getHomeDirectory(), "output/part-r-00000");
    Configuration conf = new Configuration();
    SequenceFile.Reader reader = new SequenceFile.Reader(outputFS, outputPath, conf);
    Writable writableKey = (Writable) ReflectionUtils.newInstance(reader.getKeyClass(), conf);
    Writable writableValue = (Writable) ReflectionUtils.newInstance(reader.getValueClass(), conf);
    List<IntWritable> expectedOutput = new ArrayList<IntWritable>();
    expectedOutput.add(new IntWritable(1));
    expectedOutput.add(new IntWritable(2));
    expectedOutput.add(new IntWritable(3));
    expectedOutput.add(new IntWritable(4));
    int count = 0;
    while (reader.next(writableKey, writableValue)) {
        logger.debug("key and value is: " + writableKey + ", " + writableValue);
        assertTrue("Matched output " + writableKey, expectedOutput.contains(writableKey));
        count++;
    }
    IOUtils.closeStream(reader);
    assertEquals(4, count);

}

From source file:co.nubetech.hiho.merge.TestMergeJob.java

License:Apache License

@Test
public void testMergeByLongWritableKeyWithSequenceFileInputFormat() throws Exception {
    HashMap<LongWritable, Text> inputData1 = new HashMap<LongWritable, Text>();
    inputData1.put(new LongWritable(1), new Text("Macon Kent,6269 Aenean St.,1-247-399-1051,08253"));
    inputData1.put(new LongWritable(2), new Text("Dale Zamora,521-7792 Mauris Rd.,1-214-625-6970,90510"));
    inputData1.put(new LongWritable(3), new Text("Charles Wood,525-9709 In Rd.,1-370-528-4758,62714"));
    createSequenceFileInHdfs(inputData1, "/input1", "testFile1.seq");

    HashMap<LongWritable, Text> inputData2 = new HashMap<LongWritable, Text>();
    inputData2.put(new LongWritable(1), new Text("Macaulay Jackson,5435 Dui. Avenue,1-770-395-6446,31584"));
    inputData2.put(new LongWritable(2), new Text("Timon Leonard,716 Ac Ave,1-857-935-3882,62240"));
    inputData2.put(new LongWritable(4), new Text("Charles Wood,525-9709 In Rd.,1-370-528-4758,62714"));
    createSequenceFileInHdfs(inputData2, "/input2", "testFile2.seq");

    String[] args = new String[] { "-newPath", "/input1", "-oldPath", "/input2", "-mergeBy", "key",
            "-outputPath", "output", "-inputFormat",
            "org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat", "-inputKeyClassName",
            "org.apache.hadoop.io.LongWritable", "-inputValueClassName", "org.apache.hadoop.io.Text" };
    MergeJob job = runMergeJobs(args);
    assertEquals(3, job.getTotalRecordsNew());
    assertEquals(3, job.getTotalRecordsOld());
    assertEquals(0, job.getBadRecords());
    assertEquals(4, job.getOutput());

    FileSystem outputFS = getFileSystem();
    Path outputPath = new Path(outputFS.getHomeDirectory(), "output/part-r-00000");
    Configuration conf = new Configuration();
    SequenceFile.Reader reader = new SequenceFile.Reader(outputFS, outputPath, conf);
    Writable writableKey = (Writable) ReflectionUtils.newInstance(reader.getKeyClass(), conf);
    Writable writableValue = (Writable) ReflectionUtils.newInstance(reader.getValueClass(), conf);
    List<LongWritable> expectedOutput = new ArrayList<LongWritable>();
    expectedOutput.add(new LongWritable(1));
    expectedOutput.add(new LongWritable(2));
    expectedOutput.add(new LongWritable(3));
    expectedOutput.add(new LongWritable(4));
    int count = 0;
    while (reader.next(writableKey, writableValue)) {
        logger.debug("key and value is: " + writableKey + ", " + writableValue);
        assertTrue("Matched output " + writableKey, expectedOutput.contains(writableKey));
        count++;
    }
    IOUtils.closeStream(reader);
    assertEquals(4, count);

}