com.aliyun.odps.mapred.bridge.LotMapperUDTFTest.java Source code

Java tutorial

Introduction

Here is the source code for com.aliyun.odps.mapred.bridge.LotMapperUDTFTest.java

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package com.aliyun.odps.mapred.bridge;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.apache.commons.lang.RandomStringUtils;
import org.junit.Before;
import org.junit.Test;

import com.aliyun.odps.data.Record;
import com.aliyun.odps.data.TableInfo;
import com.aliyun.odps.data.TableInfo.TableInfoBuilder;
import com.aliyun.odps.io.LongWritable;
import com.aliyun.odps.io.Text;
import com.aliyun.odps.mapred.MapperBase;
import com.aliyun.odps.mapred.conf.BridgeJobConf;
import com.aliyun.odps.mapred.example.WordCount;
import com.aliyun.odps.mapred.utils.InputUtils;
import com.aliyun.odps.mapred.utils.OutputUtils;
import com.aliyun.odps.mapred.utils.SchemaUtils;

/**
 * Tests for {@link LotMapperUDTF}.
 *
 * <p>The UDTF under test is exercised through {@link MockMapperUDTF}, a subclass that hands out
 * rows from an in-memory array and records every forwarded row in a list, so each test can
 * assert on exactly what was forwarded.
 */
public class LotMapperUDTFTest {

    /** Execution context passed to {@code setup}; recreated before every test. */
    private MockExecutionContext ctx;

    /** Job configuration shared by the test and the UDTF; recreated before every test. */
    private BridgeJobConf conf;

    /** Two input rows holding six whitespace-separated words, four of them distinct. */
    Object[][] testData = new Object[][] { new Object[] { new Text("to be ") },
            new Object[] { new Text("or not to be") } };

    /** Two input rows holding the same word ("1") seven times in total. */
    Object[][] testDataSame = new Object[][] { new Object[] { new Text("1 1 1") },
            new Object[] { new Text("1 1 1 1") } };

    /**
     * {@link LotMapperUDTF} with its input and output replaced by in-memory structures.
     */
    class MockMapperUDTF extends LotMapperUDTF {

        /** Every row passed to {@link #forward(Object...)}, in call order. */
        List<Object[]> forwarded = new ArrayList<Object[]>();

        /** Rows handed out one by one by {@link #getNextRowWapper()}. */
        Object[][] testData;

        /** Index of the next row of {@link #testData} to hand out. */
        int testDataIndex = 0;

        /**
         * @param conf     job configuration, stored in the {@code conf} field of this UDTF
         * @param testData input rows to serve
         */
        public MockMapperUDTF(BridgeJobConf conf, Object[][] testData) {
            this.conf = conf;
            this.testData = testData;
        }

        /** @return the live list of rows forwarded so far */
        public List<Object[]> getForwarded() {
            return forwarded;
        }

        /**
         * Records the forwarded row instead of emitting it.
         *
         * @param o column values of the forwarded row
         */
        @Override
        public void forward(Object... o) {
            // Store a copy of the array; presumably the caller may reuse it between calls.
            forwarded.add(o.clone());
        }

        /**
         * Hands out the next input row.
         *
         * @return the next row of the test data, or {@code null} once all rows were served
         */
        public Object[] getNextRowWapper() {
            if (testDataIndex < testData.length) {
                return testData[testDataIndex++];
            }
            return null;
        }

        /**
         * Replaces the input rows. The read position is not reset by this call.
         *
         * @param testData new input rows
         */
        public void setTestData(Object[][] testData) {
            this.testData = testData;
        }
    }

    /**
     * Creates a fresh context and configuration with one input table ({@code prj.tbl}, column
     * {@code col1}) and one output table ({@code prj.out}, label {@code foo}).
     */
    @Before
    public void setUp() {
        ctx = new MockExecutionContext();
        conf = new BridgeJobConf();

        TableInfo tblInfo = new TableInfo.TableInfoBuilder().projectName("prj").tableName("tbl")
                .cols(new String[] { "col1" }).build();

        TableInfo output = new TableInfo.TableInfoBuilder().projectName("prj").tableName("out").label("foo")
                .build();

        ctx.setTableInfo(tblInfo.getProjectName() + "." + tblInfo.getTableName());
        InputUtils.addTable(tblInfo, conf);
        OutputUtils.addTable(output, conf);
        conf.setOutputSchema(SchemaUtils.fromString("word:string,count:bigint"), "foo");
        conf.setInputSchema(tblInfo, SchemaUtils.fromString("word:string"));
    }

    /**
     * Configures the word-count mapper with a {@code word:string} key and a {@code count:bigint}
     * value, optionally adding the summing combiner.
     *
     * @param withCombiner whether {@link WordCount.SumCombiner} is registered as combiner
     */
    private void configureWordCount(boolean withCombiner) {
        conf.setMapperClass(WordCount.TokenizerMapper.class);
        if (withCombiner) {
            conf.setCombinerClass(WordCount.SumCombiner.class);
        }
        configureMapOutputSchemas();
    }

    /** Sets the map output key/value schemas used by every test in this class. */
    private void configureMapOutputSchemas() {
        conf.setMapOutputKeySchema(SchemaUtils.fromString("word:string"));
        conf.setMapOutputValueSchema(SchemaUtils.fromString("count:bigint"));
    }

    /**
     * Runs the full setup / run / close sequence on the given UDTF.
     *
     * @param udtf UDTF to drive
     * @return the rows the UDTF forwarded
     * @throws Exception whatever the UDTF throws
     */
    private List<Object[]> execute(MockMapperUDTF udtf) throws Exception {
        udtf.setup(ctx);
        udtf.run();
        udtf.close();
        return udtf.getForwarded();
    }

    /**
     * Asserts that the exception carries a message containing the expected text. A missing
     * message is reported as an assertion failure naming the exception rather than surfacing
     * as a {@code NullPointerException}.
     *
     * @param e        exception that was caught
     * @param expected text the message must contain
     */
    private static void assertMessageContains(Exception e, String expected) {
        String message = e.getMessage();
        assertTrue("Expected message containing \"" + expected + "\" but caught: " + e,
                message != null && message.contains(expected));
    }

    /** Without a combiner every word is forwarded individually, in input order, with count 1. */
    @Test
    public void testProcess() throws Exception {
        configureWordCount(false);

        List<Object[]> forwarded = execute(new MockMapperUDTF(conf, testData));

        assertEquals(6, forwarded.size());
        assertEquals(new Text("to"), forwarded.get(0)[0]);
        assertEquals(new LongWritable(1), forwarded.get(0)[1]);
        assertEquals(new Text("be"), forwarded.get(1)[0]);
        assertEquals(new LongWritable(1), forwarded.get(1)[1]);
        assertEquals(new Text("or"), forwarded.get(2)[0]);
        assertEquals(new LongWritable(1), forwarded.get(2)[1]);
        assertEquals(new Text("not"), forwarded.get(3)[0]);
        assertEquals(new LongWritable(1), forwarded.get(3)[1]);
        assertEquals(new Text("to"), forwarded.get(4)[0]);
        assertEquals(new LongWritable(1), forwarded.get(4)[1]);
        assertEquals(new Text("be"), forwarded.get(5)[0]);
        assertEquals(new LongWritable(1), forwarded.get(5)[1]);
    }

    /** With the combiner, one row per distinct word is forwarded, ordered by word. */
    @Test
    public void testCombiner() throws Exception {
        configureWordCount(true);

        List<Object[]> forwarded = execute(new MockMapperUDTF(conf, testData));

        assertEquals(4, forwarded.size());
        assertEquals(new Text("be"), forwarded.get(0)[0]);
        assertEquals(new LongWritable(2), forwarded.get(0)[1]);
        assertEquals(new Text("not"), forwarded.get(1)[0]);
        assertEquals(new LongWritable(1), forwarded.get(1)[1]);
        assertEquals(new Text("or"), forwarded.get(2)[0]);
        assertEquals(new LongWritable(1), forwarded.get(2)[1]);
        assertEquals(new Text("to"), forwarded.get(3)[0]);
        assertEquals(new LongWritable(2), forwarded.get(3)[1]);
    }

    /**
     * With the combiner cache limited to two items, six rows are forwarded in three sorted
     * pairs, i.e. repeated words from different pairs are not merged.
     */
    @Test
    public void testCombinerBuffer() throws Exception {
        conf.setCombinerCacheItems(2);
        configureWordCount(true);

        List<Object[]> forwarded = execute(new MockMapperUDTF(conf, testData));

        assertEquals(6, forwarded.size());
        assertEquals(new Text("be"), forwarded.get(0)[0]);
        assertEquals(new Text("to"), forwarded.get(1)[0]);
        assertEquals(new Text("not"), forwarded.get(2)[0]);
        assertEquals(new Text("or"), forwarded.get(3)[0]);
        assertEquals(new Text("be"), forwarded.get(4)[0]);
        assertEquals(new Text("to"), forwarded.get(5)[0]);
    }

    /**
     * Seven occurrences of one word with a two-item combiner cache are forwarded as counts
     * 2, 2, 2 and 1.
     */
    @Test
    public void testCombinerBufferSameKey() throws Exception {
        conf.setCombinerCacheItems(2);
        configureWordCount(true);

        List<Object[]> forwarded = execute(new MockMapperUDTF(conf, testDataSame));

        assertEquals(4, forwarded.size());
        assertEquals(new LongWritable(2), forwarded.get(0)[1]);
        assertEquals(new LongWritable(2), forwarded.get(1)[1]);
        assertEquals(new LongWritable(2), forwarded.get(2)[1]);
        assertEquals(new LongWritable(1), forwarded.get(3)[1]);
    }

    /** Closing right after setup, without running, forwards nothing (combiner configured). */
    @Test
    public void testEmptyInputWithCombiner() throws Exception {
        configureWordCount(true);
        MockMapperUDTF udtf = new MockMapperUDTF(conf, testData);

        // run() is deliberately not called, so no input row is ever requested.
        udtf.setup(ctx);
        udtf.close();

        assertEquals(0, udtf.getForwarded().size());
    }

    /** Closing right after setup, without running, forwards nothing (no combiner). */
    @Test
    public void testEmptyInput() throws Exception {
        configureWordCount(false);
        MockMapperUDTF udtf = new MockMapperUDTF(conf, testData);

        // run() is deliberately not called, so no input row is ever requested.
        udtf.setup(ctx);
        udtf.close();

        assertEquals(0, udtf.getForwarded().size());
    }

    /**
     * Pushes 100000 random five-letter words through the mapper and combiner and checks that
     * the forwarded counts add up to the number of input rows. Also used by
     * {@link #main(String[])}.
     */
    @Test
    public void profile() throws Exception {
        final int rowCount = 100000;

        configureWordCount(true);
        MockMapperUDTF udtf = new MockMapperUDTF(conf, testData);

        udtf.setup(ctx);

        // Each row is a single alphabetic word, so every row should contribute exactly one to
        // the total. NOTE(review): these rows hold String values whereas the other tests use
        // Text - presumably both are accepted; confirm.
        Object[][] rows = new Object[rowCount][1];
        for (int i = 0; i < rowCount; i++) {
            rows[i] = new Object[] { RandomStringUtils.randomAlphabetic(5) };
        }
        udtf.setTestData(rows);
        udtf.run();
        udtf.close();

        int sum = 0;
        for (Object[] item : udtf.getForwarded()) {
            sum += ((LongWritable) item[1]).get();
        }
        assertEquals(rowCount, sum);
    }

    /** Checks table lookup by plain name, by a multi-partition string and by one partition. */
    @Test
    public void testGetTableInfo() {
        MockMapperUDTF udtf = new MockMapperUDTF(conf, testData);
        TableInfoBuilder builder = TableInfo.builder().projectName("foo").tableName("bar");
        assertEquals(builder.build(), udtf.getTableInfo(InputUtils.getTables(conf), "foo.bar"));
        assertEquals(builder.build(), udtf.getTableInfo(InputUtils.getTables(conf),
                "foo.bar/ds=19970701/hr=19;foo.bar/ds=19970701/hr=20"));
        builder.partSpec("ds=19970701/hr=19");
        assertEquals(builder.build(), udtf.getTableInfo(InputUtils.getTables(conf), "foo.bar/ds=19970701/hr=19"));
    }

    /** Mapper that fails on every record. */
    public static class ExceptionMapper extends MapperBase {

        /**
         * Always throws.
         *
         * @throws RuntimeException with the message {@code "By design."}
         */
        @Override
        public void map(long key, Record record, TaskContext context) throws IOException {
            throw new RuntimeException("By design.");
        }
    }

    /** An exception thrown by the mapper must reach the caller with its message intact. */
    @Test
    public void testThrowException() throws Exception {
        conf.setMapperClass(ExceptionMapper.class);
        configureMapOutputSchemas();
        MockMapperUDTF udtf = new MockMapperUDTF(conf, testData);

        udtf.setup(ctx);

        Object[][] rows = new Object[][] { new Object[] { new Text("to be ") },
                new Object[] { new Text("or not to be") } };
        udtf.setTestData(rows);

        try {
            udtf.run();
            udtf.close();
            // fail() throws an AssertionError, which the catch clause below does not intercept.
            fail("Not throwing exception.");
        } catch (Exception e) {
            assertMessageContains(e, "By design.");
        }
    }

    /** Mapper that writes every record to an output label that is not configured. */
    public static class InvalidLabelMapper extends MapperBase {

        /** Writes the incoming record to the label {@code "nonexist"}. */
        @Override
        public void map(long key, Record record, TaskContext context) throws IOException {
            context.write(record, "nonexist");
        }
    }

    /** Writing to an unknown label in a map-only job must fail with {@code NO_SUCH_LABEL}. */
    @Test
    public void testWriteToInvalidLabel() throws Exception {
        conf.setMapperClass(InvalidLabelMapper.class);
        conf.setNumReduceTasks(0);
        configureMapOutputSchemas();

        InputUtils.addTable(TableInfo.builder().projectName("proj").tableName("tb2").label("tb2").build(), conf);
        MockMapperUDTF udtf = new MockMapperUDTF(conf, testData);
        udtf.setup(ctx);

        Object[][] rows = new Object[][] { new Object[] { new Text("to be ") },
                new Object[] { new Text("or not to be") } };
        udtf.setTestData(rows);

        try {
            udtf.run();

            udtf.close();
            // fail() throws an AssertionError, which the catch clause below does not intercept.
            fail("Not throwing exception.");
        } catch (Exception e) {
            assertMessageContains(e, ErrorCode.NO_SUCH_LABEL.toString());
        }
    }

    /**
     * Runs {@link #profile()} outside of JUnit. The program blocks on standard input before and
     * after the run, presumably to leave time for attaching and inspecting a profiler.
     *
     * @param args ignored
     * @throws Exception if reading standard input or the profiled run fails
     */
    public static void main(String[] args) throws Exception {
        LotMapperUDTFTest test = new LotMapperUDTFTest();
        // No JUnit runner is involved here, so the @Before fixture has to be invoked by hand;
        // without it ctx and conf are still null when profile() starts.
        test.setUp();
        System.in.read();
        test.profile();
        System.out.println("Done");
        System.in.read();
    }
}