com.aliyun.odps.mapred.bridge.LotReducerUDTFTest.java Source code

Java tutorial

Introduction

Here is the source code for com.aliyun.odps.mapred.bridge.LotReducerUDTFTest.java, a JUnit test suite for the LotReducerUDTF class.

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package com.aliyun.odps.mapred.bridge;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;

import org.apache.commons.lang.RandomStringUtils;
import org.junit.Test;

import com.aliyun.odps.data.Record;
import com.aliyun.odps.data.TableInfo;
import com.aliyun.odps.io.LongWritable;
import com.aliyun.odps.io.Text;
import com.aliyun.odps.mapred.ReducerBase;
import com.aliyun.odps.mapred.conf.BridgeJobConf;
import com.aliyun.odps.mapred.example.WordCount;
import com.aliyun.odps.mapred.example.WordCountWithMultiInsert;
import com.aliyun.odps.mapred.utils.InputUtils;
import com.aliyun.odps.mapred.utils.OutputUtils;
import com.aliyun.odps.mapred.utils.SchemaUtils;
import com.aliyun.odps.udf.ExecutionContext;
import com.aliyun.odps.udf.UDFException;

/**
 * Unit tests for {@link LotReducerUDTF}.
 *
 * <p>Each test builds a {@link BridgeJobConf}, wraps it in a {@link MockReducerUDTF} that serves
 * rows from an in-memory array and records every forwarded row, then asserts on the recorded
 * output.
 */
public class LotReducerUDTFTest {

    /** Execution context shared by all tests; it is never reassigned. */
    private static final ExecutionContext ctx = new MockExecutionContext();

    /**
     * Test double for {@link LotReducerUDTF}: input rows come from a caller-supplied array and
     * forwarded rows are collected in a list instead of being emitted.
     *
     * <p>Declared {@code static} because it never touches the enclosing test instance.
     */
    static class MockReducerUDTF extends LotReducerUDTF {

        /** Rows passed to {@link #forward(Object...)}, in call order. */
        final List<Object[]> forwarded = new ArrayList<Object[]>();
        /** Input rows to serve; {@code null} until {@link #setTestData(Object[][])} is called. */
        Object[][] testData;
        /** Index of the next row of {@link #testData} to hand out. */
        int testDataIndex = 0;

        /**
         * @param conf job configuration stored in the inherited {@code conf} field
         */
        public MockReducerUDTF(BridgeJobConf conf) {
            this.conf = conf;
        }

        /**
         * @return the live list of rows recorded by {@link #forward(Object...)}
         */
        public List<Object[]> getForwarded() {
            return forwarded;
        }

        /**
         * Records the row instead of emitting it.
         *
         * @param o column values of the forwarded row
         */
        @Override
        public void forward(Object... o) {
            // Store a shallow copy so later changes to the caller's array do not alter the record.
            forwarded.add(o.clone());
        }

        /**
         * Hands out the next input row.
         *
         * <p>NOTE(review): presumably this overrides the row-source hook polled by
         * {@code LotReducerUDTF.run()}, so the misspelled name has to stay as is - confirm
         * against the base class.
         *
         * @return the next row, or {@code null} when the data is exhausted or was never set
         */
        public Object[] getNextRowWapper() {
            // Treat "no data configured" like exhausted input rather than failing with an NPE.
            if (testData != null && testDataIndex < testData.length) {
                return testData[testDataIndex++];
            }
            return null;
        }

        /**
         * Installs the input rows and rewinds the read cursor so the rows are served from the
         * start even if an earlier data set was partially consumed.
         *
         * @param testData rows to serve, one {@code Object[]} per row
         */
        public void setTestData(Object[][] testData) {
            this.testData = testData;
            this.testDataIndex = 0;
        }
    }

    /**
     * Applies the word-count schemas and tables shared by most tests: key {@code word:string},
     * value {@code count:bigint}, a default-label output of {@code word:string,count:bigint},
     * input table {@code in_tbl} and output table {@code out_tbl}.
     *
     * <p>The reducer class is deliberately left to the caller, which sets it before calling this
     * method.
     *
     * @param conf configuration to populate
     */
    private static void configureWordCountJob(BridgeJobConf conf) throws IOException, UDFException {
        conf.setMapOutputKeySchema(SchemaUtils.fromString("word:string"));
        conf.setMapOutputValueSchema(SchemaUtils.fromString("count:bigint"));
        conf.setOutputSchema(SchemaUtils.fromString("word:string,count:bigint"), TableInfo.DEFAULT_LABEL);
        InputUtils.addTable(TableInfo.builder().tableName("in_tbl").build(), conf);
        OutputUtils.addTable(TableInfo.builder().tableName("out_tbl").build(), conf);
    }

    /**
     * Feeds six rows whose keys are word, count (three times), foo and bar, and expects four
     * output rows with counts 1, 4, 1 and 1 in that order.
     */
    @Test
    public void testProcess() throws Exception {
        BridgeJobConf conf = new BridgeJobConf();
        conf.setReducerClass(WordCount.SumReducer.class);
        configureWordCountJob(conf);
        MockReducerUDTF udtf = new MockReducerUDTF(conf);

        udtf.setup(ctx);
        Object[][] testData = new Object[][] { new Object[] { new Text("word"), new LongWritable(1) },
                new Object[] { new Text("count"), new LongWritable(2) },
                new Object[] { new Text("count"), new LongWritable(1) },
                new Object[] { new Text("count"), new LongWritable(1) },
                new Object[] { new Text("foo"), new LongWritable(1) },
                new Object[] { new Text("bar"), new LongWritable(1) } };
        udtf.setTestData(testData);
        udtf.run();
        udtf.close();
        List<Object[]> forwarded = udtf.getForwarded();
        assertEquals(4, forwarded.size());
        assertEquals(new Text("word"), forwarded.get(0)[0]);
        assertEquals(new LongWritable(1), forwarded.get(0)[1]);
        assertEquals(new Text("count"), forwarded.get(1)[0]);
        assertEquals(new LongWritable(4), forwarded.get(1)[1]);
        assertEquals(new Text("foo"), forwarded.get(2)[0]);
        assertEquals(new LongWritable(1), forwarded.get(2)[1]);
        assertEquals(new Text("bar"), forwarded.get(3)[0]);
        assertEquals(new LongWritable(1), forwarded.get(3)[1]);
    }

    /** Feeds 10000 rows that all share one key and expects a single output row with count 10000. */
    @Test
    public void testSkew() throws Exception {
        BridgeJobConf conf = new BridgeJobConf();
        conf.setReducerClass(WordCount.SumReducer.class);
        configureWordCountJob(conf);
        MockReducerUDTF udtf = new MockReducerUDTF(conf);

        udtf.setup(ctx);
        // Only the outer array is allocated; every slot points at the same row instance.
        Object[][] testData = new Object[10000][];
        Object[] item = new Object[] { new Text("word"), new LongWritable(1) };
        for (int i = 0; i < testData.length; i++) {
            testData[i] = item;
        }
        udtf.setTestData(testData);
        udtf.run();
        udtf.close();
        List<Object[]> forwarded = udtf.getForwarded();
        assertEquals(1, forwarded.size());
        assertEquals(new Text("word"), forwarded.get(0)[0]);
        assertEquals(new LongWritable(10000), forwarded.get(0)[1]);
    }

    /** Feeds 1000 rows with distinct keys and expects 1000 output rows, checking the last one. */
    @Test
    public void testMultipleKey() throws Exception {
        BridgeJobConf conf = new BridgeJobConf();
        conf.setReducerClass(WordCount.SumReducer.class);
        configureWordCountJob(conf);
        MockReducerUDTF udtf = new MockReducerUDTF(conf);

        udtf.setup(ctx);
        Object[][] testData = new Object[1000][];
        for (int i = 0; i < testData.length; i++) {
            testData[i] = new Object[] { new Text("word" + i), new LongWritable(i) };
        }
        udtf.setTestData(testData);
        udtf.run();
        udtf.close();
        List<Object[]> forwarded = udtf.getForwarded();
        assertEquals(1000, forwarded.size());
        assertEquals(new Text("word999"), forwarded.get(999)[0]);
        assertEquals(new LongWritable(999), forwarded.get(999)[1]);
    }

    /**
     * Runs the multi-insert reducer with two labelled outputs ({@code out1}, {@code out2}) and
     * expects four forwarded rows whose third column holds the output label.
     */
    @Test
    public void testMultiInsert() throws Exception {
        BridgeJobConf conf = new BridgeJobConf();
        conf.setReducerClass(WordCountWithMultiInsert.SumReducer.class);
        conf.setMapOutputKeySchema(SchemaUtils.fromString("word:string"));
        conf.setMapOutputValueSchema(SchemaUtils.fromString("count:bigint"));
        conf.setOutputSchema(SchemaUtils.fromString("word:string,count:bigint"), "out1");
        conf.setOutputSchema(SchemaUtils.fromString("word:string,count:bigint"), "out2");
        InputUtils.addTable(TableInfo.builder().tableName("in_tbl").build(), conf);
        OutputUtils.addTable(TableInfo.builder().tableName("out_tbl").label("out1").build(), conf);
        OutputUtils.addTable(TableInfo.builder().tableName("out_tbl").label("out2").build(), conf);
        MockReducerUDTF udtf = new MockReducerUDTF(conf);

        udtf.setup(ctx);

        Object[][] testData = new Object[][] { new Object[] { new Text("word"), new LongWritable(1) },
                new Object[] { new Text("word"), new LongWritable(2) },
                new Object[] { new Text("count"), new LongWritable(2) } };
        udtf.setTestData(testData);
        udtf.run();
        udtf.close();
        List<Object[]> forwarded = udtf.getForwarded();
        assertEquals(4, forwarded.size());
        assertEquals(new Text("word"), forwarded.get(0)[0]);
        assertEquals(new LongWritable(3), forwarded.get(0)[1]);
        assertEquals(new Text("out2"), forwarded.get(0)[2]);
        assertEquals(new Text("count"), forwarded.get(3)[0]);
        assertEquals(new LongWritable(2), forwarded.get(3)[1]);
        assertEquals(new Text("out1"), forwarded.get(3)[2]);
    }

    /** Calls setup and close without supplying any rows and expects nothing to be forwarded. */
    @Test
    public void testEmptyInput() throws Exception {
        BridgeJobConf conf = new BridgeJobConf();
        conf.setReducerClass(WordCount.SumReducer.class);
        configureWordCountJob(conf);
        MockReducerUDTF udtf = new MockReducerUDTF(conf);

        udtf.setup(ctx);
        udtf.close();
        List<Object[]> forwarded = udtf.getForwarded();
        assertEquals(0, forwarded.size());
    }

    /**
     * Uses the plain {@link ReducerBase} as reducer, pushes the same row twice through
     * {@code process}, and expects nothing to be forwarded.
     */
    @Test
    public void testNoIteration() throws Exception {
        BridgeJobConf conf = new BridgeJobConf();
        conf.setReducerClass(ReducerBase.class);
        conf.setMapOutputKeySchema(SchemaUtils.fromString("key:string"));
        conf.setMapOutputValueSchema(SchemaUtils.fromString("nil:bigint"));
        conf.setOutputSchema(SchemaUtils.fromString("key:string"), TableInfo.DEFAULT_LABEL);
        InputUtils.addTable(TableInfo.builder().tableName("in_tbl").build(), conf);
        OutputUtils.addTable(TableInfo.builder().tableName("out_tbl").build(), conf);
        MockReducerUDTF udtf = new MockReducerUDTF(conf);

        udtf.setup(ctx);
        Object[] item = new Object[] { new Text("word"), new LongWritable(1) };
        for (int i = 0; i < 2; i++) {
            udtf.process(item);
        }
        udtf.close();
        List<Object[]> forwarded = udtf.getForwarded();
        assertEquals(0, forwarded.size());
    }

    /** Reducer whose {@code reduce} always throws, used by {@link #testThrowException()}. */
    public static class ReduceExceptionReducer extends ReducerBase {

        @Override
        public void reduce(Record key, Iterator<Record> values, TaskContext context) throws IOException {
            throw new RuntimeException("By design.");
        }
    }

    /**
     * Runs a reducer that always throws and expects the failure to surface as an exception whose
     * message contains the reducer's own message.
     */
    @Test
    public void testThrowException() throws Exception {
        BridgeJobConf conf = new BridgeJobConf();
        conf.setReducerClass(ReduceExceptionReducer.class);
        configureWordCountJob(conf);
        MockReducerUDTF udtf = new MockReducerUDTF(conf);

        udtf.setup(ctx);
        try {
            Object[][] testData = new Object[][] { new Object[] { new Text("word"), new LongWritable(1) },
                    new Object[] { new Text("word"), new LongWritable(2) } };
            udtf.setTestData(testData);
            udtf.run();
            udtf.close();
            // fail() throws AssertionError, which the catch below does not intercept.
            fail("Not throwing exception.");
        } catch (Exception e) {
            // Null-safe so a message-less exception yields a readable failure, not an NPE.
            String message = e.getMessage();
            assertTrue("Unexpected exception: " + e, message != null && message.contains("By design."));
        }
    }

    /**
     * Pushes one million rows with random two-letter keys, each with count 1, through the
     * word-count reducer and checks that the forwarded counts add up to one million.
     */
    @Test
    public void profile() throws IOException, UDFException {
        final int rowCount = 1000000;

        BridgeJobConf conf = new BridgeJobConf();
        conf.setReducerClass(WordCount.SumReducer.class);
        configureWordCountJob(conf);
        MockReducerUDTF udtf = new MockReducerUDTF(conf);

        udtf.setup(ctx);
        // Allocate only the outer array; each row is created in the loop below.
        Object[][] testData = new Object[rowCount][];
        for (int i = 0; i < rowCount; i++) {
            testData[i] = new Object[] { new Text(RandomStringUtils.randomAlphabetic(2)), new LongWritable(1) };
        }
        udtf.setTestData(testData);
        udtf.run();
        udtf.close();
        // Accumulate in a long: "int += long" would compile but silently narrow the value.
        long sum = 0;
        for (Object[] item : udtf.getForwarded()) {
            sum += ((LongWritable) item[1]).get();
        }
        assertEquals(rowCount, sum);
    }

    /**
     * Manual entry point that runs {@link #profile()} once, blocking on standard input before and
     * after the run (presumably to give time to attach and inspect a profiler).
     *
     * @param args ignored
     */
    public static void main(String[] args) throws IOException, UDFException {
        LotReducerUDTFTest test = new LotReducerUDTFTest();
        System.in.read();
        test.profile();
        System.out.println("Done");
        System.in.read();
    }
}