Java tutorial
/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations * under the License. */ package com.aliyun.odps.mapred.bridge; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; import java.io.IOException; import java.util.ArrayList; import java.util.Iterator; import java.util.List; import org.apache.commons.lang.RandomStringUtils; import org.junit.Test; import com.aliyun.odps.data.Record; import com.aliyun.odps.data.TableInfo; import com.aliyun.odps.io.LongWritable; import com.aliyun.odps.io.Text; import com.aliyun.odps.mapred.ReducerBase; import com.aliyun.odps.mapred.conf.BridgeJobConf; import com.aliyun.odps.mapred.example.WordCount; import com.aliyun.odps.mapred.example.WordCountWithMultiInsert; import com.aliyun.odps.mapred.utils.InputUtils; import com.aliyun.odps.mapred.utils.OutputUtils; import com.aliyun.odps.mapred.utils.SchemaUtils; import com.aliyun.odps.udf.ExecutionContext; import com.aliyun.odps.udf.UDFException; public class LotReducerUDTFTest { private static ExecutionContext ctx = new MockExecutionContext(); class MockReducerUDTF extends LotReducerUDTF { List<Object[]> forwarded = new ArrayList<Object[]>(); Object[][] testData; int testDataIndex = 0; public MockReducerUDTF(BridgeJobConf conf) { this.conf = conf; } public List<Object[]> getForwarded() { return forwarded; } @Override public void forward(Object... o) { forwarded.add(o.clone()); } public Object[] getNextRowWapper() { if (testDataIndex < testData.length) { return testData[testDataIndex++]; } return null; } public void setTestData(Object[][] testData) { this.testData = testData; } } ; @Test public void testProcess() throws Exception { BridgeJobConf conf = new BridgeJobConf(); conf.setReducerClass(WordCount.SumReducer.class); conf.setMapOutputKeySchema(SchemaUtils.fromString("word:string")); conf.setMapOutputValueSchema(SchemaUtils.fromString("count:bigint")); conf.setOutputSchema(SchemaUtils.fromString("word:string,count:bigint"), TableInfo.DEFAULT_LABEL); InputUtils.addTable(TableInfo.builder().tableName("in_tbl").build(), conf); OutputUtils.addTable(TableInfo.builder().tableName("out_tbl").build(), conf); MockReducerUDTF udtf = new MockReducerUDTF(conf); udtf.setup(ctx); Object[][] testData = new Object[][] { new Object[] { new Text("word"), new LongWritable(1) }, new Object[] { new Text("count"), new LongWritable(2) }, new Object[] { new Text("count"), new LongWritable(1) }, new Object[] { new Text("count"), new LongWritable(1) }, new Object[] { new Text("foo"), new LongWritable(1) }, new Object[] { new Text("bar"), new LongWritable(1) } }; udtf.setTestData(testData); udtf.run(); udtf.close(); List<Object[]> forwarded = udtf.getForwarded(); assertEquals(4, forwarded.size()); assertEquals(new Text("word"), forwarded.get(0)[0]); assertEquals(new LongWritable(1), forwarded.get(0)[1]); assertEquals(new Text("count"), forwarded.get(1)[0]); assertEquals(new LongWritable(4), forwarded.get(1)[1]); assertEquals(new Text("foo"), forwarded.get(2)[0]); assertEquals(new LongWritable(1), forwarded.get(2)[1]); assertEquals(new Text("bar"), forwarded.get(3)[0]); assertEquals(new LongWritable(1), forwarded.get(3)[1]); } @Test public void testSkew() throws Exception { BridgeJobConf conf = new BridgeJobConf(); conf.setReducerClass(WordCount.SumReducer.class); conf.setMapOutputKeySchema(SchemaUtils.fromString("word:string")); conf.setMapOutputValueSchema(SchemaUtils.fromString("count:bigint")); conf.setOutputSchema(SchemaUtils.fromString("word:string,count:bigint"), TableInfo.DEFAULT_LABEL); InputUtils.addTable(TableInfo.builder().tableName("in_tbl").build(), conf); OutputUtils.addTable(TableInfo.builder().tableName("out_tbl").build(), conf); MockReducerUDTF udtf = new MockReducerUDTF(conf); udtf.setup(ctx); Object[][] testData = new Object[10000][2]; Object[] item = new Object[] { new Text("word"), new LongWritable(1) }; for (int i = 0; i < 10000; i++) { testData[i] = item; } udtf.setTestData(testData); udtf.run(); udtf.close(); List<Object[]> forwarded = udtf.getForwarded(); assertEquals(1, forwarded.size()); assertEquals(new Text("word"), forwarded.get(0)[0]); assertEquals(new LongWritable(10000), forwarded.get(0)[1]); } @Test public void testMultipleKey() throws Exception { BridgeJobConf conf = new BridgeJobConf(); conf.setReducerClass(WordCount.SumReducer.class); conf.setMapOutputKeySchema(SchemaUtils.fromString("word:string")); conf.setMapOutputValueSchema(SchemaUtils.fromString("count:bigint")); conf.setOutputSchema(SchemaUtils.fromString("word:string,count:bigint"), TableInfo.DEFAULT_LABEL); InputUtils.addTable(TableInfo.builder().tableName("in_tbl").build(), conf); OutputUtils.addTable(TableInfo.builder().tableName("out_tbl").build(), conf); MockReducerUDTF udtf = new MockReducerUDTF(conf); udtf.setup(ctx); Object[][] testData = new Object[1000][2]; for (int i = 0; i < 1000; i++) { Object[] item = new Object[] { new Text("word" + i), new LongWritable(i) }; testData[i] = item; } udtf.setTestData(testData); udtf.run(); udtf.close(); List<Object[]> forwarded = udtf.getForwarded(); assertEquals(1000, forwarded.size()); assertEquals(new Text("word999"), forwarded.get(999)[0]); assertEquals(new LongWritable(999), forwarded.get(999)[1]); } @Test public void testMultiInsert() throws Exception { BridgeJobConf conf = new BridgeJobConf(); conf.setReducerClass(WordCountWithMultiInsert.SumReducer.class); conf.setMapOutputKeySchema(SchemaUtils.fromString("word:string")); conf.setMapOutputValueSchema(SchemaUtils.fromString("count:bigint")); conf.setOutputSchema(SchemaUtils.fromString("word:string,count:bigint"), "out1"); conf.setOutputSchema(SchemaUtils.fromString("word:string,count:bigint"), "out2"); InputUtils.addTable(TableInfo.builder().tableName("in_tbl").build(), conf); OutputUtils.addTable(TableInfo.builder().tableName("out_tbl").label("out1").build(), conf); OutputUtils.addTable(TableInfo.builder().tableName("out_tbl").label("out2").build(), conf); MockReducerUDTF udtf = new MockReducerUDTF(conf); udtf.setup(ctx); Object[][] testData = new Object[][] { new Object[] { new Text("word"), new LongWritable(1) }, new Object[] { new Text("word"), new LongWritable(2) }, new Object[] { new Text("count"), new LongWritable(2) } }; udtf.setTestData(testData); udtf.run(); udtf.close(); List<Object[]> forwarded = udtf.getForwarded(); assertEquals(4, forwarded.size()); assertEquals(new Text("word"), forwarded.get(0)[0]); assertEquals(new LongWritable(3), forwarded.get(0)[1]); assertEquals(new Text("out2"), forwarded.get(0)[2]); assertEquals(new Text("count"), forwarded.get(3)[0]); assertEquals(new LongWritable(2), forwarded.get(3)[1]); assertEquals(new Text("out1"), forwarded.get(3)[2]); } @Test public void testEmptyInput() throws Exception { BridgeJobConf conf = new BridgeJobConf(); conf.setReducerClass(WordCount.SumReducer.class); conf.setMapOutputKeySchema(SchemaUtils.fromString("word:string")); conf.setMapOutputValueSchema(SchemaUtils.fromString("count:bigint")); conf.setOutputSchema(SchemaUtils.fromString("word:string,count:bigint"), TableInfo.DEFAULT_LABEL); InputUtils.addTable(TableInfo.builder().tableName("in_tbl").build(), conf); OutputUtils.addTable(TableInfo.builder().tableName("out_tbl").build(), conf); MockReducerUDTF udtf = new MockReducerUDTF(conf); udtf.setup(ctx); udtf.close(); List<Object[]> forwarded = udtf.getForwarded(); assertEquals(0, forwarded.size()); } @Test public void testNoIteration() throws Exception { BridgeJobConf conf = new BridgeJobConf(); conf.setReducerClass(ReducerBase.class); conf.setMapOutputKeySchema(SchemaUtils.fromString("key:string")); conf.setMapOutputValueSchema(SchemaUtils.fromString("nil:bigint")); conf.setOutputSchema(SchemaUtils.fromString("key:string"), TableInfo.DEFAULT_LABEL); InputUtils.addTable(TableInfo.builder().tableName("in_tbl").build(), conf); OutputUtils.addTable(TableInfo.builder().tableName("out_tbl").build(), conf); MockReducerUDTF udtf = new MockReducerUDTF(conf); udtf.setup(ctx); Object[] item = new Object[] { new Text("word"), new LongWritable(1) }; for (int i = 0; i < 2; i++) { udtf.process(item); } udtf.close(); List<Object[]> forwarded = udtf.getForwarded(); assertEquals(0, forwarded.size()); } public static class ReduceExceptionReducer extends ReducerBase { @Override public void reduce(Record key, Iterator<Record> values, TaskContext context) throws IOException { throw new RuntimeException("By design."); } } @Test public void testThrowException() throws Exception { BridgeJobConf conf = new BridgeJobConf(); conf.setReducerClass(ReduceExceptionReducer.class); conf.setMapOutputKeySchema(SchemaUtils.fromString("word:string")); conf.setMapOutputValueSchema(SchemaUtils.fromString("count:bigint")); conf.setOutputSchema(SchemaUtils.fromString("word:string,count:bigint"), TableInfo.DEFAULT_LABEL); InputUtils.addTable(TableInfo.builder().tableName("in_tbl").build(), conf); OutputUtils.addTable(TableInfo.builder().tableName("out_tbl").build(), conf); MockReducerUDTF udtf = new MockReducerUDTF(conf); udtf.setup(ctx); try { Object[][] testData = new Object[][] { new Object[] { new Text("word"), new LongWritable(1) }, new Object[] { new Text("word"), new LongWritable(2) } }; udtf.setTestData(testData); udtf.run(); udtf.close(); fail("Not throwing exception."); } catch (Exception e) { assertTrue(e.getMessage().contains("By design.")); } } @Test public void profile() throws IOException, UDFException { BridgeJobConf conf = new BridgeJobConf(); conf.setReducerClass(WordCount.SumReducer.class); conf.setMapOutputKeySchema(SchemaUtils.fromString("word:string")); conf.setMapOutputValueSchema(SchemaUtils.fromString("count:bigint")); conf.setOutputSchema(SchemaUtils.fromString("word:string,count:bigint"), TableInfo.DEFAULT_LABEL); InputUtils.addTable(TableInfo.builder().tableName("in_tbl").build(), conf); OutputUtils.addTable(TableInfo.builder().tableName("out_tbl").build(), conf); MockReducerUDTF udtf = new MockReducerUDTF(conf); udtf.setup(ctx); Object[][] testData = new Object[1000000][2]; for (int i = 0; i < 1000000; i++) { Object[] item = new Object[] { new Text(RandomStringUtils.randomAlphabetic(2)), new LongWritable(1) }; testData[i] = item; } udtf.setTestData(testData); udtf.run(); udtf.close(); int sum = 0; for (Object[] item : udtf.getForwarded()) { sum += ((LongWritable) item[1]).get(); } assertEquals(1000000, sum); } public static void main(String[] args) throws IOException, UDFException { LotReducerUDTFTest test = new LotReducerUDTFTest(); System.in.read(); test.profile(); System.out.println("Done"); System.in.read(); } }