Java tutorial
/** * Copyright 2015 Expedia Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.hotels.corc.mapred; import static org.hamcrest.CoreMatchers.is; import static org.hamcrest.CoreMatchers.nullValue; import static org.junit.Assert.assertThat; import static org.mockito.Mockito.mock; import java.io.File; import java.io.IOException; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.ql.io.sarg.SearchArgument; import org.apache.hadoop.hive.ql.io.sarg.SearchArgumentFactory; import org.apache.hadoop.hive.serde2.ColumnProjectionUtils; import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; import org.apache.hadoop.io.NullWritable; import org.apache.hadoop.mapred.FileSplit; import org.apache.hadoop.mapred.InputSplit; import org.apache.hadoop.mapred.JobConf; import org.apache.hadoop.mapred.RecordReader; import org.apache.hadoop.mapred.Reporter; import org.junit.Before; import org.junit.Rule; import org.junit.Test; import org.junit.rules.TemporaryFolder; import org.junit.runner.RunWith; import org.mockito.Mock; import org.mockito.runners.MockitoJUnitRunner; import com.hotels.corc.Corc; import com.hotels.corc.DefaultConverterFactory; import com.hotels.corc.StructTypeInfoBuilder; import com.hotels.corc.test.OrcWriter; @RunWith(MockitoJUnitRunner.class) public class CorcInputFormatTest { @Rule public final TemporaryFolder temporaryFolder = new TemporaryFolder(); @Mock private Reporter reporter; private final JobConf conf = new JobConf(); private final CorcInputFormat inputFormat = new CorcInputFormat(); private File file; private Path path; private FileSplit split; @Before public void before() throws IOException { file = new File(temporaryFolder.getRoot(), "part-00000"); path = new Path(file.getCanonicalPath()); try (OrcWriter writer = new OrcWriter.Builder(conf, path).addField("a", TypeInfoFactory.stringTypeInfo) .addField("b", TypeInfoFactory.stringTypeInfo).build()) { writer.addRow("A1", "B1"); } split = new FileSplit(path, 0L, file.length(), (String[]) null); } @Test(expected = IOException.class) public void notAFileSplit() throws IOException { InputSplit split = mock(InputSplit.class); inputFormat.getRecordReader(split, conf, reporter); } @Test public void readColumnProjection() throws IOException { StructTypeInfo typeInfo = new StructTypeInfoBuilder().add("a", TypeInfoFactory.stringTypeInfo).build(); CorcInputFormat.setTypeInfo(conf, typeInfo); CorcInputFormat.setConverterFactoryClass(conf, DefaultConverterFactory.class); RecordReader<NullWritable, Corc> reader = inputFormat.getRecordReader(split, conf, reporter); Corc corc = reader.createValue(); reader.next(NullWritable.get(), corc); assertThat(corc.get("a"), is((Object) "A1")); assertThat(corc.get("b"), is(nullValue())); reader.close(); } @Test public void getSplits() throws IOException { conf.set("mapred.input.dir", temporaryFolder.getRoot().getCanonicalPath()); InputSplit[] splits = inputFormat.getSplits(conf, 1); assertThat(splits.length, is(1)); FileSplit actual = (FileSplit) splits[0]; assertThat(actual.getPath().toUri().getRawPath(), is(path.toUri().getRawPath())); assertThat(actual.getStart(), is(0L)); assertThat(actual.getLength(), is(file.length())); } @Test public void readFullyReadSchemaFromSplit() throws IOException { StructTypeInfo typeInfo = new StructTypeInfoBuilder().add("a", TypeInfoFactory.stringTypeInfo) .add("b", TypeInfoFactory.stringTypeInfo).build(); CorcInputFormat.setTypeInfo(conf, typeInfo); CorcInputFormat.setConverterFactoryClass(conf, DefaultConverterFactory.class); RecordReader<NullWritable, Corc> reader = inputFormat.getRecordReader(split, conf, reporter); Corc corc = reader.createValue(); reader.next(NullWritable.get(), corc); assertThat(corc.get("a"), is((Object) "A1")); assertThat(corc.get("b"), is((Object) "B1")); reader.close(); } @Test public void readFullyDeclaredSchema() throws IOException { StructTypeInfo typeInfo = new StructTypeInfoBuilder().add("a", TypeInfoFactory.stringTypeInfo) .add("b", TypeInfoFactory.stringTypeInfo).build(); CorcInputFormat.setTypeInfo(conf, typeInfo); CorcInputFormat.setSchemaTypeInfo(conf, typeInfo); CorcInputFormat.setConverterFactoryClass(conf, DefaultConverterFactory.class); RecordReader<NullWritable, Corc> reader = inputFormat.getRecordReader(split, conf, reporter); Corc corc = reader.createValue(); reader.next(NullWritable.get(), corc); assertThat(corc.get("a"), is((Object) "A1")); assertThat(corc.get("b"), is((Object) "B1")); reader.close(); } @Test public void setInputTypeInfo() { StructTypeInfo typeInfo = new StructTypeInfoBuilder().add("a", TypeInfoFactory.stringTypeInfo) .add("b", TypeInfoFactory.stringTypeInfo).build(); CorcInputFormat.setTypeInfo(conf, typeInfo); assertThat(conf.get(CorcInputFormat.INPUT_TYPE_INFO), is("struct<a:string,b:string>")); } @Test public void getInputTypeInfo() { conf.set(CorcInputFormat.INPUT_TYPE_INFO, "struct<a:string,b:string>"); StructTypeInfo typeInfo = new StructTypeInfoBuilder().add("a", TypeInfoFactory.stringTypeInfo) .add("b", TypeInfoFactory.stringTypeInfo).build(); assertThat(CorcInputFormat.getTypeInfo(conf), is(typeInfo)); } @Test public void setSchemaTypeInfo() { StructTypeInfo typeInfo = new StructTypeInfoBuilder().add("a", TypeInfoFactory.stringTypeInfo) .add("b", TypeInfoFactory.stringTypeInfo).build(); CorcInputFormat.setSchemaTypeInfo(conf, typeInfo); assertThat(conf.get(CorcInputFormat.SCHEMA_TYPE_INFO), is("struct<a:string,b:string>")); } @Test public void setSchemaTypeInfoNull() { CorcInputFormat.setSchemaTypeInfo(conf, null); assertThat(conf.get(CorcInputFormat.SCHEMA_TYPE_INFO), is(nullValue())); } @Test public void getSchemaTypeInfo() { conf.set(CorcInputFormat.SCHEMA_TYPE_INFO, "struct<a:string,b:string>"); StructTypeInfo typeInfo = new StructTypeInfoBuilder().add("a", TypeInfoFactory.stringTypeInfo) .add("b", TypeInfoFactory.stringTypeInfo).build(); assertThat(CorcInputFormat.getSchemaTypeInfo(conf), is(typeInfo)); } @Test public void getSchemaTypeInfoNull() { assertThat(CorcInputFormat.getSchemaTypeInfo(conf), is(nullValue())); } @Test public void getSchemaTypeInfoEmpty() { conf.set(CorcInputFormat.SCHEMA_TYPE_INFO, ""); assertThat(CorcInputFormat.getSchemaTypeInfo(conf), is(nullValue())); } @Test public void setInputReadColumnProjection() { StructTypeInfo typeInfo = new StructTypeInfoBuilder().add("a", TypeInfoFactory.stringTypeInfo) .add("b", TypeInfoFactory.longTypeInfo).build(); conf.set(CorcInputFormat.INPUT_TYPE_INFO, "struct<a:string>"); CorcInputFormat.setReadColumns(conf, typeInfo); assertThat(conf.getBoolean(ColumnProjectionUtils.READ_ALL_COLUMNS, true), is(false)); assertThat(conf.get(ColumnProjectionUtils.READ_COLUMN_NAMES_CONF_STR), is("a")); assertThat(conf.get(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR), is("0")); } @Test public void setInputReadColumnsAll() { StructTypeInfo typeInfo = new StructTypeInfoBuilder().add("a", TypeInfoFactory.stringTypeInfo) .add("b", TypeInfoFactory.longTypeInfo).build(); conf.set(CorcInputFormat.INPUT_TYPE_INFO, "struct<a:string,b:bigint>"); CorcInputFormat.setReadColumns(conf, typeInfo); assertThat(conf.getBoolean(ColumnProjectionUtils.READ_ALL_COLUMNS, true), is(false)); assertThat(conf.get(ColumnProjectionUtils.READ_COLUMN_NAMES_CONF_STR), is("a,b")); assertThat(conf.get(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR), is("0,1")); } @Test public void setInputReadColumnsMissing() { StructTypeInfo typeInfo = new StructTypeInfoBuilder().add("a", TypeInfoFactory.stringTypeInfo) .add("b", TypeInfoFactory.longTypeInfo).build(); conf.set(CorcInputFormat.INPUT_TYPE_INFO, "struct<a:string,b:bigint,c:string>"); CorcInputFormat.setReadColumns(conf, typeInfo); assertThat(conf.getBoolean(ColumnProjectionUtils.READ_ALL_COLUMNS, true), is(false)); assertThat(conf.get(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR), is("0,1")); } @Test(expected = IllegalStateException.class) public void setInputReadColumnsDifferentTypes() { StructTypeInfo typeInfo = new StructTypeInfoBuilder().add("a", TypeInfoFactory.stringTypeInfo).build(); conf.set(CorcInputFormat.INPUT_TYPE_INFO, "struct<a:bigint>"); CorcInputFormat.setReadColumns(conf, typeInfo); } @Test public void searchArgument() { SearchArgument searchArgument = SearchArgumentFactory.newBuilder().startAnd().equals("a", "b").end() .build(); CorcInputFormat.setSearchArgument(conf, searchArgument); String kryo = conf.get(CorcInputFormat.SEARCH_ARGUMENT); assertThat(kryo, is(searchArgument.toKryo())); } @Test public void searchArgumentNull() { CorcInputFormat.setSearchArgument(conf, null); String kryo = conf.get(CorcInputFormat.SEARCH_ARGUMENT); assertThat(kryo, is(nullValue())); } @Test(expected = RuntimeException.class) public void converterFactoryNull() { CorcInputFormat.getConverterFactory(conf); } @Test public void converterFactory() { CorcInputFormat.setConverterFactoryClass(conf, DefaultConverterFactory.class); assertThat(conf.get(CorcInputFormat.CONVERTER_FACTORY), is("com.hotels.corc.DefaultConverterFactory")); } }