Java tutorial
/** * Copyright 2011-2016 Asakusa Framework Team. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.asakusafw.directio.hive.parquet; import static org.hamcrest.Matchers.*; import static org.junit.Assert.*; import java.io.File; import java.io.FileOutputStream; import java.io.IOException; import java.io.InputStream; import java.math.BigDecimal; import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.TimeZone; import java.util.concurrent.TimeUnit; import java.util.regex.Matcher; import java.util.regex.Pattern; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.LocalFileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.common.type.HiveDecimal; import org.apache.hadoop.io.IOUtils; import org.junit.Assume; import org.junit.Rule; import org.junit.Test; import org.junit.rules.TemporaryFolder; import com.asakusafw.directio.hive.parquet.mock.MockSimpleWithLong; import com.asakusafw.directio.hive.parquet.mock.WithDate; import com.asakusafw.directio.hive.parquet.mock.WithDateTime; import com.asakusafw.directio.hive.parquet.mock.WithDecimal; import com.asakusafw.directio.hive.parquet.mock.WithFour; import com.asakusafw.directio.hive.parquet.mock.WithString; import com.asakusafw.directio.hive.parquet.mock.WithStringSupports; import com.asakusafw.directio.hive.parquet.mock.WithTimestampSupports; import com.asakusafw.directio.hive.serde.DataModelDescriptorEditor; import com.asakusafw.directio.hive.serde.DataModelMapping.ExceptionHandlingStrategy; import com.asakusafw.directio.hive.serde.DataModelMapping.FieldMappingStrategy; import com.asakusafw.directio.hive.serde.FieldPropertyDescriptor; import com.asakusafw.directio.hive.serde.StringValueSerdeFactory; import com.asakusafw.directio.hive.serde.TimestampValueSerdeFactory; import com.asakusafw.directio.hive.serde.ValueSerde; import com.asakusafw.directio.hive.serde.ValueSerdeFactory; import com.asakusafw.directio.hive.serde.mock.MockSimple; import com.asakusafw.directio.hive.serde.mock.MockTypes; import com.asakusafw.runtime.directio.Counter; import com.asakusafw.runtime.directio.DirectInputFragment; import com.asakusafw.runtime.directio.hadoop.StripedDataFormat; import com.asakusafw.runtime.io.ModelInput; import com.asakusafw.runtime.io.ModelOutput; import com.asakusafw.runtime.value.Date; import com.asakusafw.runtime.value.DateOption; import com.asakusafw.runtime.value.DateTime; import com.asakusafw.runtime.value.DateTimeOption; import com.asakusafw.runtime.value.DecimalOption; import com.asakusafw.runtime.value.IntOption; import com.asakusafw.runtime.value.LongOption; import com.asakusafw.runtime.value.StringOption; import parquet.column.ParquetProperties.WriterVersion; /** * Test for {@link ParquetFileFormat}. */ public class ParquetFileFormatTest { private static final long LOCAL_TIMEZONE_OFFSET = TimeUnit.MILLISECONDS .toSeconds(TimeZone.getDefault().getRawOffset()); // test data may be created different timestamp private static final long TESTDATA_TIMEZONE_OFFSET = TimeUnit.MILLISECONDS .toSeconds(TimeZone.getTimeZone("JST").getRawOffset()); /** * A temporary folder for testing. */ @Rule public final TemporaryFolder folder = new TemporaryFolder(); private <T> ParquetFileFormat<T> format(Class<T> type, String... removes) { return format(type, Collections.<String, ValueSerde>emptyMap(), removes); } private <T> ParquetFileFormat<T> format(Class<T> type, Map<String, ? extends ValueSerde> edits, String... removes) { ParquetFileFormat<T> format = new ParquetFileFormat<>("testing", new ParquetFormatConfiguration(), new DataModelDescriptorEditor(FieldPropertyDescriptor.extract(type)).editAll(edits) .removeAll(Arrays.asList(removes)).build()); format.setConf(new org.apache.hadoop.conf.Configuration()); return format; } /** * Test method for {@link AbstractParquetFileFormat#getFormatName()}. */ @Test public void format_name() { assertThat(format(MockSimple.class).getFormatName(), equalTo("PARQUET")); } /** * Test method for {@link AbstractParquetFileFormat#getSupportedType()}. */ @Test public void supported_type() { assertThat(format(MockSimple.class).getSupportedType(), equalTo((Object) MockSimple.class)); } /** * {@code tblproperties} for default settings. */ @Test public void table_properties_default() { Map<String, String> props = format(MockSimple.class).getTableProperties(); assertThat(props.size(), is(0)); } /** * simple I/O. * @throws Exception if failed */ @Test public void io_simple() throws Exception { ParquetFileFormat<MockSimple> format = format(MockSimple.class); MockSimple in = new MockSimple(100, "Hello, world!"); MockSimple out = restore(format, in); assertThat(out.number, is(new IntOption(100))); assertThat(out.string, is(new StringOption("Hello, world!"))); } /** * I/O with all supported types. * @throws Exception if failed */ @SuppressWarnings("deprecation") @Test public void io_types() throws Exception { Map<String, ValueSerde> edits = new HashMap<>(); edits.put("decimalOption", ValueSerdeFactory.getDecimal(10, 2)); ParquetFileFormat<MockTypes> format = format(MockTypes.class, edits); MockTypes in = new MockTypes(); in.booleanOption.modify(true); in.byteOption.modify((byte) 1); in.shortOption.modify((short) 2); in.intOption.modify(3); in.longOption.modify(4L); in.floatOption.modify(5f); in.doubleOption.modify(6d); in.dateOption.modify(new Date(2014, 6, 1)); in.dateTimeOption.modify(new DateTime(2014, 6, 1, 2, 3, 4)); in.stringOption.modify("Hello, world!"); in.decimalOption.modify(new BigDecimal("7.89")); MockTypes out = restore(format, in); assertThat(out.booleanOption, equalTo(in.booleanOption)); assertThat(out.byteOption, equalTo(in.byteOption)); assertThat(out.shortOption, equalTo(in.shortOption)); assertThat(out.intOption, equalTo(in.intOption)); assertThat(out.longOption, equalTo(in.longOption)); assertThat(out.floatOption, equalTo(in.floatOption)); assertThat(out.doubleOption, equalTo(in.doubleOption)); assertThat(out.dateOption, equalTo(in.dateOption)); assertThat(out.dateTimeOption, equalTo(in.dateTimeOption)); assertThat(out.stringOption, equalTo(in.stringOption)); assertThat(out.decimalOption, equalTo(in.decimalOption)); } /** * I/O with decimals. * @throws Exception if failed */ @SuppressWarnings("deprecation") @Test public void io_decimals() throws Exception { for (int p = 2; p <= HiveDecimal.MAX_PRECISION; p++) { Map<String, ValueSerde> edits = new HashMap<>(); edits.put("decimalOption", ValueSerdeFactory.getDecimal(p, 2)); ParquetFileFormat<MockTypes> format = format(MockTypes.class, edits); MockTypes in = new MockTypes(); if (p < 3) { in.decimalOption.modify(new BigDecimal("0.14")); } else { in.decimalOption.modify(new BigDecimal("3.14")); } MockTypes out = restore(format, in); assertThat(out.decimalOption, equalTo(in.decimalOption)); } } /** * I/O with decimals. * @throws Exception if failed */ @SuppressWarnings("deprecation") @Test public void io_decimals_int32() throws Exception { Map<String, ValueSerde> edits = new HashMap<>(); edits.put("decimalOption", ValueSerdeFactory.getDecimal(9, 2)); int count = 100; ParquetFileFormat<MockTypes> format = format(MockTypes.class, edits); List<MockTypes> inputs = new ArrayList<>(); for (int i = 0; i < count; i++) { MockTypes in = new MockTypes(); in.decimalOption.modify(new BigDecimal("7.89")); inputs.add(in); } List<MockTypes> outputs = restore(format, inputs); MockTypes sample = inputs.get(0); for (MockTypes out : outputs) { assertThat(out.decimalOption, equalTo(sample.decimalOption)); } } /** * I/O with decimals. * @throws Exception if failed */ @SuppressWarnings("deprecation") @Test public void io_decimals_int64() throws Exception { Map<String, ValueSerde> edits = new HashMap<>(); edits.put("decimalOption", ValueSerdeFactory.getDecimal(18, 2)); int count = 100; ParquetFileFormat<MockTypes> format = format(MockTypes.class, edits); List<MockTypes> inputs = new ArrayList<>(); for (int i = 0; i < count; i++) { MockTypes in = new MockTypes(); in.decimalOption.modify(new BigDecimal("7.89")); inputs.add(in); } List<MockTypes> outputs = restore(format, inputs); MockTypes sample = inputs.get(0); for (MockTypes out : outputs) { assertThat(out.decimalOption, equalTo(sample.decimalOption)); } } /** * I/O with decimals. * @throws Exception if failed */ @SuppressWarnings("deprecation") @Test public void io_decimals_binary() throws Exception { Map<String, ValueSerde> edits = new HashMap<>(); edits.put("decimalOption", ValueSerdeFactory.getDecimal(38, 2)); int count = 100; ParquetFileFormat<MockTypes> format = format(MockTypes.class, edits); List<MockTypes> inputs = new ArrayList<>(); for (int i = 0; i < count; i++) { MockTypes in = new MockTypes(); in.decimalOption.modify(new BigDecimal("-7.89")); inputs.add(in); } List<MockTypes> outputs = restore(format, inputs); MockTypes sample = inputs.get(0); for (MockTypes out : outputs) { assertThat(out.decimalOption, equalTo(sample.decimalOption)); } } /** * I/O with all supported types. * @throws Exception if failed */ @SuppressWarnings("deprecation") @Test public void io_types_large() throws Exception { Map<String, ValueSerde> edits = new HashMap<>(); edits.put("decimalOption", ValueSerdeFactory.getDecimal(10, 2)); int count = 1000; ParquetFileFormat<MockTypes> format = format(MockTypes.class, edits); List<MockTypes> inputs = new ArrayList<>(); for (int i = 0; i < count; i++) { MockTypes in = new MockTypes(); in.booleanOption.modify(true); in.byteOption.modify((byte) 1); in.shortOption.modify((short) 2); in.intOption.modify(3); in.longOption.modify(4L); in.floatOption.modify(5f); in.doubleOption.modify(6d); in.dateOption.modify(new Date(2014, 6, 1)); in.dateTimeOption.modify(new DateTime(2014, 6, 1, 2, 3, 4)); in.stringOption.modify("Hello, world!"); in.decimalOption.modify(new BigDecimal("7.89")); inputs.add(in); } List<MockTypes> outputs = restore(format, inputs); MockTypes sample = inputs.get(0); for (MockTypes out : outputs) { assertThat(out.booleanOption, equalTo(sample.booleanOption)); assertThat(out.byteOption, equalTo(sample.byteOption)); assertThat(out.shortOption, equalTo(sample.shortOption)); assertThat(out.intOption, equalTo(sample.intOption)); assertThat(out.longOption, equalTo(sample.longOption)); assertThat(out.floatOption, equalTo(sample.floatOption)); assertThat(out.doubleOption, equalTo(sample.doubleOption)); assertThat(out.dateOption, equalTo(sample.dateOption)); assertThat(out.dateTimeOption, equalTo(sample.dateTimeOption)); assertThat(out.stringOption, equalTo(sample.stringOption)); assertThat(out.decimalOption, equalTo(sample.decimalOption)); } } /** * I/O with all supported types with {@code null}s. * @throws Exception if failed */ @Test public void io_nulls() throws Exception { Map<String, ValueSerde> edits = new HashMap<>(); edits.put("decimalOption", ValueSerdeFactory.getDecimal(10, 2)); ParquetFileFormat<MockTypes> format = format(MockTypes.class, edits); MockTypes in = new MockTypes(); MockTypes out = restore(format, in); assertThat(out.booleanOption, equalTo(in.booleanOption)); assertThat(out.byteOption, equalTo(in.byteOption)); assertThat(out.shortOption, equalTo(in.shortOption)); assertThat(out.intOption, equalTo(in.intOption)); assertThat(out.longOption, equalTo(in.longOption)); assertThat(out.floatOption, equalTo(in.floatOption)); assertThat(out.doubleOption, equalTo(in.doubleOption)); assertThat(out.dateOption, equalTo(in.dateOption)); assertThat(out.dateTimeOption, equalTo(in.dateTimeOption)); assertThat(out.stringOption, equalTo(in.stringOption)); assertThat(out.decimalOption, equalTo(in.decimalOption)); } /** * I/O with fragment. * @throws Exception if failed */ @Test public void io_fragment() throws Exception { File file = folder.newFile(); Assume.assumeThat(file.delete() || file.exists() == false, is(true)); ParquetFileFormat<MockSimple> format = format(MockSimple.class); LocalFileSystem fs = FileSystem.getLocal(format.getConf()); try (ModelOutput<MockSimple> output = format.createOutput(MockSimple.class, fs, new Path(file.toURI()), new Counter())) { output.write(new MockSimple(100, "Hello, world!")); } assertThat(file.exists(), is(true)); FileStatus stat = fs.getFileStatus(new Path(file.toURI())); List<DirectInputFragment> fragments = format.computeInputFragments(new StripedDataFormat.InputContext( MockSimple.class, Arrays.asList(stat), fs, -1L, -1L, false, false)); assertThat(fragments, hasSize(1)); DirectInputFragment first = fragments.get(0); try (ModelInput<MockSimple> input = format.createInput(MockSimple.class, fs, new Path(first.getPath()), first.getOffset(), first.getSize(), new Counter())) { MockSimple buf = new MockSimple(); assertThat(input.readTo(buf), is(true)); assertThat(buf.number, is(new IntOption(100))); assertThat(buf.string, is(new StringOption("Hello, world!"))); assertThat(input.readTo(buf), is(false)); } } /** * I/O with {@code v2}. * @throws Exception if failed */ @Test public void io_v_2() throws Exception { ParquetFileFormat<MockSimple> format = format(MockSimple.class); format.getFormatConfiguration().withWriterVersion(WriterVersion.PARQUET_2_0); MockSimple in = new MockSimple(100, "Hello, world!"); MockSimple out = restore(format, in); assertThat(out.number, is(new IntOption(100))); assertThat(out.string, is(new StringOption("Hello, world!"))); } /** * Field mapping by its name. * @throws Exception if failed */ @SuppressWarnings("deprecation") @Test public void mapping_by_position() throws Exception { ParquetFileFormat<WithFour> f1 = format(WithFour.class, "col1", "col3"); ParquetFileFormat<WithFour> f2 = format(WithFour.class, "col2", "col3"); f2.getFormatConfiguration().withFieldMappingStrategy(FieldMappingStrategy.POSITION); WithFour in = new WithFour(); in.col0.modify(0); in.col1.modify(1); in.col2.modify(2); in.col3.modify(3); File file = save(f1, Arrays.asList(in)); List<WithFour> results = load(f2, file); assertThat(results, hasSize(1)); WithFour out = results.get(0); assertThat(out.col0, is(new IntOption(0))); assertThat(out.col1, is(new IntOption(2))); assertThat(out.col2, is(new IntOption())); assertThat(out.col3, is(new IntOption())); } /** * Field mapping by its name. * @throws Exception if failed */ @SuppressWarnings("deprecation") @Test public void mapping_by_name() throws Exception { ParquetFileFormat<WithFour> f1 = format(WithFour.class, "col1", "col3"); ParquetFileFormat<WithFour> f2 = format(WithFour.class, "col2", "col3"); f2.getFormatConfiguration().withFieldMappingStrategy(FieldMappingStrategy.NAME); WithFour in = new WithFour(); in.col0.modify(0); in.col1.modify(1); in.col2.modify(2); in.col3.modify(3); File file = save(f1, Arrays.asList(in)); List<WithFour> results = load(f2, file); assertThat(results, hasSize(1)); WithFour out = results.get(0); assertThat(out.col0, is(new IntOption(0))); assertThat(out.col1, is(new IntOption())); assertThat(out.col2, is(new IntOption())); assertThat(out.col3, is(new IntOption())); } /** * fail on missing source. * @throws Exception if failed */ @Test public void fail_on_missing_source() throws Exception { ParquetFileFormat<WithFour> f1 = format(WithFour.class, "col3"); ParquetFileFormat<WithFour> f2 = format(WithFour.class); f2.getFormatConfiguration().withFieldMappingStrategy(FieldMappingStrategy.NAME) .withOnMissingSource(ExceptionHandlingStrategy.FAIL); WithFour in = new WithFour(); File file = save(f1, Arrays.asList(in)); try { load(f2, file); fail(); } catch (IllegalArgumentException e) { // ok. } } /** * fail on missing target. * @throws Exception if failed */ @Test public void fail_on_missing_target() throws Exception { ParquetFileFormat<WithFour> f1 = format(WithFour.class); ParquetFileFormat<WithFour> f2 = format(WithFour.class, "col3"); f2.getFormatConfiguration().withFieldMappingStrategy(FieldMappingStrategy.NAME) .withOnMissingTarget(ExceptionHandlingStrategy.FAIL); WithFour in = new WithFour(); File file = save(f1, Arrays.asList(in)); try { load(f2, file); fail(); } catch (IllegalArgumentException e) { // ok. } } /** * ignore on incompatible type. * @throws Exception if failed */ @Test public void ignore_on_incompatible_type() throws Exception { ParquetFileFormat<MockSimple> f1 = format(MockSimple.class); ParquetFileFormat<MockSimpleWithLong> f2 = format(MockSimpleWithLong.class); f2.getFormatConfiguration().withFieldMappingStrategy(FieldMappingStrategy.NAME) .withOnIncompatibleType(ExceptionHandlingStrategy.IGNORE); MockSimple in = new MockSimple(100, "Hello, world!"); File file = save(f1, Arrays.asList(in)); List<MockSimpleWithLong> results = load(f2, file); assertThat(results, hasSize(1)); MockSimpleWithLong out = results.get(0); assertThat(out.number, is(new LongOption())); assertThat(out.string, is(in.string)); } /** * fail on incompatible type. * @throws Exception if failed */ @Test public void fail_on_incompatible_type() throws Exception { ParquetFileFormat<MockSimple> f1 = format(MockSimple.class); ParquetFileFormat<MockSimpleWithLong> f2 = format(MockSimpleWithLong.class); f2.getFormatConfiguration().withFieldMappingStrategy(FieldMappingStrategy.NAME) .withOnIncompatibleType(ExceptionHandlingStrategy.FAIL); MockSimple in = new MockSimple(100, "Hello, world!"); File file = save(f1, Arrays.asList(in)); try { load(f2, file); fail(); } catch (IllegalArgumentException e) { // ok. } } /** * using strings. * @throws Exception if failed */ @SuppressWarnings("deprecation") @Test public void io_string() throws Exception { Map<String, ValueSerde> edits = new HashMap<>(); edits.put("decimal", StringValueSerdeFactory.DECIMAL); edits.put("date", StringValueSerdeFactory.DATE); edits.put("datetime", StringValueSerdeFactory.DATETIME); ParquetFileFormat<WithStringSupports> format = format(WithStringSupports.class, edits); WithStringSupports in = new WithStringSupports(); in.decimal.modify(new BigDecimal("123.45")); in.date.modify(new Date(2014, 7, 1)); in.datetime.modify(new DateTime(2014, 7, 1, 12, 34, 56)); WithStringSupports out = restore(format, in); assertThat(out.decimal, is(in.decimal)); assertThat(out.date, is(in.date)); assertThat(out.datetime, is(in.datetime)); } /** * using strings with dictionary. * @throws Exception if failed */ @SuppressWarnings("deprecation") @Test public void io_string_dict() throws Exception { Map<String, ValueSerde> edits = new HashMap<>(); edits.put("decimal", StringValueSerdeFactory.DECIMAL); edits.put("date", StringValueSerdeFactory.DATE); edits.put("datetime", StringValueSerdeFactory.DATETIME); ParquetFileFormat<WithStringSupports> format = format(WithStringSupports.class, edits); int count = 1000; List<WithStringSupports> inputs = new ArrayList<>(); for (int i = 0; i < count; i++) { WithStringSupports object = new WithStringSupports(); object.decimal.modify(new BigDecimal("123.45")); object.date.modify(new Date(2014, 7, 1)); object.datetime.modify(new DateTime(2014, 7, 1, 12, 34, 56)); inputs.add(object); } WithStringSupports sample = inputs.get(0); List<WithStringSupports> outputs = restore(format, inputs); for (WithStringSupports out : outputs) { assertThat(out.decimal, is(sample.decimal)); assertThat(out.date, is(sample.date)); assertThat(out.datetime, is(sample.datetime)); } } /** * using timestamps. * @throws Exception if failed */ @SuppressWarnings("deprecation") @Test public void io_timestamp() throws Exception { Map<String, ValueSerde> edits = new HashMap<>(); edits.put("date", TimestampValueSerdeFactory.DATE); edits.put("datetime", ValueSerdeFactory.DATETIME); ParquetFileFormat<WithTimestampSupports> format = format(WithTimestampSupports.class, edits); WithTimestampSupports in = new WithTimestampSupports(); in.date.modify(new Date(2015, 7, 1)); in.datetime.modify(new DateTime(2015, 7, 1, 12, 34, 56)); WithTimestampSupports out = restore(format, in); assertThat(out.date, is(in.date)); assertThat(out.datetime, is(in.datetime)); } /** * using char. * @throws Exception if failed */ @SuppressWarnings("deprecation") @Test public void io_char() throws Exception { Map<String, ValueSerde> edits = new HashMap<>(); edits.put("value", ValueSerdeFactory.getChar(10)); ParquetFileFormat<WithString> format = format(WithString.class, edits); WithString in = new WithString(); in.value.modify("Hello, world!"); WithString out = restore(format, in); assertThat(out.value, is(new StringOption("Hello, world!".substring(0, 10)))); } /** * using varchar. * @throws Exception if failed */ @SuppressWarnings("deprecation") @Test public void io_varchar() throws Exception { Map<String, ValueSerde> edits = new HashMap<>(); edits.put("value", ValueSerdeFactory.getVarchar(10)); ParquetFileFormat<WithString> format = format(WithString.class, edits); WithString in = new WithString(); in.value.modify("Hello, world!"); WithString out = restore(format, in); assertThat(out.value, is(new StringOption("Hello, world!".substring(0, 10)))); } /** * using varchar. * @throws Exception if failed */ @SuppressWarnings("deprecation") @Test public void io_varchar_dict() throws Exception { Map<String, ValueSerde> edits = new HashMap<>(); edits.put("value", ValueSerdeFactory.getVarchar(10)); ParquetFileFormat<WithString> format = format(WithString.class, edits); int count = 1000; List<WithString> inputs = new ArrayList<>(); for (int i = 0; i < count; i++) { WithString in = new WithString(); in.value.modify("Hello"); inputs.add(in); } List<WithString> outputs = restore(format, inputs); for (WithString out : outputs) { assertThat(out.value, is(new StringOption("Hello"))); count--; } assertThat(count, is(0)); } /** * loading char type which generated by hive. * @throws Exception if failed */ @Test public void format_char() throws Exception { checkString("char-10-hello-parquet.parquet", ValueSerdeFactory.getChar(10), "Hello, Parquet!".substring(0, 10)); } /** * loading varchar type which generated by hive. * @throws Exception if failed */ @Test public void format_varchar() throws Exception { checkString("varchar-10-hello-parquet.parquet", ValueSerdeFactory.getVarchar(10), "Hello, Parquet!".substring(0, 10)); } private void checkString(String file, ValueSerde serde, String expected) throws IOException, InterruptedException { WithString buf = new WithString(); ParquetFileFormat<WithString> format = format(WithString.class, Collections.singletonMap("value", serde)); try (ModelInput<WithString> input = load(format, file)) { assertThat(input.readTo(buf), is(true)); assertThat(input.readTo(new WithString()), is(false)); } assertThat(buf.value, is(new StringOption(expected))); } /** * loading decimal type which generated by hive. * @throws Exception if failed */ @Test public void format_decimal() throws Exception { checkDecimal("decimal-9_2-3_14.parquet"); checkDecimal("decimal-18_2-3_14.parquet"); checkDecimal("decimal-38_2-3_14.parquet"); } private void checkDecimal(String file) throws IOException, InterruptedException { Pattern p = Pattern.compile("decimal-(\\d+)_(\\d+)-(.+)\\.parquet"); Matcher matcher = p.matcher(file); assertThat(matcher.matches(), is(true)); int precision = Integer.parseInt(matcher.group(1)); int scale = Integer.parseInt(matcher.group(2)); WithDecimal buf = new WithDecimal(); ParquetFileFormat<WithDecimal> format = format(WithDecimal.class, Collections.singletonMap("value", ValueSerdeFactory.getDecimal(precision, scale))); try (ModelInput<WithDecimal> input = load(format, file)) { assertThat(input.readTo(buf), is(true)); assertThat(input.readTo(new WithDecimal()), is(false)); } BigDecimal expected = new BigDecimal(matcher.group(3).replace('_', '.')); assertThat(buf.value, is(new DecimalOption(expected))); } /** * loading timestamp type which generated be hive. * @throws Exception if failed */ @Test public void format_timestamp() throws Exception { checkDateTime("timestamp-1970-01-01-00-00-00.parquet"); checkDateTime("timestamp-1970-01-01-12-34-56.parquet"); checkDateTime("timestamp-2014-12-01-23-59-59.parquet"); } @SuppressWarnings("deprecation") private void checkDateTime(String file) throws IOException, InterruptedException { Pattern p = Pattern.compile("timestamp-(\\d+)-(\\d+)-(\\d+)-(\\d+)-(\\d+)-(\\d+)\\.parquet"); Matcher matcher = p.matcher(file); assertThat(matcher.matches(), is(true)); WithDateTime buf = new WithDateTime(); try (ModelInput<WithDateTime> input = load(WithDateTime.class, file)) { assertThat(input.readTo(buf), is(true)); assertThat(input.readTo(new WithDateTime()), is(false)); } // fix timezone buf.value.modify(buf.value.get().getElapsedSeconds() + TESTDATA_TIMEZONE_OFFSET - LOCAL_TIMEZONE_OFFSET); DateTime expected = new DateTime(Integer.parseInt(matcher.group(1)), Integer.parseInt(matcher.group(2)), Integer.parseInt(matcher.group(3)), Integer.parseInt(matcher.group(4)), Integer.parseInt(matcher.group(5)), Integer.parseInt(matcher.group(6))); assertThat(buf.value, is(new DateTimeOption(expected))); } /** * loading date type which generated be hive. * @throws Exception if failed */ @Test public void format_date() throws Exception { checkDate("date-1970-01-01.parquet"); checkDate("date-2015-12-31.parquet"); checkDate("date-1995-05-23.parquet"); } private void checkDate(String file) throws IOException, InterruptedException { Pattern p = Pattern.compile("date-(\\d+)-(\\d+)-(\\d+)\\.parquet"); Matcher matcher = p.matcher(file); assertThat(matcher.matches(), is(true)); WithDate buf = new WithDate(); try (ModelInput<WithDate> input = load(WithDate.class, file)) { assertThat(input.readTo(buf), is(true)); assertThat(input.readTo(new WithDate()), is(false)); } Date expected = new Date(Integer.parseInt(matcher.group(1)), Integer.parseInt(matcher.group(2)), Integer.parseInt(matcher.group(3))); assertThat(buf.value, is(new DateOption(expected))); } private <T> ModelInput<T> load(Class<T> modelType, String name) throws IOException, InterruptedException { ParquetFileFormat<T> format = format(modelType); return load(format, name); } private <T> ModelInput<T> load(ParquetFileFormat<T> format, String name) throws IOException, InterruptedException { File target = folder.newFile(); try (InputStream in = getClass().getResourceAsStream(name)) { assertThat(in, is(notNullValue())); IOUtils.copyBytes(in, new FileOutputStream(target), 1024, true); } FileSystem fs = FileSystem.getLocal(format.getConf()); return format.createInput(format.getSupportedType(), fs, new Path(target.toURI()), 0, -1, new Counter()); } private <T> T restore(ParquetFileFormat<T> format, T value) throws IOException, InterruptedException { List<T> in = new ArrayList<>(); in.add(value); return restore(format, in).get(0); } private <T> List<T> restore(ParquetFileFormat<T> format, List<T> values) throws IOException, InterruptedException { File file = save(format, values); List<T> results = load(format, file); assertThat(values, hasSize(results.size())); return results; } private <T> File save(ParquetFileFormat<T> format, List<T> values) throws IOException, InterruptedException { File file = folder.newFile(); Assume.assumeThat(file.delete() || file.exists() == false, is(true)); LocalFileSystem fs = FileSystem.getLocal(format.getConf()); try (ModelOutput<T> output = format.createOutput(format.getSupportedType(), fs, new Path(file.toURI()), new Counter())) { for (T value : values) { output.write(value); } } assertThat(file.exists(), is(true)); return file; } private <T> List<T> load(ParquetFileFormat<T> format, File file) throws IOException, InterruptedException { LocalFileSystem fs = FileSystem.getLocal(format.getConf()); try (ModelInput<T> input = format.createInput(format.getSupportedType(), fs, new Path(file.toURI()), 0, file.length(), new Counter())) { List<T> results = new ArrayList<>(); while (true) { @SuppressWarnings("unchecked") T value = (T) format.getDataModelDescriptor().createDataModelObject(); if (input.readTo(value) == false) { break; } results.add(value); } return results; } } }