com.asakusafw.directio.hive.orc.OrcFileFormatTest.java Source code

Java tutorial

Introduction

Here is the source code for com.asakusafw.directio.hive.orc.OrcFileFormatTest.java:

Source

/**
 * Copyright 2011-2016 Asakusa Framework Team.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.asakusafw.directio.hive.orc;

import static org.hamcrest.Matchers.*;
import static org.junit.Assert.*;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.Map;

import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocalFileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.io.orc.CompressionKind;
import org.apache.hadoop.hive.ql.io.orc.OrcFile;
import org.junit.Assume;
import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.TemporaryFolder;

import com.asakusafw.directio.hive.serde.DataModelDescriptorEditor;
import com.asakusafw.directio.hive.serde.DataModelMapping.ExceptionHandlingStrategy;
import com.asakusafw.directio.hive.serde.DataModelMapping.FieldMappingStrategy;
import com.asakusafw.directio.hive.serde.FieldPropertyDescriptor;
import com.asakusafw.directio.hive.serde.ValueSerde;
import com.asakusafw.directio.hive.serde.mock.MockSimple;
import com.asakusafw.runtime.directio.Counter;
import com.asakusafw.runtime.directio.DirectInputFragment;
import com.asakusafw.runtime.directio.hadoop.StripedDataFormat;
import com.asakusafw.runtime.io.ModelInput;
import com.asakusafw.runtime.io.ModelOutput;
import com.asakusafw.runtime.value.IntOption;
import com.asakusafw.runtime.value.StringOption;

/**
 * Test for {@link OrcFileFormat}.
 * <p>
 * Each I/O test writes {@link MockSimple} objects into a file under the temporary folder
 * through the local Hadoop file system, and then reads them back using the format under test.
 * </p>
 */
public class OrcFileFormatTest {

    /**
     * A temporary folder for testing.
     */
    @Rule
    public final TemporaryFolder folder = new TemporaryFolder();

    /**
     * Creates a format for the data model type without any value serde edits.
     * @param <T> the data model type
     * @param type the data model class
     * @param removes the names of properties to remove from the data model descriptor
     * @return the created format
     */
    private <T> OrcFileFormat<T> format(Class<T> type, String... removes) {
        return format(type, Collections.<String, ValueSerde>emptyMap(), removes);
    }

    /**
     * Creates a format for the data model type.
     * The descriptor is extracted from the class, the edits are applied to it, and then the
     * specified properties are removed. The resulting format receives a fresh Hadoop configuration.
     * @param <T> the data model type
     * @param type the data model class
     * @param edits the value serde edits to apply to the data model descriptor
     * @param removes the names of properties to remove from the data model descriptor
     * @return the created format
     */
    private <T> OrcFileFormat<T> format(Class<T> type, Map<String, ? extends ValueSerde> edits, String... removes) {
        OrcFileFormat<T> format = new OrcFileFormat<>("testing", new OrcFormatConfiguration(),
                new DataModelDescriptorEditor(FieldPropertyDescriptor.extract(type)).editAll(edits)
                        .removeAll(Arrays.asList(removes)).build());
        format.setConf(new org.apache.hadoop.conf.Configuration());
        return format;
    }

    /**
     * Test method for {@link AbstractOrcFileFormat#getFormatName()}.
     */
    @Test
    public void format_name() {
        assertThat(format(MockSimple.class).getFormatName(), equalTo("ORC"));
    }

    /**
     * Test method for {@link AbstractOrcFileFormat#getSupportedType()}.
     */
    @Test
    public void supported_type() {
        assertThat(format(MockSimple.class).getSupportedType(), equalTo((Object) MockSimple.class));
    }

    /**
     * {@code tblproperties} for default settings.
     */
    @Test
    public void table_properties_default() {
        Map<String, String> props = format(MockSimple.class).getTableProperties();
        assertThat(props.size(), is(2));
        assertThat(props, hasEntry("orc.compress", "SNAPPY"));
        assertThat(props, hasEntry("orc.stripe.size", String.valueOf(64L * 1024 * 1024)));
    }

    /**
     * {@code tblproperties} for custom settings.
     * The format version is customized as well, but the table properties are still expected
     * to consist of only the compression kind and the stripe size.
     */
    @Test
    public void table_properties_custom() {
        long stripeSize = 99L * 1024 * 1024;
        OrcFileFormat<MockSimple> format = format(MockSimple.class);
        format.getFormatConfiguration().withFormatVersion(OrcFile.Version.V_0_11)
                .withCompressionKind(CompressionKind.ZLIB).withStripeSize(stripeSize);
        Map<String, String> props = format.getTableProperties();
        assertThat(props.size(), is(2));
        assertThat(props, hasEntry("orc.compress", "ZLIB"));
        assertThat(props, hasEntry("orc.stripe.size", String.valueOf(stripeSize)));
    }

    /**
     * simple I/O.
     * @throws Exception if failed
     */
    @Test
    public void io_simple() throws Exception {
        OrcFileFormat<MockSimple> format = format(MockSimple.class);
        MockSimple in = new MockSimple(100, "Hello, world!");
        MockSimple out = restore(format, in);
        assertThat(out.number, is(in.number));
        assertThat(out.string, is(in.string));
    }

    /**
     * I/O with projection.
     * The file is written with the full data model, and read with a format whose
     * {@code string} property has been removed.
     * @throws Exception if failed
     */
    @Test
    public void io_projection() throws Exception {
        OrcFileFormat<MockSimple> format1 = format(MockSimple.class);
        OrcFileFormat<MockSimple> format2 = format(MockSimple.class, "string");
        format2.getFormatConfiguration().withFieldMappingStrategy(FieldMappingStrategy.NAME)
                .withOnMissingTarget(ExceptionHandlingStrategy.IGNORE);

        MockSimple in = new MockSimple(100, "Hello, world!");
        File file = save(format1, Arrays.asList(in));
        List<MockSimple> restored = load(format2, file);

        assertThat(restored, hasSize(1));
        MockSimple out = restored.get(0);
        assertThat(out.number, is(in.number));
        assertThat(out.string, is(new StringOption())); // null
    }

    /**
     * I/O with fragment.
     * The input fragments are computed from the written file, and the object is read back
     * through the path, offset, and size of the first fragment.
     * @throws Exception if failed
     */
    @Test
    public void io_fragment() throws Exception {
        OrcFileFormat<MockSimple> format = format(MockSimple.class);
        // reuse the common helper instead of duplicating the file creation and writing logic
        File file = save(format, Arrays.asList(new MockSimple(100, "Hello, world!")));

        LocalFileSystem fs = FileSystem.getLocal(format.getConf());
        FileStatus stat = fs.getFileStatus(new Path(file.toURI()));
        List<DirectInputFragment> fragments = format.computeInputFragments(new StripedDataFormat.InputContext(
                MockSimple.class, Arrays.asList(stat), fs, -1L, -1L, false, false));

        assertThat(fragments, hasSize(1));
        DirectInputFragment first = fragments.get(0);

        try (ModelInput<MockSimple> input = format.createInput(MockSimple.class, fs, new Path(first.getPath()),
                first.getOffset(), first.getSize(), new Counter())) {
            MockSimple buf = new MockSimple();
            assertThat(input.readTo(buf), is(true));
            assertThat(buf.number, is(new IntOption(100)));
            assertThat(buf.string, is(new StringOption("Hello, world!")));

            // the file must contain exactly one object
            assertThat(input.readTo(buf), is(false));
        }
    }

    /**
     * I/O with {@code 0.11}.
     * @throws Exception if failed
     */
    @Test
    public void io_v_0_11() throws Exception {
        OrcFileFormat<MockSimple> format = format(MockSimple.class);
        format.getFormatConfiguration().withFormatVersion(OrcFile.Version.V_0_11);
        MockSimple in = new MockSimple(100, "Hello, world!");
        MockSimple out = restore(format, in);
        assertThat(out.number, is(in.number));
        assertThat(out.string, is(in.string));
    }

    /**
     * Writes a single object into a file and reads it back using the same format.
     * @param <T> the data model type
     * @param format the format for both writing and reading
     * @param value the object to write
     * @return the restored object
     * @throws IOException if failed by I/O error
     * @throws InterruptedException if interrupted while processing
     */
    private <T> T restore(OrcFileFormat<T> format, T value) throws IOException, InterruptedException {
        return restore(format, Collections.singletonList(value)).get(0);
    }

    /**
     * Writes objects into a file and reads them back using the same format.
     * This also verifies that the number of restored objects is equal to the number of written ones.
     * @param <T> the data model type
     * @param format the format for both writing and reading
     * @param values the objects to write
     * @return the restored objects
     * @throws IOException if failed by I/O error
     * @throws InterruptedException if interrupted while processing
     */
    private <T> List<T> restore(OrcFileFormat<T> format, List<T> values) throws IOException, InterruptedException {
        File file = save(format, values);
        List<T> results = load(format, file);
        // the restored list is the actual value, so that failure messages describe it correctly
        assertThat(results, hasSize(values.size()));
        return results;
    }

    /**
     * Writes objects into a new file under the temporary folder.
     * @param <T> the data model type
     * @param format the format for writing
     * @param values the objects to write
     * @return the written file
     * @throws IOException if failed by I/O error
     * @throws InterruptedException if interrupted while processing
     */
    private <T> File save(OrcFileFormat<T> format, List<T> values) throws IOException, InterruptedException {
        File file = folder.newFile();
        // only the unique path is required: the test is skipped if the placeholder file cannot be removed
        Assume.assumeThat(file.delete() || file.exists() == false, is(true));
        LocalFileSystem fs = FileSystem.getLocal(format.getConf());
        try (ModelOutput<T> output = format.createOutput(format.getSupportedType(), fs, new Path(file.toURI()),
                new Counter())) {
            for (T value : values) {
                output.write(value);
            }
        }
        assertThat(file.exists(), is(true));
        return file;
    }

    /**
     * Reads all objects in the file, treating the whole file as a single fragment.
     * @param <T> the data model type
     * @param format the format for reading
     * @param file the target file
     * @return the loaded objects
     * @throws IOException if failed by I/O error
     * @throws InterruptedException if interrupted while processing
     */
    private <T> List<T> load(OrcFileFormat<T> format, File file) throws IOException, InterruptedException {
        LocalFileSystem fs = FileSystem.getLocal(format.getConf());
        try (ModelInput<T> input = format.createInput(format.getSupportedType(), fs, new Path(file.toURI()), 0,
                file.length(), new Counter())) {
            List<T> results = new ArrayList<>();
            while (true) {
                // a new buffer is created for each read because it is retained in the results
                @SuppressWarnings("unchecked")
                T value = (T) format.getDataModelDescriptor().createDataModelObject();
                if (input.readTo(value) == false) {
                    break;
                }
                results.add(value);
            }
            return results;
        }
    }
}