com.asakusafw.runtime.io.text.directio.AbstractTextStreamFormat.java Source code

Java tutorial

Introduction

Here is the source code for com.asakusafw.runtime.io.text.directio.AbstractTextStreamFormat.java

Source

/**
 * Copyright 2011-2017 Asakusa Framework Team.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.asakusafw.runtime.io.text.directio;

import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.util.Collections;
import java.util.EnumSet;
import java.util.Set;
import java.util.concurrent.atomic.AtomicReference;
import java.util.function.Supplier;

import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.util.ReflectionUtils;

import com.asakusafw.runtime.directio.hadoop.ConfigurableBinaryStreamFormat;
import com.asakusafw.runtime.io.ModelInput;
import com.asakusafw.runtime.io.ModelOutput;
import com.asakusafw.runtime.io.text.TextFormat;
import com.asakusafw.runtime.io.text.TextInput;
import com.asakusafw.runtime.io.text.TextOutput;
import com.asakusafw.runtime.io.text.driver.InputOption;
import com.asakusafw.runtime.io.text.driver.OutputOption;
import com.asakusafw.runtime.io.text.driver.RecordDefinition;
import com.asakusafw.runtime.value.StringOption;

/**
 * An abstract implementation of Direct I/O data format for formatted text files.
 * <p>
 * Subclasses provide the {@link TextFormat} and {@link RecordDefinition} through the
 * {@code create*()} factory methods; the created objects are cached per format instance.
 * Optionally, subclasses may also provide an input splitter or a compression codec class,
 * and may hook into each record via {@link #afterInput(Object, StringOption, TextInput)}
 * and {@link #beforeOutput(Object, StringOption, TextOutput)}.
 * </p>
 * @param <T> the data type
 * @since 0.9.1
 */
public abstract class AbstractTextStreamFormat<T> extends ConfigurableBinaryStreamFormat<T> {

    /** Input options for a fragment which starts at the head of the file (offset {@code 0}). */
    private static final Set<InputOption> INPUT_OPTS_HEAD =
            Collections.unmodifiableSet(EnumSet.of(InputOption.FROM_FILE_HEAD));

    /** Input options for a fragment which starts at any other offset. */
    private static final Set<InputOption> INPUT_OPTS_REST = Collections.emptySet();

    /** Output options (always empty). */
    private static final Set<OutputOption> OUTPUT_OPTS = Collections.emptySet();

    private final AtomicReference<TextFormat> textFormatCache = new AtomicReference<>();

    private final AtomicReference<RecordDefinition<T>> recordDefinitionCache = new AtomicReference<>();

    /**
     * Creates the {@link TextFormat} of this format.
     * The result is cached by {@link #getTextFormat()}.
     * @return the text format
     */
    protected abstract TextFormat createTextFormat();

    /**
     * Creates the {@link RecordDefinition} of this format.
     * The result is cached by {@link #getRecordDefinition()}.
     * @return the record structure definition
     */
    protected abstract RecordDefinition<T> createRecordDefinition();

    /**
     * Returns the input splitter of this format.
     * The default implementation returns {@code null}.
     * @return the input splitter, or {@code null} if input split is disabled
     */
    protected InputSplitter getInputSplitter() {
        return null;
    }

    /**
     * Returns the compression codec class.
     * The default implementation returns {@code null}.
     * @return the compression codec class, or {@code null} if compression is disabled
     */
    protected Class<? extends CompressionCodec> getCompressionCodecClass() {
        return null;
    }

    /**
     * Processes the data object after the current record data has been read into it.
     * This is invoked once for each record successfully read; the default implementation does nothing.
     * @param model the data object
     * @param path the current input path
     * @param input the current input
     */
    protected void afterInput(T model, StringOption path, TextInput<T> input) {
        // nothing to do by default
    }

    /**
     * Processes the data object just before it is written to the output.
     * This is invoked once for each record to be written; the default implementation does nothing.
     * @param model the data object
     * @param path the current output path
     * @param output the current output
     */
    protected void beforeOutput(T model, StringOption path, TextOutput<T> output) {
        // nothing to do by default
    }

    /**
     * Returns the {@link TextFormat}, creating and caching it on the first call.
     * @return the text format
     */
    public final TextFormat getTextFormat() {
        return cached(this::createTextFormat, textFormatCache);
    }

    /**
     * Returns the {@link RecordDefinition}, creating and caching it on the first call.
     * @return the record structure definition
     */
    public final RecordDefinition<T> getRecordDefinition() {
        return cached(this::createRecordDefinition, recordDefinitionCache);
    }

    /**
     * Returns the value held by the given cache, populating it from the factory if it is still empty.
     * The factory result is stored only if the cache is still empty at that time, and the
     * value finally held by the cache is the one returned.
     */
    private static <U> U cached(Supplier<U> factory, AtomicReference<U> holder) {
        U value = holder.get();
        if (value == null) {
            holder.compareAndSet(null, factory.get());
            value = holder.get();
        }
        return value;
    }

    @Override
    public final long getPreferredFragmentSize() {
        InputSplitter splitter = getInputSplitter();
        if (splitter == null) {
            // input split is disabled
            return -1L;
        }
        return splitter.getPreferredSize();
    }

    @Override
    public final long getMinimumFragmentSize() {
        InputSplitter splitter = getInputSplitter();
        if (splitter == null) {
            // input split is disabled
            return -1L;
        }
        return splitter.getLowerLimitSize();
    }

    @Override
    public ModelInput<T> createInput(Class<? extends T> dataType, String path, InputStream stream, long offset,
            long fragmentSize) throws IOException, InterruptedException {
        TextFormat format = getTextFormat();
        RecordDefinition<T> record = getRecordDefinition();
        // only the fragment at the file head is flagged as such
        Set<InputOption> options;
        if (offset == 0) {
            options = INPUT_OPTS_HEAD;
        } else {
            options = INPUT_OPTS_REST;
        }
        InputStream source = wrapSource(stream, offset, fragmentSize);
        TextInput<T> input = record.newInput(format.open(source), path, options);
        return new DecoratedInput(input, new StringOption(path));
    }

    @Override
    public ModelOutput<T> createOutput(Class<? extends T> dataType, String path, OutputStream stream)
            throws IOException, InterruptedException {
        TextFormat format = getTextFormat();
        RecordDefinition<T> record = getRecordDefinition();
        OutputStream destination = wrapDestination(stream);
        TextOutput<T> output = record.newOutput(format.open(destination), path, OUTPUT_OPTS);
        return new DecoratedOutput(output, new StringOption(path));
    }

    /**
     * Wraps the raw input stream: trims it to the fragment if an input splitter is available,
     * otherwise decompresses it if a compression codec is available, otherwise returns it as is.
     */
    private InputStream wrapSource(InputStream stream, long offset, long splitSize) throws IOException {
        InputSplitter splitter = getInputSplitter();
        if (splitter != null) {
            // the splitter takes precedence; compression is expected to be disabled here
            assert getCompressionCodecClass() == null;
            long limit;
            if (splitSize == -1L) {
                // -1 is replaced with the largest possible limit
                limit = Long.MAX_VALUE;
            } else {
                limit = splitSize;
            }
            return splitter.trim(stream, offset, limit);
        }
        CompressionCodec codec = newCodecOrNull();
        if (codec == null) {
            return stream;
        }
        return codec.createInputStream(stream);
    }

    /**
     * Wraps the raw output stream with the compression codec if one is available,
     * otherwise returns it as is.
     */
    private OutputStream wrapDestination(OutputStream stream) throws IOException {
        CompressionCodec codec = newCodecOrNull();
        if (codec == null) {
            return stream;
        }
        return codec.createOutputStream(stream);
    }

    /**
     * Instantiates the compression codec using the current configuration,
     * or returns {@code null} if no compression codec class is specified.
     */
    private CompressionCodec newCodecOrNull() {
        Class<? extends CompressionCodec> codecClass = getCompressionCodecClass();
        if (codecClass == null) {
            return null;
        }
        return ReflectionUtils.newInstance(codecClass, getConf());
    }

    /**
     * A {@link ModelInput} which delegates to a {@link TextInput} and invokes
     * the enclosing format's {@code afterInput()} hook for each record read.
     */
    private class DecoratedInput implements ModelInput<T> {

        private final TextInput<T> delegate;

        private final StringOption location;

        DecoratedInput(TextInput<T> delegate, StringOption location) {
            this.delegate = delegate;
            this.location = location;
        }

        @Override
        public boolean readTo(T model) throws IOException {
            boolean available = delegate.readTo(model);
            if (available) {
                // the hook runs only when a record was actually read
                afterInput(model, location, delegate);
            }
            return available;
        }

        @Override
        public void close() throws IOException {
            delegate.close();
        }
    }

    /**
     * A {@link ModelOutput} which invokes the enclosing format's {@code beforeOutput()} hook
     * for each record and then delegates to a {@link TextOutput}.
     */
    private class DecoratedOutput implements ModelOutput<T> {

        private final TextOutput<T> delegate;

        private final StringOption location;

        DecoratedOutput(TextOutput<T> delegate, StringOption location) {
            this.delegate = delegate;
            this.location = location;
        }

        @Override
        public void write(T model) throws IOException {
            // the hook runs before the record is written
            beforeOutput(model, location, delegate);
            delegate.write(model);
        }

        @Override
        public void close() throws IOException {
            delegate.close();
        }
    }
}