com.hotels.corc.Corc.java Source code

Java tutorial

Introduction

Here is the source code for com.hotels.corc.Corc.java

Source

/**
 * Copyright 2015 Expedia Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.hotels.corc;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.util.HashMap;
import java.util.Locale;
import java.util.Map;

import org.apache.hadoop.hive.ql.io.RecordIdentifier;
import org.apache.hadoop.hive.ql.io.orc.OrcSerde;
import org.apache.hadoop.hive.ql.io.orc.OrcStruct;
import org.apache.hadoop.hive.serde2.objectinspector.SettableStructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo;
import org.apache.hadoop.io.Writable;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * {@link OrcStruct} wrapper, allowing access by field name with automatic {@link Writable} to Java type conversion
 * using the provided {@link ConverterFactory}.
 * <p>
 * Each instance owns one {@link OrcStruct}, created from the supplied {@link StructTypeInfo}, plus one
 * {@link RecordIdentifier}. Per-field {@link ValueMarshaller}s are created lazily and cached in a plain
 * {@link HashMap} with no synchronisation.
 * <p>
 * Although this class implements {@link Writable}, both {@link #write(DataOutput)} and
 * {@link #readFields(DataInput)} throw {@link UnsupportedOperationException}.
 */
public class Corc implements Writable {
    private static final Logger LOG = LoggerFactory.getLogger(Corc.class);

    private final OrcSerde serde = new OrcSerde();
    private final SettableStructObjectInspector inspector;
    private final OrcStruct struct;
    private final ConverterFactory factory;
    /** Marshallers keyed by the field name exactly as the caller supplied it (not lower-cased). */
    private final Map<String, ValueMarshaller> cache = new HashMap<>();
    private final RecordIdentifier recordIdentifier;

    /**
     * Creates a wrapper around a new struct described by {@code typeInfo}.
     *
     * @param typeInfo the struct type used to build the object inspector and the backing {@link OrcStruct}
     * @param factory supplies a {@link Converter} for each field the first time that field is accessed
     */
    public Corc(StructTypeInfo typeInfo, ConverterFactory factory) {
        LOG.debug("TypeInfo: {}", typeInfo);
        inspector = (SettableStructObjectInspector) OrcStruct.createObjectInspector(typeInfo);
        struct = (OrcStruct) inspector.create();
        this.factory = factory;
        recordIdentifier = new RecordIdentifier();
    }

    /**
     * Returns the marshaller for {@code fieldName}, creating and caching it on first use.
     * <p>
     * The inspector is queried with the lower-cased field name. A name the inspector does not know is mapped to
     * {@link ValueMarshaller#NULL}, and that result is cached as well so the lookup is not repeated.
     */
    private ValueMarshaller getValueMarshaller(String fieldName) {
        ValueMarshaller cached = cache.get(fieldName);
        if (cached != null) {
            return cached;
        }

        // Locale.ROOT keeps the normalised name independent of the JVM's default locale; the no-arg
        // toLowerCase() would, for example, map 'I' to a dotless 'ı' under a Turkish default locale.
        StructField structField = inspector.getStructFieldRef(fieldName.toLowerCase(Locale.ROOT));

        ValueMarshaller valueMarshaller;
        if (structField == null) {
            valueMarshaller = ValueMarshaller.NULL;
        } else {
            Converter converter = factory.newConverter(structField.getFieldObjectInspector());
            valueMarshaller = new ValueMarshallerImpl(inspector, structField, converter);
        }
        cache.put(fieldName, valueMarshaller);
        return valueMarshaller;
    }

    /**
     * Gets the value for {@code fieldName} converted to the appropriate java type
     *
     * @param fieldName name of the field to read; lower-cased before it is looked up in the inspector
     * @return whatever the field's {@link ValueMarshaller} produces for the wrapped struct
     * @throws IOException propagated from the field's {@link ValueMarshaller}, if it raises one
     */
    public Object get(String fieldName) throws IOException {
        Object value = getValueMarshaller(fieldName).getJavaObject(struct);
        LOG.debug("Fetched {}={}", fieldName, value);
        return value;
    }

    /**
     * Sets the value for {@code fieldName}, first converting it to the appropriate {@link Writable} type
     *
     * @param fieldName name of the field to write; lower-cased before it is looked up in the inspector
     * @param value the Java value handed to the field's {@link ValueMarshaller}
     * @throws IOException propagated from the field's {@link ValueMarshaller}, if it raises one
     */
    public void set(String fieldName, Object value) throws IOException {
        getValueMarshaller(fieldName).setWritableObject(struct, value);
        LOG.debug("Set {}={}", fieldName, value);
    }

    /**
     * Returns a copy of the record identifier held by this instance.
     *
     * @return a new {@link RecordIdentifier} populated from the internal one
     */
    public RecordIdentifier getRecordIdentifier() {
        RecordIdentifier copy = new RecordIdentifier();
        copy.set(recordIdentifier);
        LOG.debug("Fetched recordIdentifier={}", recordIdentifier);
        return copy;
    }

    /**
     * Copies the given identifier's values into the record identifier held by this instance; the argument itself
     * is not retained.
     *
     * @param recordIdentifier the identifier to copy from
     */
    public void setRecordIdentifier(RecordIdentifier recordIdentifier) {
        this.recordIdentifier.set(recordIdentifier);
        LOG.debug("Set recordIdentifier={}", recordIdentifier);
    }

    /**
     * Returns the wrapped {@link OrcStruct} itself, not a copy.
     */
    public OrcStruct getOrcStruct() {
        return struct;
    }

    /**
     * Returns the object inspector created from the {@link StructTypeInfo} passed to the constructor.
     */
    public SettableStructObjectInspector getInspector() {
        return inspector;
    }

    /**
     * Passes the wrapped struct and its inspector to {@link OrcSerde} and returns the result.
     */
    public Object serialize() {
        return serde.serialize(struct, inspector);
    }

    /**
     * Not supported.
     *
     * @throws UnsupportedOperationException always
     */
    @Override
    public void write(DataOutput out) throws IOException {
        throw new UnsupportedOperationException(Corc.class.getName() + " cannot  be used for writing.");
    }

    /**
     * Not supported.
     *
     * @throws UnsupportedOperationException always
     */
    @Override
    public void readFields(DataInput in) throws IOException {
        throw new UnsupportedOperationException(Corc.class.getName() + " cannot be used for reading.");
    }

}