com.facebook.hiveio.output.OutputInfo.java Source code

Introduction

Here is the source code for com.facebook.hiveio.output.OutputInfo.java
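
OutputInfo is a package-private holder for everything hive-io needs when writing to a Hive table: the OutputFormat class, the regular and partition column schemas, the SerDe (serializer) class and its parameters, and the table root, partition, and final output paths. Because it implements Hadoop's Writable, an instance can be serialized (for example into the job configuration) and reconstructed on the other side.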

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.facebook.hiveio.output;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.hadoop.hive.metastore.api.SerDeInfo;
import org.apache.hadoop.hive.metastore.api.StorageDescriptor;
import org.apache.hadoop.hive.metastore.api.Table;
import org.apache.hadoop.hive.serde2.SerDe;
import org.apache.hadoop.hive.serde2.Serializer;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableUtils;
import org.apache.hadoop.mapred.OutputFormat;
import org.apache.hadoop.util.ReflectionUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.facebook.hiveio.common.Classes;
import com.facebook.hiveio.common.SerDes;
import com.facebook.hiveio.common.Writables;
import com.google.common.base.Preconditions;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.util.List;
import java.util.Map;

/**
 * Holds information for Hive output
 */
class OutputInfo implements Writable {
    /** Logger */
    private static final Logger LOG = LoggerFactory.getLogger(OutputInfo.class);

    /** Parameters for Hive table */
    private final Map<String, String> tableParams;

    /** Class for writing */
    private Class<? extends OutputFormat> outputFormatClass;

    /** Partition column information */
    private final List<FieldSchema> partitionInfo;
    /** Regular column information */
    private final List<FieldSchema> columnInfo;

    /** Class used for serialization */
    private Class<? extends SerDe> serializerClass;
    /** Parameters for serializer */
    private final Map<String, String> serializerParams;

    /** Path to table root in Hadoop */
    private String tableRoot;
    /**
     * Path to the specific partition we are writing. If the table is not
     * partitioned, this is the same as tableRoot.
     */
    private String partitionPath;
    /**
     * Path we are actually writing to. If the table is partitioned this is the
     * same as partitionPath; otherwise it is a temporary path.
     */
    private String finalOutputPath;

    /**
     * Default constructor
     */
    public OutputInfo() {
        this.tableParams = Maps.newHashMap();
        this.partitionInfo = Lists.newArrayList();
        this.columnInfo = Lists.newArrayList();
        this.serializerClass = null;
        this.serializerParams = Maps.newHashMap();
    }

    /**
     * Construct from Hive table
     * @param table Hive table to grab information from
     */
    public OutputInfo(Table table) {
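        // Note: these assignments alias the Thrift Table's own lists and maps;
        // they are not defensive copies.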
        partitionInfo = table.getPartitionKeys();

        StorageDescriptor storageDescriptor = table.getSd();
        tableParams = table.getParameters();
        outputFormatClass = Classes.classForName(storageDescriptor.getOutputFormat());
        columnInfo = storageDescriptor.getCols();
        tableRoot = storageDescriptor.getLocation();

        SerDeInfo serDeInfo = storageDescriptor.getSerdeInfo();
        serializerClass = SerDes.getSerDeClass(serDeInfo);
        serializerParams = serDeInfo.getParameters();
    }

    public String getTableRoot() {
        return tableRoot;
    }

    public List<FieldSchema> getColumnInfo() {
        return columnInfo;
    }

    public Map<String, String> getTableParams() {
        return tableParams;
    }

    public Class<? extends OutputFormat> getOutputFormatClass() {
        return outputFormatClass;
    }

    public Class<? extends Serializer> getSerializerClass() {
        return serializerClass;
    }

    public Map<String, String> getSerializerParams() {
        return serializerParams;
    }

    public List<FieldSchema> getPartitionInfo() {
        return partitionInfo;
    }

    /**
     * Check if this table has any partition info
     * @return true if we have partition information
     */
    public boolean hasPartitionInfo() {
        return partitionInfo != null && !partitionInfo.isEmpty();
    }

    public String getPartitionPath() {
        return partitionPath;
    }

    /**
     * Set partition path
     *
     * @param partitionPath path to partition data
     * @return this
     */
    public OutputInfo setPartitionPath(String partitionPath) {
        LOG.info("Setting partition path to {}", partitionPath);
        this.partitionPath = partitionPath;
        return this;
    }

    public String getFinalOutputPath() {
        return finalOutputPath;
    }

    /**
     * Set final output path
     *
     * @param finalOutputPath path to final result
     * @return this
     */
    public OutputInfo setFinalOutputPath(String finalOutputPath) {
        LOG.info("Setting final output path to {}", finalOutputPath);
        this.finalOutputPath = finalOutputPath;
        return this;
    }

    /**
     * Create Serializer using Configuration passed in
     *
     * @param conf Configuration to use
     * @return A new, configured Serializer
     */
    public Serializer createSerializer(Configuration conf) {
        Serializer serializer = ReflectionUtils.newInstance(serializerClass, conf);
        SerDes.initSerializer(serializer, conf, columnInfo, serializerParams);
        return serializer;
    }

    @Override
    public void write(DataOutput out) throws IOException {
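        // Field order here defines the serialized format; readFields() must
        // read everything back in exactly the same order.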
        Writables.writeClassName(out, Preconditions.checkNotNull(outputFormatClass));
        Writables.writeFieldSchemas(out, partitionInfo);
        Writables.writeStrStrMap(out, tableParams);
        Writables.writeFieldSchemas(out, columnInfo);
        WritableUtils.writeString(out, Preconditions.checkNotNull(tableRoot));
        WritableUtils.writeString(out, Preconditions.checkNotNull(partitionPath));
        WritableUtils.writeString(out, Preconditions.checkNotNull(finalOutputPath));
        Writables.writeClassName(out, Preconditions.checkNotNull(serializerClass));
        Writables.writeStrStrMap(out, serializerParams);
    }

    @Override
    public void readFields(DataInput in) throws IOException {
        outputFormatClass = Writables.readClass(in);
        Writables.readFieldSchemas(in, partitionInfo);
        Writables.readStrStrMap(in, tableParams);
        Writables.readFieldSchemas(in, columnInfo);
        tableRoot = WritableUtils.readString(in);
        partitionPath = WritableUtils.readString(in);
        finalOutputPath = WritableUtils.readString(in);
        serializerClass = Writables.readClass(in);
        Writables.readStrStrMap(in, serializerParams);
    }
}
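
Usage example

OutputInfo is package-private, so callers must live inside com.facebook.hiveio.output. The sketch below is illustrative rather than definitive: it assumes a metastore Table has already been fetched (the fetchTable helper is a hypothetical stand-in for a metastore client lookup), builds an OutputInfo from it, and round-trips the object through write()/readFields() the way Hadoop would when shipping a Writable. Note that write() checks that tableRoot, partitionPath, and finalOutputPath are non-null, so both setters must be called before serializing.

package com.facebook.hiveio.output;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.metastore.api.Table;
import org.apache.hadoop.hive.serde2.Serializer;
import org.apache.hadoop.io.DataInputBuffer;
import org.apache.hadoop.io.DataOutputBuffer;

import java.io.IOException;

/** Minimal sketch of constructing and round-tripping an OutputInfo. */
public class OutputInfoExample {
    public static void main(String[] args) throws IOException {
        // Hypothetical helper; in practice this would be a metastore client call.
        Table table = fetchTable("default", "my_table");

        OutputInfo info = new OutputInfo(table);
        // Illustrative partition path; write() rejects null paths.
        info.setPartitionPath(info.getTableRoot() + "/ds=2014-01-01")
            .setFinalOutputPath(info.getPartitionPath());

        // Serialize as Hadoop would when shipping the Writable.
        DataOutputBuffer out = new DataOutputBuffer();
        info.write(out);

        // Read it back into a fresh instance.
        OutputInfo copy = new OutputInfo();
        DataInputBuffer in = new DataInputBuffer();
        in.reset(out.getData(), out.getLength());
        copy.readFields(in);

        // The reconstructed copy can hand out a configured Serializer.
        Serializer serializer = copy.createSerializer(new Configuration());
        System.out.println("Serializer: " + serializer.getClass().getName());
    }

    /** Hypothetical stand-in for fetching a Table from the Hive metastore. */
    private static Table fetchTable(String dbName, String tableName) {
        throw new UnsupportedOperationException("metastore lookup omitted from sketch");
    }
}

The setters return this, so partition and final output paths chain naturally; the DataOutputBuffer/DataInputBuffer pair is just a convenient in-memory way to exercise the same write()/readFields() path Hadoop uses.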