com.facebook.hive.orc.OrcOutputFormat.java Source code

Java tutorial

Introduction

Here is the source code for com.facebook.hive.orc.OrcOutputFormat.java

Source

//  Copyright (c) 2013, Facebook, Inc.  All rights reserved.

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.facebook.hive.orc;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Properties;

import com.facebook.hive.orc.compression.CompressionKind;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.exec.FileSinkOperator;
import org.apache.hadoop.hive.ql.io.HiveOutputFormat;
import org.apache.hadoop.hive.ql.io.StatsProvidingRecordWriter;
import org.apache.hadoop.hive.serde2.ReaderWriterProfiler;
import org.apache.hadoop.hive.serde2.SerDeStats;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.RecordWriter;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.util.Progressable;

import com.facebook.hive.orc.OrcSerde.OrcSerdeRow;

/**
 * A Hive OutputFormat for ORC files.
 *
 * <p>Rows are handed over as {@link OrcSerdeRow} instances. The underlying ORC
 * {@link Writer} is created lazily, using the object inspector carried by the
 * first row that is written.
 */
public class OrcOutputFormat extends FileOutputFormat<NullWritable, OrcSerdeRow>
        implements HiveOutputFormat<NullWritable, OrcSerdeRow> {

    /**
     * Record writer that lazily opens an ORC {@link Writer} on the first row
     * and forwards every row to it.
     */
    private static class OrcRecordWriter
            implements RecordWriter<NullWritable, OrcSerdeRow>, StatsProvidingRecordWriter {
        // Stays null until the first row is written or the writer is closed.
        private Writer writer = null;
        private final FileSystem fs;
        private final Path path;
        private final Configuration conf;
        private final long stripeSize;
        private final int compressionSize;
        private final CompressionKind compress;
        private final int rowIndexStride;
        private final SerDeStats stats;

        /**
         * Captures the settings needed to create the ORC writer later on.
         *
         * @param fs file system the output file is created on
         * @param path location of the output file
         * @param conf configuration passed through to the ORC writer
         * @param stripeSize stripe size passed to the ORC writer
         * @param compress name of a {@link CompressionKind} constant
         * @param compressionSize compression block size passed to the ORC writer
         * @param rowIndexStride row index stride passed to the ORC writer
         * @throws IllegalArgumentException if {@code compress} does not name a
         *         {@link CompressionKind} constant
         */
        OrcRecordWriter(FileSystem fs, Path path, Configuration conf, long stripeSize, String compress,
                int compressionSize, int rowIndexStride) {
            this.fs = fs;
            this.path = path;
            this.conf = conf;
            this.stripeSize = stripeSize;
            this.compress = CompressionKind.valueOf(compress);
            this.compressionSize = compressionSize;
            this.rowIndexStride = rowIndexStride;
            this.stats = new SerDeStats();
        }

        /** Creates a new ORC writer for the given schema using the stored settings. */
        private Writer createWriter(ObjectInspector inspector) throws IOException {
            return OrcFile.createWriter(fs, path, conf, inspector, stripeSize, compress,
                    compressionSize, rowIndexStride);
        }

        /**
         * Returns the ORC writer, creating it from the row's inspector on first use.
         * The inspector is only queried when the writer does not exist yet.
         */
        private Writer writerFor(OrcSerdeRow row) throws IOException {
            if (writer == null) {
                writer = createWriter(row.getInspector());
            }
            return writer;
        }

        /**
         * Appends a row to the ORC file.
         *
         * @param nullWritable unused key
         * @param row the row to append, together with its object inspector
         * @throws IOException if creating the writer or adding the row fails
         */
        @Override
        public void write(NullWritable nullWritable, OrcSerdeRow row) throws IOException {
            writerFor(row).addRow(row.getRow());
        }

        /**
         * Appends a row to the ORC file.
         *
         * @param row the row to append; must be an {@link OrcSerdeRow}
         * @throws IOException if creating the writer or adding the row fails
         * @throws ClassCastException if {@code row} is not an {@link OrcSerdeRow}
         */
        @Override
        public void write(Writable row) throws IOException {
            OrcSerdeRow serdeRow = (OrcSerdeRow) row;
            writerFor(serdeRow).addRow(serdeRow.getRow());
        }

        /**
         * Closes the writer; delegates to {@link #close(boolean)}.
         *
         * @param reporter unused
         * @throws IOException if closing the underlying writer fails
         */
        @Override
        public void close(Reporter reporter) throws IOException {
            close(true);
        }

        /**
         * Closes the underlying ORC writer, creating an empty file first when
         * no row was ever written.
         *
         * @param b unused
         * @throws IOException if creating or closing the underlying writer fails
         */
        @Override
        public void close(boolean b) throws IOException {
            // if we haven't written any rows, we need to create a file with a
            // generic schema.
            if (writer == null) {
                // a row with no columns
                ObjectInspector emptyStruct = ObjectInspectorFactory.getStandardStructObjectInspector(
                        new ArrayList<String>(), new ArrayList<ObjectInspector>());
                writer = createWriter(emptyStruct);
            }
            writer.close();
        }

        /**
         * Returns the statistics object, refreshed with the writer's current raw data size.
         *
         * @return the stats; the raw data size is 0 when no writer has been created yet
         */
        @Override
        public SerDeStats getStats() {
            // The writer is created lazily, so it may still be null here; avoid
            // a NullPointerException when stats are requested before any row.
            stats.setRawDataSize(writer == null ? 0L : writer.getRowRawDataSize());
            return stats;
        }
    }

    /**
     * Creates a record writer configured with the default values of the ORC
     * configuration variables (the job configuration is not consulted for them).
     *
     * @param fileSystem file system to write to; when {@code null} it is resolved
     *        from the work output path, or from the output path if there is none
     * @param conf the job configuration
     * @param name file name, resolved against the work output path when one is set
     * @param reporter unused
     * @return a writer for the resolved output path
     * @throws IOException if the file system cannot be resolved
     */
    @Override
    public RecordWriter<NullWritable, OrcSerdeRow> getRecordWriter(FileSystem fileSystem, JobConf conf, String name,
            Progressable reporter) throws IOException {
        ReaderWriterProfiler.setProfilerOptions(conf);

        // To be compatible with older file formats like Sequence and RC
        // Only works if mapred.work.output.dir is set in the conf
        Path workOutputPath = FileOutputFormat.getWorkOutputPath(conf);
        Path outputPath = workOutputPath == null ? new Path(name) : new Path(workOutputPath, name);

        if (fileSystem == null) {
            // Prefer the work output path's file system; fall back to the output
            // path itself so the writer never receives a null file system.
            Path fsSource = workOutputPath != null ? workOutputPath : outputPath;
            fileSystem = fsSource.getFileSystem(conf);
        }

        return new OrcRecordWriter(fileSystem, outputPath, conf,
                OrcConf.ConfVars.HIVE_ORC_STRIPE_SIZE.defaultLongVal,
                OrcConf.ConfVars.HIVE_ORC_COMPRESSION.defaultVal,
                OrcConf.ConfVars.HIVE_ORC_COMPRESSION_BLOCK_SIZE.defaultIntVal,
                OrcConf.ConfVars.HIVE_ORC_ROW_INDEX_STRIDE.defaultIntVal);
    }

    /**
     * Creates a Hive record writer. Each setting is taken from the table
     * properties when present and from the job configuration otherwise.
     *
     * @param conf the job configuration
     * @param path location of the output file
     * @param valueClass unused
     * @param isCompressed unused; compression is chosen from the table
     *        properties or the configuration
     * @param tableProperties table properties that may override the configuration
     * @param reporter unused
     * @return a writer for {@code path}
     * @throws IOException if the file system for {@code path} cannot be resolved
     * @throws NumberFormatException if a numeric table property cannot be parsed
     */
    @Override
    public FileSinkOperator.RecordWriter getHiveRecordWriter(JobConf conf, Path path,
            Class<? extends Writable> valueClass, boolean isCompressed, Properties tableProperties,
            Progressable reporter) throws IOException {
        ReaderWriterProfiler.setProfilerOptions(conf);

        String stripeSizeProp = tableProperties.getProperty(OrcFile.STRIPE_SIZE);
        long stripeSize = stripeSizeProp != null
                ? Long.parseLong(stripeSizeProp)
                : OrcConf.getLongVar(conf, OrcConf.ConfVars.HIVE_ORC_STRIPE_SIZE);

        String compression = tableProperties.getProperty(OrcFile.COMPRESSION);
        if (compression == null) {
            compression = OrcConf.getVar(conf, OrcConf.ConfVars.HIVE_ORC_COMPRESSION);
        }

        String compressionSizeProp = tableProperties.getProperty(OrcFile.COMPRESSION_BLOCK_SIZE);
        int compressionSize = compressionSizeProp != null
                ? Integer.parseInt(compressionSizeProp)
                : OrcConf.getIntVar(conf, OrcConf.ConfVars.HIVE_ORC_COMPRESSION_BLOCK_SIZE);

        String rowIndexStrideProp = tableProperties.getProperty(OrcFile.ROW_INDEX_STRIDE);
        int rowIndexStride = rowIndexStrideProp != null
                ? Integer.parseInt(rowIndexStrideProp)
                : OrcConf.getIntVar(conf, OrcConf.ConfVars.HIVE_ORC_ROW_INDEX_STRIDE);

        String enableIndexesProp = tableProperties.getProperty(OrcFile.ENABLE_INDEXES);
        boolean enableIndexes = enableIndexesProp != null
                ? Boolean.parseBoolean(enableIndexesProp)
                : OrcConf.getBoolVar(conf, OrcConf.ConfVars.HIVE_ORC_CREATE_INDEX);

        // When indexes are disabled the stride is forced to 0.
        if (!enableIndexes) {
            rowIndexStride = 0;
        }

        return new OrcRecordWriter(path.getFileSystem(conf), path, conf, stripeSize, compression, compressionSize,
                rowIndexStride);
    }
}