com.facebook.hive.orc.FileDump.java Source code

Java tutorial

Introduction

Here is the source code for com.facebook.hive.orc.FileDump.java

Source

//  Copyright (c) 2013, Facebook, Inc.  All rights reserved.

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.facebook.hive.orc;

import com.facebook.hive.orc.compression.CompressionKind;
import com.facebook.hive.orc.statistics.ColumnStatistics;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.serde2.ReaderWriterProfiler;

import java.io.IOException;

/**
 * A command-line tool for printing out the file structure of ORC files.
 *
 * <p>For every file named on the command line it prints the row count, the compression
 * settings, the raw data size, the type name, the per-column statistics and, for each stripe,
 * the stream layout and the column encodings. All output is written to {@code System.out}.
 */
public final class FileDump {
    /** Format of one stream line: column id, stream kind, computed start position and length. */
    private static final String STREAM_SECTION_INFO_FORMAT = "    Stream: column %d section %s start: %d length %d";

    // Utility class with only static members; never instantiated.
    private FileDump() {
    }

    /**
     * Prints the compression kind of the file and, unless the kind is
     * {@link CompressionKind#NONE}, the compression size reported by the reader.
     *
     * @param reader the reader of the file being dumped
     */
    private static void printCompressionInformation(Reader reader) {
        System.out.println("Compression: " + reader.getCompression());
        if (reader.getCompression() != CompressionKind.NONE) {
            System.out.println("Compression size: " + reader.getCompressionSize());
        }
    }

    /**
     * Prints one line per entry of {@code reader.getStatistics()}, labelled with the entry's
     * index in that array.
     *
     * @param reader the reader of the file being dumped
     */
    private static void printColumnStatistics(Reader reader) {
        final ColumnStatistics[] stats = reader.getStatistics();
        System.out.println("\nStatistics:");

        for (int i = 0; i < stats.length; ++i) {
            System.out.println("  Column " + i + ": " + stats[i].toString());
        }
    }

    /**
     * Prints the encoding kind of a single column of a stripe footer. For
     * {@code DICTIONARY} encodings the dictionary size is appended in square brackets.
     *
     * @param footer the stripe footer holding the column encodings
     * @param col    index of the column encoding within {@code footer}
     */
    private static void printColumnFooterEntry(OrcProto.StripeFooter footer, int col) {
        final StringBuilder buf = new StringBuilder();
        buf.append("    Encoding column ");
        buf.append(col);
        buf.append(": ");

        final OrcProto.ColumnEncoding encoding = footer.getColumns(col);
        buf.append(encoding.getKind());

        if (encoding.getKind() == OrcProto.ColumnEncoding.Kind.DICTIONARY) {
            buf.append("[");
            buf.append(encoding.getDictionarySize());
            buf.append("]");
        }
        System.out.println(buf);
    }

    /**
     * Prints, for every stripe of the file, the stripe summary, one line per stream and one
     * line per column encoding.
     *
     * @param reader the reader of the file being dumped; supplies the list of stripes
     * @param rows   the record reader used to read each stripe's footer
     * @throws IOException if reading a stripe footer fails
     */
    private static void printStripeInformation(Reader reader, RecordReaderImpl rows) throws IOException {
        System.out.println("\nStripes:");

        for (final StripeInformation stripe : reader.getStripes()) {
            final long stripeStart = stripe.getOffset();
            System.out.println("  Stripe: " + stripe.toString());

            final OrcProto.StripeFooter footer = rows.readStripeFooter(stripe);
            // The footer only carries stream lengths, so each stream's start is derived by
            // accumulating the lengths of the preceding streams from the stripe offset.
            long sectionStart = stripeStart;
            for (final OrcProto.Stream section : footer.getStreamsList()) {
                System.out.println(String.format(STREAM_SECTION_INFO_FORMAT, section.getColumn(), section.getKind(),
                        sectionStart, section.getLength()));
                sectionStart += section.getLength();
            }

            for (int col = 0; col < footer.getColumnsCount(); ++col) {
                printColumnFooterEntry(footer, col);
            }
        }
    }

    /**
     * Opens the named file and prints its structure to {@code System.out}.
     *
     * <p>The record reader obtained from the file is closed before this method returns,
     * whether the dump completes or fails part-way through.
     *
     * @param filename path of the file to dump
     * @param conf     configuration used to resolve the file system and to open the file
     * @throws IOException if the file cannot be opened or read, or if closing the record
     *                     reader fails
     */
    private static void processFile(String filename, Configuration conf) throws IOException {
        final Path path = new Path(filename);
        ReaderWriterProfiler.setProfilerOptions(conf);
        System.out.println("Structure for " + filename);

        final Reader reader = OrcFile.createReader(path.getFileSystem(conf), path, conf);
        final RecordReaderImpl rows = (RecordReaderImpl) reader.rows(null);
        try {
            System.out.println("Rows: " + reader.getNumberOfRows());

            printCompressionInformation(reader);
            System.out.println("Raw data size: " + reader.getRawDataSize());
            System.out.println("Type: " + reader.getObjectInspector().getTypeName());

            printColumnStatistics(reader);
            printStripeInformation(reader, rows);
        } finally {
            // Release the record reader even when printing fails, so that dumping many
            // files in one invocation does not accumulate open readers.
            rows.close();
        }
    }

    /**
     * Entry point: dumps the structure of every file named in {@code args}, in order.
     *
     * <p>Any argument that starts with {@code -hiveconf} is skipped together with the
     * argument that follows it; neither is treated as a file name.
     *
     * @param args file names, optionally interleaved with {@code -hiveconf} options
     * @throws Exception if any file cannot be processed; remaining files are not dumped
     */
    public static void main(String[] args) throws Exception {
        final Configuration conf = new Configuration();
        for (int i = 0; i < args.length; i++) {
            if (args[i].startsWith("-hiveconf")) {
                // Skip any -hiveconf args and its values
                i++;
                continue;
            }
            processFile(args[i], conf);
        }
    }
}