Java tutorial: benchmarking Hive input reading with hiveio

The class below, InputBenchmark from the com.facebook.hiveio.benchmark package, scans every record of a Hive table through HiveApiInputFormat and reports timing metrics to stderr.
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.facebook.hiveio.benchmark;

import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.JobID;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.slf4j.LoggerFactory;

import com.facebook.hiveio.common.HadoopNative;
import com.facebook.hiveio.common.HiveUtils;
import com.facebook.hiveio.input.HiveApiInputFormat;
import com.facebook.hiveio.input.HiveInputDescription;
import com.facebook.hiveio.record.HiveReadableRecord;
import com.yammer.metrics.Metrics;
import com.yammer.metrics.core.Timer;
import com.yammer.metrics.core.TimerContext;
import com.yammer.metrics.reporting.ConsoleReporter;

import java.io.IOException;
import java.util.List;

import static com.facebook.hiveio.input.HiveApiInputFormat.DEFAULT_PROFILE_ID;
import static java.util.concurrent.TimeUnit.MILLISECONDS;

/**
 * Benchmark for input reading
 */
class InputBenchmark {
  /** Logger */
  private static final org.slf4j.Logger LOG =
      LoggerFactory.getLogger(InputBenchmark.class);

  /**
   * Run benchmark
   *
   * @param args parsed args
   * @throws Exception
   */
  public void run(InputBenchmarkCmd args) throws Exception {
    HadoopNative.requireHadoopNative();

    Timer allTime = Metrics.newTimer(InputBenchmark.class, "all-time",
        MILLISECONDS, MILLISECONDS);
    TimerContext allTimerContext = allTime.time();

    HiveInputDescription input = new HiveInputDescription();
    input.getTableDesc().setDatabaseName(args.tableOpts.database);
    input.getTableDesc().setTableName(args.tableOpts.table);
    input.setPartitionFilter(args.tableOpts.partitionFilter);
    input.getMetastoreDesc().setHost(args.metastoreOpts.host);
    input.getMetastoreDesc().setPort(args.metastoreOpts.port);

    HiveConf hiveConf = HiveUtils.newHiveConf(InputBenchmark.class);

    System.err.println("Initialize profile with input data");
    HiveApiInputFormat.setProfileInputDesc(hiveConf, input, DEFAULT_PROFILE_ID);

    HiveApiInputFormat defaultInputFormat = new HiveApiInputFormat();
    if (args.trackMetrics) {
      defaultInputFormat.setObserver(
          new MetricsObserver("default", args.recordPrintPeriod));
    }

    List<InputSplit> splits =
        defaultInputFormat.getSplits(new JobContext(hiveConf, new JobID()));
    System.err.println("getSplits returned " + splits.size() + " splits");

    long numRows = 0;
    for (int i = 0; i < splits.size(); ++i) {
      InputSplit split = splits.get(i);
      TaskAttemptID taskID = new TaskAttemptID();
      TaskAttemptContext taskContext = new TaskAttemptContext(hiveConf, taskID);
      if (i % args.splitPrintPeriod == 0) {
        System.err.println("Handling split " + i + " of " + splits.size());
      }
      RecordReader<WritableComparable, HiveReadableRecord> reader =
          defaultInputFormat.createRecordReader(split, taskContext);
      reader.initialize(split, taskContext);
      numRows += readFully(reader);
    }

    System.err.println("Parsed " + numRows + " rows");

    allTimerContext.stop();

    new ConsoleReporter(System.err).run();
  }

  /**
   * Read all records from a RecordReader
   *
   * @param reader RecordReader
   * @return number of rows
   * @throws IOException I/O errors
   * @throws InterruptedException thread errors
   */
  private static long readFully(
      RecordReader<WritableComparable, HiveReadableRecord> reader)
      throws IOException, InterruptedException {
    long num = 0;
    while (reader.nextKeyValue()) {
      HiveReadableRecord record = reader.getCurrentValue();
      parseLongLongDouble(record);
      ++num;
    }
    return num;
  }

  /**
   * Parse a long-long-double record
   *
   * @param record to parse
   */
  private static void parseLongLongDouble(HiveReadableRecord record) {
    record.getLong(0);
    record.getLong(1);
    record.getDouble(2);
  }
}
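The run() method takes an InputBenchmarkCmd holding the parsed command-line options, a class not shown in this file. As a hypothetical sketch, the field names below are inferred from exactly how run() dereferences them; the default values are illustrative assumptions, and the real hiveio class may differ:

// Hypothetical sketch only: fields mirror what run() reads
// (args.tableOpts.database, args.metastoreOpts.port, and so on).
// Default values here are assumptions, not hiveio's actual defaults.
class InputBenchmarkCmd {
  static class TableOpts {
    String database = "default";  // Hive database to read from
    String table;                 // Hive table to read (required)
    String partitionFilter = "";  // partition filter expression, may be empty
  }

  static class MetastoreOpts {
    String host = "localhost";    // Hive metastore Thrift host (assumed default)
    int port = 9083;              // Hive metastore Thrift port (assumed default)
  }

  TableOpts tableOpts = new TableOpts();
  MetastoreOpts metastoreOpts = new MetastoreOpts();
  boolean trackMetrics = false;   // attach a MetricsObserver to the input format
  int recordPrintPeriod = 1000000; // passed to MetricsObserver when trackMetrics is set
  int splitPrintPeriod = 10;      // print progress every N splits
}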
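Given such an argument holder, a minimal entry point could look like the following. This is again a sketch under the assumptions above; the table name is a placeholder, and the real hiveio benchmark wires these options up through its own command-line parsing:

public class InputBenchmarkMain {
  public static void main(String[] argv) throws Exception {
    InputBenchmarkCmd args = new InputBenchmarkCmd();
    args.tableOpts.table = "my_table";  // placeholder: the Hive table to scan
    new InputBenchmark().run(args);
  }
}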