com.cloudera.recordservice.mr.RecordServiceRecord.java Source code

Introduction

Here is the source code for com.cloudera.recordservice.mr.RecordServiceRecord.java
Source

// Copyright 2012 Cloudera Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package com.cloudera.recordservice.mr;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;

import org.apache.hadoop.io.BooleanWritable;
import org.apache.hadoop.io.ByteWritable;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.FloatWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.ShortWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;

import com.cloudera.recordservice.core.ByteArray;
import com.cloudera.recordservice.core.Records.Record;
import com.google.common.base.Preconditions;

public class RecordServiceRecord implements Writable {
    // Array of Writable objects. This is created once and reused.
    private Writable[] columnValObjects_;

    // The values for the current record. If column[i] is NULL,
    // columnVals_[i] is null, otherwise it is columnValObjects_[i].
    private Writable[] columnVals_;

    // Schema associated with columnVals_.
    private Schema schema_;

    // Map of column name to column value index in columnValues_.
    // TODO: Handle column names should be case-insensitive, we need to handle
    // that efficiently.
    private Map<String, Integer> colNameToIdx_;

    public RecordServiceRecord(Schema schema) {
        schema_ = schema;
        columnVals_ = new Writable[schema_.getNumColumns()];
        columnValObjects_ = new Writable[schema_.getNumColumns()];
        colNameToIdx_ = new HashMap<String, Integer>();
        for (int i = 0; i < schema_.getNumColumns(); ++i) {
            colNameToIdx_.put(schema.getColumnInfo(i).name, i);
            columnValObjects_[i] = getWritableInstance(schema.getColumnInfo(i).type.typeId);
        }
    }

    /**
     * Resets the data in this RecordServiceRecord by translating the column data from the
     * given Row to the internal array of Writables (columnVals_).
     * Reads the column data from the given Row into this RecordServiceRecord. The
     * schema are expected to match, minimal error checks are performed.
     * This is a performance critical method.
     */
    public void reset(Record record) {
        if (record.getSchema().cols.size() != schema_.getNumColumns()) {
            throw new IllegalArgumentException(String.format(
                    "Schema for new record does " + "not match existing schema: %d (new) != %d (existing)",
                    record.getSchema().cols.size(), schema_.getNumColumns()));
        }

        for (int i = 0; i < schema_.getNumColumns(); ++i) {
            if (record.isNull(i)) {
                columnVals_[i] = null;
                continue;
            }
            columnVals_[i] = columnValObjects_[i];
            com.cloudera.recordservice.core.Schema.ColumnDesc cInfo = schema_.getColumnInfo(i);
            Preconditions.checkNotNull(cInfo);
            switch (cInfo.type.typeId) {
            case BOOLEAN:
                ((BooleanWritable) columnValObjects_[i]).set(record.nextBoolean(i));
                break;
            case TINYINT:
                ((ByteWritable) columnValObjects_[i]).set(record.nextByte(i));
                break;
            case SMALLINT:
                ((ShortWritable) columnValObjects_[i]).set(record.nextShort(i));
                break;
            case INT:
                ((IntWritable) columnValObjects_[i]).set(record.nextInt(i));
                break;
            case BIGINT:
                ((LongWritable) columnValObjects_[i]).set(record.nextLong(i));
                break;
            case FLOAT:
                ((FloatWritable) columnValObjects_[i]).set(record.nextFloat(i));
                break;
            case DOUBLE:
                ((DoubleWritable) columnValObjects_[i]).set(record.nextDouble(i));
                break;

            case STRING:
            case VARCHAR:
            case CHAR:
                ByteArray s = record.nextByteArray(i);
                ((Text) columnValObjects_[i]).set(s.byteBuffer().array(), s.offset(), s.len());
                break;
            case TIMESTAMP_NANOS:
                ((TimestampNanosWritable) columnValObjects_[i]).set(record.nextTimestampNanos(i));
                break;
            case DECIMAL:
                ((DecimalWritable) columnValObjects_[i]).set(record.nextDecimal(i));
                break;
            default:
                throw new RuntimeException("Unsupported type: " + cInfo);
            }
        }
    }

    public Writable getColumnValue(int colIdx) {
        return columnVals_[colIdx];
    }

    /**
     * Returns the column value for the given column name, or null of the column name
     * does not exist in this record. This is on the hot path when running with Hive -
     * it is called for every record returned.
     */
    public Writable getColumnValue(String columnName) {
        Integer index = colNameToIdx_.get(columnName);
        if (index == null)
            return null;
        return getColumnValue(index);
    }

    @Override
    // TODO: what is the contract here? Handle NULLs
    public void write(DataOutput out) throws IOException {
        schema_.write(out);
        for (int i = 0; i < columnValObjects_.length; ++i) {
            columnValObjects_[i].write(out);
        }
    }

    @Override
    public void readFields(DataInput in) throws IOException {
        schema_ = new Schema();
        schema_.readFields(in);
        columnValObjects_ = new Writable[schema_.getNumColumns()];
        for (int i = 0; i < columnValObjects_.length; i++) {
            columnValObjects_[i] = getWritableInstance(schema_.getColumnInfo(i).type.typeId);
            columnValObjects_[i].readFields(in);
        }
    }

    public Schema getSchema() {
        return schema_;
    }

    /**
     * Returns the corresponding Writable object for this column type.
     */
    public Writable getWritableInstance(com.cloudera.recordservice.core.Schema.Type type) {
        switch (type) {
        case BOOLEAN:
            return new BooleanWritable();
        case TINYINT:
            return new ByteWritable();
        case SMALLINT:
            return new ShortWritable();
        case INT:
            return new IntWritable();
        case BIGINT:
            return new LongWritable();
        case FLOAT:
            return new FloatWritable();
        case DOUBLE:
            return new DoubleWritable();
        case VARCHAR:
        case CHAR:
        case STRING:
            return new Text();
        case TIMESTAMP_NANOS:
            return new TimestampNanosWritable();
        case DECIMAL:
            return new DecimalWritable();
        default:
            throw new UnsupportedOperationException("Unexpected type: " + toString());
        }
    }
}