Java tutorial: HBaseInputFormat, a mapred wrapper around HBase's TableInputFormat (from Apache HCatalog)
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.apache.hcatalog.hbase;

import java.io.IOException;
import java.util.List;

import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapred.TableSplit;
import org.apache.hadoop.hbase.mapreduce.TableInputFormat;
import org.apache.hadoop.mapred.HCatMapRedUtil;
import org.apache.hadoop.mapred.InputFormat;
import org.apache.hadoop.mapred.InputSplit;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.RecordReader;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hcatalog.common.HCatConstants;
import org.apache.hcatalog.common.HCatUtil;
import org.apache.hcatalog.mapreduce.InputJobInfo;

/**
 * HBaseInputFormat is a wrapper around HBase's TableInputFormat that exposes it
 * through the old-style org.apache.hadoop.mapred API.
 */
class HBaseInputFormat implements InputFormat<ImmutableBytesWritable, Result> {

    private final TableInputFormat inputFormat;

    public HBaseInputFormat() {
        inputFormat = new TableInputFormat();
    }

    /*
     * @param split the table split to read
     * @param job the job configuration
     * @param reporter progress reporter
     *
     * @return RecordReader over (ImmutableBytesWritable, Result) pairs
     *
     * @throws IOException
     *
     * @see org.apache.hadoop.mapred.InputFormat#getRecordReader(
     *      org.apache.hadoop.mapred.InputSplit,
     *      org.apache.hadoop.mapred.JobConf,
     *      org.apache.hadoop.mapred.Reporter)
     */
    @Override
    public RecordReader<ImmutableBytesWritable, Result> getRecordReader(
            InputSplit split, JobConf job, Reporter reporter) throws IOException {
        // The HCatalog input job info is serialized into the JobConf by the caller.
        String jobString = job.get(HCatConstants.HCAT_KEY_JOB_INFO);
        InputJobInfo inputJobInfo = (InputJobInfo) HCatUtil.deserialize(jobString);

        String tableName = job.get(TableInputFormat.INPUT_TABLE);
        TableSplit tSplit = (TableSplit) split;
        HbaseSnapshotRecordReader recordReader = new HbaseSnapshotRecordReader(inputJobInfo, job);
        inputFormat.setConf(job);
        Scan inputScan = inputFormat.getScan();
        // TODO: Make the caching configurable by the user
        inputScan.setCaching(200);
        inputScan.setCacheBlocks(false);
        // Restrict the scan to the row range covered by this split.
        Scan sc = new Scan(inputScan);
        sc.setStartRow(tSplit.getStartRow());
        sc.setStopRow(tSplit.getEndRow());
        recordReader.setScan(sc);
        recordReader.setHTable(new HTable(job, tableName));
        recordReader.init();
        return recordReader;
    }

    /*
     * @param job the job configuration
     * @param numSplits split-count hint (ignored; HBase regions determine the splits)
     *
     * @return array of InputSplit, one per table region
     *
     * @throws IOException
     *
     * @see org.apache.hadoop.mapred.InputFormat#getSplits(
     *      org.apache.hadoop.mapred.JobConf, int)
     */
    @Override
    public org.apache.hadoop.mapred.InputSplit[] getSplits(JobConf job, int numSplits)
            throws IOException {
        inputFormat.setConf(job);
        return convertSplits(inputFormat.getSplits(
            HCatMapRedUtil.createJobContext(job, null, Reporter.NULL)));
    }

    /* Converts new-API (mapreduce) table splits into old-API (mapred) table splits. */
    private InputSplit[] convertSplits(List<org.apache.hadoop.mapreduce.InputSplit> splits) {
        InputSplit[] converted = new InputSplit[splits.size()];
        for (int i = 0; i < splits.size(); i++) {
            org.apache.hadoop.hbase.mapreduce.TableSplit tableSplit =
                (org.apache.hadoop.hbase.mapreduce.TableSplit) splits.get(i);
            TableSplit newTableSplit = new TableSplit(tableSplit.getTableName(),
                tableSplit.getStartRow(), tableSplit.getEndRow(),
                tableSplit.getRegionLocation());
            converted[i] = newTableSplit;
        }
        return converted;
    }
}
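
To see what a consumer of this input format looks like, here is a minimal sketch of an old-API mapper that processes the (row key, Result) pairs the record reader emits. It is illustrative only: the class name RowCountMapper and the output types are assumptions, not part of the HCatalog source above.

package org.apache.hcatalog.hbase.example; // hypothetical package, for illustration only

import java.io.IOException;

import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;

public class RowCountMapper extends MapReduceBase
    implements Mapper<ImmutableBytesWritable, Result, Text, LongWritable> {

    private static final LongWritable ONE = new LongWritable(1);

    @Override
    public void map(ImmutableBytesWritable rowKey, Result row,
                    OutputCollector<Text, LongWritable> output, Reporter reporter)
            throws IOException {
        // The key is the HBase row key; the value is the full Result for that row.
        output.collect(new Text(Bytes.toString(rowKey.get())), ONE);
    }
}

Note that HBaseInputFormat expects the JobConf to already carry the table name under TableInputFormat.INPUT_TABLE and a serialized InputJobInfo under HCatConstants.HCAT_KEY_JOB_INFO; in practice that wiring is normally done by HCatalog's HBase storage handler rather than by hand in a driver.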