org.kiji.mapreduce.framework.KijiTableInputFormat.java Source code

Java tutorial

Introduction

Here is the source code for org.kiji.mapreduce.framework.KijiTableInputFormat.java

Source

/**
 * (c) Copyright 2012 WibiData, Inc.
 *
 * See the NOTICE file distributed with this work for additional
 * information regarding copyright ownership.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.kiji.mapreduce.framework;

import java.io.IOException;

import com.google.common.base.Preconditions;
import org.apache.commons.codec.binary.Base64;
import org.apache.commons.lang.SerializationUtils;
import org.apache.hadoop.conf.Configurable;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.InputFormat;
import org.apache.hadoop.mapreduce.Job;

import org.kiji.annotations.ApiAudience;
import org.kiji.annotations.ApiStability;
import org.kiji.delegation.Lookups;
import org.kiji.schema.EntityId;
import org.kiji.schema.Kiji;
import org.kiji.schema.KijiDataRequest;
import org.kiji.schema.KijiRowData;
import org.kiji.schema.KijiURI;
import org.kiji.schema.filter.KijiRowFilter;
import org.kiji.schema.hbase.HBaseKijiURI;

/**
 * InputFormat for Hadoop MapReduce jobs reading from a Kiji table.
 *
 * <p>This abstract base class offers two static entry points: {@link Factory#get(KijiURI)} to
 * obtain the input-format factory matching a Kiji URI, and
 * {@link #configureJob(Job, KijiURI, KijiDataRequest, EntityId, EntityId, KijiRowFilter)} to
 * record the read parameters in a job's configuration.</p>
 */
@ApiAudience.Framework
@ApiStability.Stable
public abstract class KijiTableInputFormat extends InputFormat<EntityId, KijiRowData> implements Configurable {

    /** Static factory class for getting instances of the appropriate KijiTableInputFormatFactory. */
    public static final class Factory {
        /**
         * Returns the KijiTableInputFormatFactory registered under the scheme of a Kiji URI
         * (for example HBase or Cassandra).
         *
         * <p>The generic Kiji scheme ({@code KijiURI.KIJI_SCHEME}) is mapped to the HBase scheme
         * ({@code HBaseKijiURI.HBASE_SCHEME}) before the lookup is performed.</p>
         *
         * @param uri URI of the Kiji instance whose scheme selects the factory. Must not be null.
         * @return the KijiTableInputFormatFactory registered for the URI's scheme; never null.
         * @throws NullPointerException if {@code uri} or its scheme is null.
         * @throws IllegalStateException if the lookup yields no factory for the scheme.
         */
        public static KijiTableInputFormatFactory get(KijiURI uri) {
            Preconditions.checkNotNull(uri, "uri must not be null");
            final String uriScheme = Preconditions.checkNotNull(uri.getScheme(),
                    "scheme of URI '%s' must not be null", uri);

            // The generic Kiji scheme is served by the HBase implementation.
            final String lookupName = uriScheme.equals(KijiURI.KIJI_SCHEME) ? HBaseKijiURI.HBASE_SCHEME : uriScheme;

            final KijiTableInputFormatFactory instance;
            // NOTE(review): this locks on Kiji.Factory's monitor rather than on this class's own;
            // presumably intended to serialize provider lookups with Kiji.Factory. Confirm before changing.
            synchronized (Kiji.Factory.class) {
                instance = Lookups.getNamed(KijiTableInputFormatFactory.class).lookup(lookupName);
            }

            // A plain `assert` is skipped unless the JVM runs with -ea, which would let a null
            // factory escape to the caller; fail here with a message that names the scheme instead.
            Preconditions.checkState(null != instance,
                    "No KijiTableInputFormatFactory found for scheme '%s' (URI: %s)", lookupName, uri);
            return instance;
        }
    }

    /**
     * Configures a Hadoop M/R job to read from a given table.
     *
     * <p>The data request is Java-serialized and Base64-encoded, the row keys are Base64-encoded
     * from their HBase row-key bytes, and the filter is stored as JSON text. Optional values that
     * are null are simply not written to the configuration.</p>
     *
     * @param job Job to configure. Must not be null.
     * @param tableURI URI of the table to read from. Must not be null.
     * @param dataRequest Data request. Must not be null.
     * @param startRow Minimum row key to process. May be left null to indicate
     *     that scanning should start at the beginning of the table.
     * @param endRow Maximum row key to process. May be left null to indicate that
     *     scanning should continue to the end of the table.
     * @param filter Filter to use for scanning. May be left null.
     * @throws IOException on I/O error.
     * @throws NullPointerException if {@code job}, {@code tableURI} or {@code dataRequest} is null.
     */
    public static void configureJob(Job job, KijiURI tableURI, KijiDataRequest dataRequest, EntityId startRow,
            EntityId endRow, KijiRowFilter filter) throws IOException {
        Preconditions.checkNotNull(job, "job must not be null");
        Preconditions.checkNotNull(tableURI, "tableURI must not be null");
        Preconditions.checkNotNull(dataRequest, "dataRequest must not be null");

        final Configuration conf = job.getConfiguration();

        // TODO: Check for jars config:
        // GenericTableMapReduceUtil.initTableInput(hbaseTableName, scan, job);

        // Required values: the serialized data request and the URI of the input table.
        final String serializedRequest = Base64.encodeBase64String(SerializationUtils.serialize(dataRequest));
        conf.set(KijiConfKeys.KIJI_INPUT_DATA_REQUEST, serializedRequest);
        conf.set(KijiConfKeys.KIJI_INPUT_TABLE_URI, tableURI.toString());

        // Optional values: only written when provided by the caller.
        if (null != startRow) {
            conf.set(KijiConfKeys.KIJI_START_ROW_KEY, Base64.encodeBase64String(startRow.getHBaseRowKey()));
        }
        if (null != endRow) {
            conf.set(KijiConfKeys.KIJI_LIMIT_ROW_KEY, Base64.encodeBase64String(endRow.getHBaseRowKey()));
        }
        if (null != filter) {
            conf.set(KijiConfKeys.KIJI_ROW_FILTER, filter.toJson().toString());
        }
    }
}