// Java tutorial
/** * (c) Copyright 2012 WibiData, Inc. * * See the NOTICE file distributed with this work for additional * information regarding copyright ownership. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.kiji.mapreduce.input; import java.io.IOException; import org.apache.commons.codec.binary.Base64; import org.apache.commons.lang.SerializationUtils; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.client.Scan; import org.apache.hadoop.hbase.mapreduce.GenericTableMapReduceUtil; import org.apache.hadoop.mapreduce.InputFormat; import org.apache.hadoop.mapreduce.Job; import org.kiji.mapreduce.KijiConfKeys; import org.kiji.mapreduce.KijiTableInputFormat; import org.kiji.mapreduce.MapReduceJobInput; import org.kiji.schema.EntityId; import org.kiji.schema.InternalKijiError; import org.kiji.schema.Kiji; import org.kiji.schema.KijiDataRequest; import org.kiji.schema.KijiManagedHBaseTableName; import org.kiji.schema.KijiSchemaTable; import org.kiji.schema.filter.KijiRowFilter; import org.kiji.schema.filter.KijiRowFilterApplicator; import org.kiji.schema.impl.HBaseDataRequestAdapter; import org.kiji.schema.impl.HBaseKijiTable; import org.kiji.schema.layout.InvalidLayoutException; import org.kiji.schema.layout.KijiTableLayout; /** * Input for a MapReduce job that uses data from a Kiji table. * * <p>The input is Kiji table column data as specified by a <code>KijiDataRequest</code>. 
* Input may be read from the entire table, or from a range of rows using a start and end * key.</p> */ public class KijiTableMapReduceJobInput extends MapReduceJobInput { /** The table to read the job input from. */ private final HBaseKijiTable mInputTable; /** Specifies which columns and versions of cells to read from the table. */ private final KijiDataRequest mDataRequest; /** Optional settings that specify which rows from the input table should be included. */ private final RowOptions mRowOptions; /** * Options that specify which rows from the input table should be included. * * <p>The settings here are used conjunctively with an AND operator. In other words, a * row will be included if and only if it is: * <ul> * <li>lexicographically equal to or after the start row, <em>and</em></li> * <li>lexicographically before the limit row, <em>and</em></li> * <li>accepted by the row filter.</li> * </ul> * </p> */ public static class RowOptions { /** * The start of the row range to read from the table (inclusive). Use null to include * the first row in the table. */ private final EntityId mStartRow; /** * The end of the row range to read from the table (exclusive). Use null to include * the last row in the table. */ private final EntityId mLimitRow; /** * A row filter that specifies whether a row from the table should be excluded. Use * null to include all rows. */ private final KijiRowFilter mRowFilter; /** Constructs options with default settings to include all the rows of the table. */ public RowOptions() { this(null, null, null); } /** * Creates a new <code>RowOptions</code> instance. * * @param startRow The start row (inclusive). * @param limitRow The limit row (exclusive). * @param rowFilter A row filter. */ public RowOptions(EntityId startRow, EntityId limitRow, KijiRowFilter rowFilter) { mStartRow = startRow; mLimitRow = limitRow; mRowFilter = rowFilter; } /** @return The start row (inclusive, may be null to include the first row of the table). 
*/ public EntityId getStartRow() { return mStartRow; } /** @return The limit row (exclusive, may be null to include the last row of the table). */ public EntityId getLimitRow() { return mLimitRow; } /** @return The row filter (may be null). */ public KijiRowFilter getRowFilter() { return mRowFilter; } } /** * Constructs job input from column data in a Kiji table over a row range. * * @param inputTable The table to read input from. * @param dataRequest Specifies the columns and versions of cells to read from the table. * @param rowOptions Specifies optional settings for restricting the input from the * table to some subset of the rows. */ public KijiTableMapReduceJobInput(HBaseKijiTable inputTable, KijiDataRequest dataRequest, RowOptions rowOptions) { // TODO(WIBI-1667): Validate these arguments. mInputTable = inputTable; mDataRequest = dataRequest; mRowOptions = rowOptions; } /** {@inheritDoc} */ @Override public void configure(Job job) throws IOException { // Configure the input format class. super.configure(job); // Get the name of the HBase table that stores the Kiji table data. String hbaseTableName = KijiManagedHBaseTableName .getKijiTableName(mInputTable.getKiji().getName(), mInputTable.getName()).toString(); // Create the HBase scan configured to read the appropriate input from the Kiji table. Scan configuredScan = createConfiguredScan(mInputTable.getLayout()); // Configure the table input using HBase. 
GenericTableMapReduceUtil.initTableInput(hbaseTableName, configuredScan, job); final Configuration conf = job.getConfiguration(); final String serializedRequest = Base64.encodeBase64String(SerializationUtils.serialize(mDataRequest)); conf.set(KijiConfKeys.INPUT_DATA_REQUEST, serializedRequest); final Kiji kiji = mInputTable.getKiji(); // TODO(KIJI-144): Move this to KijiTable.getTableURI() conf.set(KijiConfKeys.INPUT_TABLE_URI, String.format( "kiji://%s:%s/%s/%s", kiji.getConf().get(HConstants.ZOOKEEPER_QUORUM), kiji.getConf() .getInt(HConstants.ZOOKEEPER_CLIENT_PORT, HConstants.DEFAULT_ZOOKEPER_CLIENT_PORT), kiji.getName(), mInputTable.getName())); } /** {@inheritDoc} */ @Override protected Class<? extends InputFormat<?, ?>> getInputFormatClass() { return KijiTableInputFormat.class; } /** * Constructs an HBase Scan object configured to provide the appropriate data from the HBase * table to the MapReduce job according to the data request and row range. * * @param tableLayout The layout of the table to use as input. * @return An HBase Scan descriptor that reads the data from the HTable. * @throws IOException If there is an error. */ private Scan createConfiguredScan(KijiTableLayout tableLayout) throws IOException { // Build the HBase Scan from the data request. HBaseDataRequestAdapter hbaseDataRequestAdapter = new HBaseDataRequestAdapter(mDataRequest); Scan scan; try { scan = hbaseDataRequestAdapter.toScan(tableLayout); } catch (InvalidLayoutException e) { throw new InternalKijiError("Encountered an invalid table layout while configuring a job"); } configureScanWithRowOptions(scan); return scan; } /** * Configure the scan according to the row options. * * @param scan The HBase scan descriptor to configure. * @throws IOException If there is an error. 
*/ private void configureScanWithRowOptions(Scan scan) throws IOException { if (null != mRowOptions.getStartRow()) { scan.setStartRow(mRowOptions.getStartRow().getHBaseRowKey()); } if (null != mRowOptions.getLimitRow()) { scan.setStopRow(mRowOptions.getLimitRow().getHBaseRowKey()); } if (null != mRowOptions.getRowFilter()) { KijiTableLayout tableLayout = mInputTable.getLayout(); KijiSchemaTable schemaTable = mInputTable.getKiji().getSchemaTable(); new KijiRowFilterApplicator(mRowOptions.getRowFilter(), tableLayout, schemaTable).applyTo(scan); } } }